discipline-md 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +80 -0
  3. package/bin/discipline.js +587 -0
  4. package/package.json +40 -0
  5. package/templates/.claude/settings.json +58 -0
  6. package/templates/AGENTS.md +463 -0
  7. package/templates/AGENT_TRACKER.md +138 -0
  8. package/templates/API_REFERENCE.md +131 -0
  9. package/templates/ARCHITECTURE.md +89 -0
  10. package/templates/ASSETS.md +90 -0
  11. package/templates/AUTONOMOUS_QUEUE.md +119 -0
  12. package/templates/BUILD_PLAN.md +89 -0
  13. package/templates/CHANGELOG.md +90 -0
  14. package/templates/CLAUDE.md +89 -0
  15. package/templates/CREDITS.md +109 -0
  16. package/templates/DATA_MODEL.md +88 -0
  17. package/templates/DECISIONS.md +120 -0
  18. package/templates/DEPLOYMENT.md +342 -0
  19. package/templates/HANDOFF.md +289 -0
  20. package/templates/IMPROVEMENT_LOOP.md +103 -0
  21. package/templates/INVESTIGATION.md +154 -0
  22. package/templates/LICENSE +68 -0
  23. package/templates/NOTICE +55 -0
  24. package/templates/OPEN_DECISIONS.md +61 -0
  25. package/templates/PLAYBOOK_FEEDBACK.md +87 -0
  26. package/templates/PROJECT_CONTEXT.md +91 -0
  27. package/templates/README.md +60 -0
  28. package/templates/ROADMAP.md +96 -0
  29. package/templates/SECURITY_AUDIT.md +235 -0
  30. package/templates/SETUP.md +162 -0
  31. package/templates/SPEC.md +105 -0
  32. package/templates/SPEC_WORKFLOW.md +173 -0
  33. package/templates/TODO.md +118 -0
  34. package/templates/USAGE.md +153 -0
  35. package/templates/VERIFICATION_GATE.md +68 -0
  36. package/templates/agents/CROSS_REPO_SYNC.md +124 -0
  37. package/templates/agents/DEBUGGER.md +112 -0
  38. package/templates/agents/PLANNER.md +111 -0
  39. package/templates/agents/README.md +64 -0
  40. package/templates/agents/RECON.md +99 -0
  41. package/templates/agents/SECURITY_REVIEWER.md +123 -0
  42. package/templates/agents/SPEC_ARCHITECT.md +133 -0
  43. package/templates/agents/STAKEHOLDER.md +197 -0
  44. package/templates/agents/_TEMPLATE.md +116 -0
  45. package/templates/agents/optional/ARCHITECT.md +109 -0
  46. package/templates/agents/optional/BACKEND_IMPACT.md +108 -0
  47. package/templates/agents/optional/DOC_AUDIT.md +108 -0
  48. package/templates/agents/optional/FRONTEND_IMPACT.md +109 -0
  49. package/templates/agents/optional/QUEUE_CURATOR.md +114 -0
  50. package/templates/agents/optional/TEST_STRATEGIST.md +107 -0
@@ -0,0 +1,58 @@
1
+ {
2
+ "_README": "Project-level Claude Code settings. DEFAULT BEHAVIOR: this file delegates to user-level (~/.claude/settings.json or %USERPROFILE%\\.claude\\settings.json) — projects inherit whatever permissions the operator runs personally. Project-level overrides go in the top-level `permissions` block below if and only if the project has a specific reason to be tighter or looser than the operator default. The `_alternatives` block at the bottom is REFERENCE ONLY — opt-in presets that are NOT applied unless their contents are moved up into the active `permissions` block. NEVER paste blindly; review the risk profile first.",
3
+
4
+ "permissions": {
5
+ "_comment": "Empty by default. Project inherits user-level permissions. To override at project scope, populate `allow`, `deny`, and/or `ask` arrays here. See `_alternatives` below for opt-in presets."
6
+ },
7
+
8
+ "_alternatives": {
9
+ "_README": "OPT-IN PRESETS — not applied. Each preset shows what could go in the top-level `permissions` block above. Do not paste blindly. Each is calibrated to a specific operator trust posture; review the chat / docs that authored it before adopting.",
10
+
11
+ "permissive_individual_high_autonomy": {
12
+ "_provenance": "Authored for a single-operator personal cross-repo workflow. Designed for a SINGLE TRUSTED OPERATOR running on their own machine, not for shared CI, multi-developer use, or environments with sensitive credentials accessible to the agent.",
13
+ "_intent": "Auto-approve most coding-task work; prompt only on commands that add new libraries, push to remote, deploy, fetch arbitrary code (npx/dlx), or have known-dangerous variants.",
14
+ "_warning": "NOT a recommended default. Inherits the operator's trust posture; any subagent runs under these rules. Pair with: (a) diff review at end of every agent run, (b) the Pre-Deploy Gate in templates/DEPLOYMENT.md, (c) the security-audit recency rule.",
15
+
16
+ "permissions": {
17
+ "allow": [
18
+ "Read", "Edit", "Write", "Glob", "Grep", "NotebookEdit", "TodoWrite", "Skill", "Agent", "WebSearch", "WebFetch",
19
+
20
+ "Bash(git status:*)", "Bash(git -C *:status*)", "Bash(git diff:*)", "Bash(git -C *:diff*)", "Bash(git log:*)", "Bash(git -C *:log*)", "Bash(git show:*)", "Bash(git -C *:show*)", "Bash(git branch:*)", "Bash(git -C *:branch*)", "Bash(git remote:*)", "Bash(git -C *:remote*)", "Bash(git symbolic-ref:*)", "Bash(git -C *:symbolic-ref*)", "Bash(git rev-parse:*)", "Bash(git -C *:rev-parse*)", "Bash(git ls-files:*)", "Bash(git -C *:ls-files*)", "Bash(git blame:*)", "Bash(git -C *:blame*)", "Bash(git fetch:*)", "Bash(git -C *:fetch*)", "Bash(git config --get:*)", "Bash(git -C *:config --get*)", "Bash(git config --list:*)", "Bash(git -C *:config --list*)",
21
+
22
+ "Bash(git switch:*)", "Bash(git -C *:switch*)", "Bash(git checkout -b:*)", "Bash(git -C *:checkout -b*)", "Bash(git restore:*)", "Bash(git -C *:restore*)", "Bash(git add:*)", "Bash(git -C *:add*)", "Bash(git commit:*)", "Bash(git -C *:commit*)", "Bash(git stash push:*)", "Bash(git -C *:stash push*)", "Bash(git stash pop:*)", "Bash(git -C *:stash pop*)", "Bash(git stash list:*)", "Bash(git -C *:stash list*)", "Bash(git stash show:*)", "Bash(git -C *:stash show*)", "Bash(git stash apply:*)", "Bash(git -C *:stash apply*)", "Bash(git tag:*)", "Bash(git -C *:tag*)", "Bash(git merge:*)", "Bash(git -C *:merge*)", "Bash(git rebase:*)", "Bash(git -C *:rebase*)", "Bash(git worktree:*)", "Bash(git -C *:worktree*)", "Bash(git pull:*)", "Bash(git -C *:pull*)", "Bash(git clone:*)", "Bash(git init:*)", "Bash(git reflog:*)", "Bash(git -C *:reflog*)", "Bash(git cherry-pick:*)", "Bash(git -C *:cherry-pick*)",
23
+
24
+ "Bash(gh pr view:*)", "Bash(gh pr list:*)", "Bash(gh pr create:*)", "Bash(gh pr comment:*)", "Bash(gh pr diff:*)", "Bash(gh pr checks:*)", "Bash(gh pr status:*)", "Bash(gh issue view:*)", "Bash(gh issue list:*)", "Bash(gh issue create:*)", "Bash(gh issue comment:*)", "Bash(gh repo view:*)", "Bash(gh repo list:*)", "Bash(gh run list:*)", "Bash(gh run view:*)", "Bash(gh run watch:*)", "Bash(gh release view:*)", "Bash(gh release list:*)",
25
+
26
+ "Bash(npm test:*)", "Bash(npm run test:*)", "Bash(npm run lint:*)", "Bash(npm run typecheck:*)", "Bash(npm run build:*)", "Bash(npm run check:*)", "Bash(npm run dev:*)", "Bash(npm run format:*)", "Bash(npm run watch:*)", "Bash(npm run preview:*)", "Bash(npm ls:*)", "Bash(npm list:*)", "Bash(npm outdated:*)", "Bash(npm audit:*)", "Bash(npm view:*)", "Bash(npm pack:*)",
27
+
28
+ "Bash(pnpm test:*)", "Bash(pnpm run test:*)", "Bash(pnpm lint:*)", "Bash(pnpm run lint:*)", "Bash(pnpm typecheck:*)", "Bash(pnpm run typecheck:*)", "Bash(pnpm build:*)", "Bash(pnpm run build:*)", "Bash(pnpm check:*)", "Bash(pnpm run check:*)", "Bash(pnpm dev:*)", "Bash(pnpm run dev:*)", "Bash(pnpm format:*)", "Bash(pnpm run format:*)", "Bash(pnpm watch:*)", "Bash(pnpm run watch:*)", "Bash(pnpm preview:*)", "Bash(pnpm run preview:*)", "Bash(pnpm exec:*)", "Bash(pnpm list:*)", "Bash(pnpm ls:*)", "Bash(pnpm outdated:*)", "Bash(pnpm audit:*)", "Bash(pnpm view:*)",
29
+
30
+ "Bash(yarn test:*)", "Bash(yarn run test:*)", "Bash(yarn lint:*)", "Bash(yarn typecheck:*)", "Bash(yarn build:*)", "Bash(yarn list:*)", "Bash(yarn outdated:*)", "Bash(yarn audit:*)",
31
+
32
+ "Bash(node:*)", "Bash(deno test:*)", "Bash(deno check:*)", "Bash(deno fmt:*)", "Bash(deno lint:*)", "Bash(bun test:*)", "Bash(bun run test:*)", "Bash(bun run lint:*)", "Bash(bun run typecheck:*)", "Bash(bun run build:*)", "Bash(tsc:*)", "Bash(tsx:*)", "Bash(eslint:*)", "Bash(prettier:*)", "Bash(jest:*)", "Bash(vitest:*)", "Bash(vite build:*)", "Bash(vite preview:*)", "Bash(playwright:*)", "Bash(wrangler dev:*)", "Bash(wrangler tail:*)", "Bash(wrangler whoami:*)",
33
+
34
+ "Bash(python:*)", "Bash(python3:*)", "Bash(py:*)", "Bash(pytest:*)", "Bash(ruff:*)", "Bash(black:*)", "Bash(mypy:*)", "Bash(pyright:*)",
35
+
36
+ "Bash(cargo build:*)", "Bash(cargo check:*)", "Bash(cargo test:*)", "Bash(cargo clippy:*)", "Bash(cargo fmt:*)", "Bash(cargo run:*)", "Bash(cargo doc:*)", "Bash(cargo bench:*)", "Bash(cargo tree:*)",
37
+
38
+ "Bash(go build:*)", "Bash(go test:*)", "Bash(go run:*)", "Bash(go vet:*)", "Bash(go fmt:*)", "Bash(go mod tidy:*)", "Bash(go mod verify:*)", "Bash(go mod why:*)",
39
+
40
+ "Bash(docker ps:*)", "Bash(docker logs:*)", "Bash(docker inspect:*)", "Bash(docker compose ps:*)", "Bash(docker compose logs:*)",
41
+
42
+ "PowerShell(Get-*)", "PowerShell(Test-Path:*)", "PowerShell(Resolve-Path:*)", "PowerShell(Select-Object:*)", "PowerShell(Where-Object:*)", "PowerShell(ForEach-Object:*)", "PowerShell(Sort-Object:*)", "PowerShell(Group-Object:*)", "PowerShell(Measure-Object:*)", "PowerShell(Compare-Object:*)", "PowerShell(Format-*)", "PowerShell(Out-File:*)", "PowerShell(Out-String:*)", "PowerShell(ConvertTo-*)", "PowerShell(ConvertFrom-*)", "PowerShell(Set-Location:*)", "PowerShell(Push-Location:*)", "PowerShell(Pop-Location:*)",
43
+
44
+ "PowerShell(New-Item:*)", "PowerShell(Copy-Item:*)", "PowerShell(Move-Item:*)", "PowerShell(Rename-Item:*)", "PowerShell(Set-Content:*)", "PowerShell(Add-Content:*)", "PowerShell(Clear-Content:*)",
45
+
46
+ "PowerShell(Stop-Process:*)", "PowerShell(Start-Process:*)", "PowerShell(Wait-Process:*)"
47
+ ],
48
+ "deny": [
49
+ "Bash(git push --force:*)", "Bash(git -C *:push --force*)", "Bash(git push -f:*)", "Bash(git -C *:push -f*)", "Bash(git push --force-with-lease:*)", "Bash(git -C *:push --force-with-lease*)",
50
+ "Bash(git branch -D:*)", "Bash(git -C *:branch -D*)", "Bash(git branch --delete --force:*)", "Bash(git -C *:branch --delete --force*)",
51
+ "Bash(git stash drop:*)", "Bash(git -C *:stash drop*)", "Bash(git stash clear:*)", "Bash(git -C *:stash clear*)",
52
+ "Bash(rm -rf:*)", "Bash(rm -fr:*)",
53
+ "Bash(gh pr merge:*)", "Bash(gh release create:*)", "Bash(gh workflow run:*)", "Bash(gh secret:*)"
54
+ ]
55
+ }
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,463 @@
1
+ # Agents
2
+
3
+ Playbook for AI coding agents working in this repository. Defines the host model, when to escalate, the standard named subagent set, autonomy tags, and the human-collaboration gate for risky work.
4
+
5
+ This file is read at the start of any non-trivial session. Keep it short, prescriptive, and honest about cost and risk. Update it when the model lineup, escalation rules, autonomy queue, or the standard subagent set changes.
6
+
7
+ This file is the framework starter. Project copies should add a one-line note near the top pointing back at the framework's canonical template (wherever you keep it) so that promotions land back in the framework via `docs/PLAYBOOK_FEEDBACK.md` rather than diverging silently.
8
+
9
+ ## Host Model And Escalation
10
+
11
+ Default host: **Sonnet 4.6**.
12
+
13
+ Run the host as Sonnet by default. Escalate to **Opus 4.7** for the categories listed below. Use **Haiku 4.5** aggressively as a subagent for cheap, well-bounded read-only and small-edit work.
14
+
15
+ The reason for Sonnet-by-default is cost over time: Opus is roughly 5x the per-token cost of Sonnet, and most work in this repo is doc-driven and well-specified — the marginal Opus advantage rarely pays for itself. Cache hygiene and parallelism matter more than raw capability for long autonomous runs.
16
+
17
+ ### Escalate to Opus 4.7 for
18
+
19
+ - Ambiguous architecture decisions that will land in `docs/DECISIONS.md`.
20
+ - Cross-repo refactors that touch multiple subproducts at once.
21
+ - Security-sensitive code: auth, JWT, rate limiting, input validation, password handling.
22
+ - Non-obvious debugging where the failure mode is not yet understood.
23
+ - Work tagged `[autonomy: needs-human-collab]` (after explicit user approval — see below).
24
+ - Pre-implementation planning passes (run a short Opus Plan, then drop to Sonnet for the body of the work).
25
+
26
+ When "ambiguous" is itself ambiguous, use the countable triggers — any one is sufficient to escalate:
27
+
28
+ - The task touches 3+ repos or 6+ files.
29
+ - The change alters an interface, schema, or contract that another repo or component imports.
30
+ - You have rewritten your own plan twice and it still doesn't survive contact with the code.
31
+ - Two distinct root-cause hypotheses, each backed by cited evidence, have both been falsified.
32
+ - The decision will be recorded in `docs/DECISIONS.md`.
33
+
34
+ ### Use Haiku 4.5 for
35
+
36
+ - Codebase reconnaissance ("where is symbol X used", "list all references to Y").
37
+ - Doc-consistency audits (find stale paths, names, endpoints after a rename).
38
+ - Test-output summarization.
39
+ - Content-spill / accessibility / link-checker scans.
40
+ - Mechanical CSS/palette find-replace.
41
+ - Single-line tweaks (default values, size bumps, copy edits).
42
+ - File-existence and directory-shape checks.
43
+
44
+ ### Default to Sonnet 4.6 for
45
+
46
+ Everything else. Most feature work, most refactors, most test writing, most documentation updates, most CHANGELOG/DECISIONS entries.
47
+
48
+ ## Cross-Ecosystem Tier Framework
49
+
50
+ The Sonnet/Opus/Haiku names above are Claude-specific. The same three-tier structure applies to other providers and to local models.
51
+
52
+ ### Three tiers (model-agnostic)
53
+
54
+ - **Frontier tier** — maximum reasoning, designed for ambiguity and complex synthesis. Slowest and most expensive. Use for: ambiguous architecture, cross-repo refactors, security review, non-obvious debugging, paired-design work tagged `[autonomy: needs-human-collab]`. Claude equivalent: Opus 4.7.
55
+ - **Workhorse tier** — strong general capability, balanced speed and cost. The default host. Use for: most feature work, test writing, doc updates, single-tool features, CHANGELOG/DECISIONS entries. Claude equivalent: Sonnet 4.6.
56
+ - **Recon tier** — fast and cheap, well-bounded tasks. Use for: file/symbol search, doc-consistency audits, mechanical find-replace, test-output summarization, single-line tweaks. Claude equivalent: Haiku 4.5.
57
+
58
+ ### Weak-model floor (hard rule)
59
+
60
+ Some tasks are never run below Frontier tier unsupervised, regardless of how well-specified they look. A Workhorse- or Recon-tier agent that encounters one of these mid-task stops and surfaces it rather than attempting it:
61
+
62
+ - Scoring or ranking decisions that feed a funding/approval gate (scorecard application, comparative ranking, judge passes).
63
+ - Adversarial review (red-team, security review) — weaker models soft-pedal adversarial roles.
64
+ - Materiality calls (is this drift? is this friction real?) that gate a re-pitch or a playbook change.
65
+ - Edits to the funding gate, approval signals, or `docs/AUTONOMOUS_QUEUE.md` membership.
66
+
67
+ Recon-tier agents additionally never self-assess "this is within my envelope" for work outside the Recon task list — when in doubt at Recon tier, the answer is hand back, not attempt.
68
+
69
+ ### Experimental scope and the two-failure rule
70
+
71
+ When you introduce a new `(model, task-type)` pair — e.g. trialing a Recon-tier model on simple-single-test additions, or running a Workhorse-tier model on an architectural decision normally reserved for Frontier — mark the pair **experimental** rather than silently widening validated scope. Track every experimental run in `docs/AGENT_TRACKER.md` (see the framework template) regardless of outcome.
72
+
73
+ - **Two failures contract the scope.** Two failures on a `(model, task-type)` pair drop that combination back to its prior validated tier. Record the contraction with a one-line entry under "Applied (recent)" in `docs/PLAYBOOK_FEEDBACK.md`.
74
+ - **Three clean runs graduate the pair.** Three successful runs on a `(model, task-type)` pair promote it from experimental to validated scope; update the tier table or role contract accordingly.
75
+ - Vibes-based escalation is out — the rule is empirical. Cross-reference `AGENT_TRACKER.md` for the full schema and the running record.
76
+
77
+ ### Mapping examples (early 2026)
78
+
79
+ | Tier | Anthropic | OpenAI | Google | Local |
80
+ |---|---|---|---|---|
81
+ | Frontier | Opus 4.7 | GPT-5 / o4 / o3-pro | Gemini 3 Pro | Llama 3.3 405B, Qwen 3 235B, DeepSeek-R1 |
82
+ | Workhorse | Sonnet 4.6 | GPT-5 mini, GPT-4.1 | Gemini 2.5 Flash | Llama 3.3 70B, Qwen 2.5 72B, DeepSeek-V3 |
83
+ | Recon | Haiku 4.5 | GPT-5 nano, GPT-4.1 mini | Gemini 2.5 Flash Lite | Llama 3.1 8B, Qwen 2.5 7B, Phi 4 |
84
+
85
+ Lineups change quarterly; treat the table as illustrative, not authoritative.
86
+
87
+ ### Picking a tier for an unknown model
88
+
89
+ In order of reliability:
90
+
91
+ 1. **Provider positioning** — top of lineup (frontier), mid-tier balanced (workhorse), smallest/cheapest (recon).
92
+ 2. **Cost per token** — early-2026 thresholds: above ~$5/M output → frontier; ~$0.5–5/M → workhorse; below ~$0.5/M → recon.
93
+ 3. **Parameter count for local models** — above ~100B → frontier-adjacent; 30–100B → workhorse; below 30B → recon. Reasoning fine-tunes punch a tier above their base counterparts.
94
+ 4. **Reasoning vs. base mode** — extended-reasoning mode is effectively tier-up from base.
95
+
96
+ ### Runtime patterns
97
+
98
+ - **Multi-model routing** (Cursor, Continue.dev, custom orchestrators with per-subagent model selection): apply tier-by-role. Workhorse for the host and `cross-repo-sync` / `*-impact`; Recon for `recon` / `doc-audit`; Frontier for explicit Plan-pass calls or `[autonomy: needs-human-collab]` items.
99
+ - **Single-model runtime** (one provider key, one local endpoint): stick with Workhorse-tier as the host. Subagent specialization becomes conceptual rather than enforced — you still spawn subagents for context isolation and parallelism, but they all run on the same model.
100
+
101
+ ### Local-model caveats
102
+
103
+ - Tool-calling reliability varies — test JSON tool-call output before relying on subagent orchestration.
104
+ - Smaller context windows — prefer focused subagent prompts; offload via file reads.
105
+ - Streaming behavior varies — limits long-running supervised mode UX.
106
+ - Latency math changes — a local Workhorse model on a consumer GPU may be slower than a cloud Recon model. If every spawn costs 30 seconds of warmup, coordinator-heavy is the wrong default for that runtime; flip to Supervised-Direct as the local default.
107
+
108
+ ## Host Posture: Coordinator-Heavy by Default
109
+
110
+ **The host's default posture is coordinator-heavy in every mode.** The host orchestrates; named subagents do the work; the host reconciles results, writes commits, reports back. This applies in autonomous queue runs AND in interactive/supervised sessions.
111
+
112
+ ### What the host does directly (always)
113
+
114
+ - Conversational replies (questions, tradeoffs, design discussion, status).
115
+ - Reading docs the host must update (CHANGELOG, TODO, AUTONOMOUS_QUEUE, DECISIONS).
116
+ - Reconciling subagent output.
117
+ - Writing the final commit / branch / merge / PR. Subagents do not own git.
118
+ - Single trivial edits the user pointed at directly ("fix this typo", "rename this variable").
119
+
120
+ ### What the host delegates by default
121
+
122
+ Everything else: file/symbol search → Recon `recon`; doc-consistency sweeps → Recon `doc-audit`; backend impact → Workhorse `backend-impact`; frontend impact → Workhorse `frontend-impact`; test plans → Workhorse `test-strategist`; multi-file/multi-repo edits → Workhorse `cross-repo-sync` in worktrees; long-output runs (tests, lints) → subagent absorbs noise, returns verdict.
123
+
124
+ ### Why coordinator-heavy as the universal default
125
+
126
+ Context budget hygiene, parallelism via concurrent subagents, worktree-safe fan-out for edits, consistency across modes, auditable orchestration log.
127
+
128
+ ### Cost of coordinator-heavy (be honest)
129
+
130
+ Slower interactive turns, less interruptible, spawn overhead can exceed task size for tiny edits. The user accepts these costs in exchange for context hygiene and consistency.
131
+
132
+ ### Supervised-Direct mode (opt-in override)
133
+
134
+ The user can override the default for a task or session. Signals: "just do it directly", "no subagents", "be quick I'm watching", "edit it yourself". In Supervised-Direct mode, the host works directly with Edit/Read/Grep/Bash. Subagents still fire for genuinely-parallel cross-repo work, workspace-wide searches, or long-output runs (those are coordinator wins regardless of supervision).
135
+
136
+ To return: "back to coordinator mode", "use subagents from here", or starting a new task. State the mode change out loud when it happens.
137
+
138
+ ## Named Subagent Roles
139
+
140
+ When spawning a subagent, pick a named role from the standard set below and follow its prompt template. Each role has a fixed tier, a read-only-or-edit posture, and a required output shape.
141
+
142
+ ### Standard subagent set
143
+
144
+ This is the canonical list the framework ships with. Project-local roles go under `docs/agents/<ROLE>.md` (see "Project-Local Roles" below). The set continues to evolve — projects may add more, and patterns that prove general should be promoted back via `docs/PLAYBOOK_FEEDBACK.md`.
145
+
146
+ | Role | Tier | Posture | One-line description |
147
+ |---|---|---|---|
148
+ | `recon` | Recon | read-only | Fast read-only search, file lookups, status reads. |
149
+ | `planner` | Frontier | read-only | High-level approach planning, multi-step task decomposition. |
150
+ | `architect` | Frontier | read-only | Design decisions, trade-off analysis, structural choices. |
151
+ | `debugger` | Frontier | read-only | Root-cause hunts on failing tests and mysterious behaviour. |
152
+ | `doc-audit` | Recon | read-only | Documentation review for stale, inconsistent, or missing content. |
153
+ | `test-strategist` | Workhorse | read-only | Test plan design, gap analysis, coverage strategy. |
154
+ | `security-reviewer` | Frontier | read-only | Security audit passes, threat modelling. **Required** before any deploy to a public surface — see `templates/DEPLOYMENT.md` Pre-Deploy Gate. **Recommended** before major architecture decisions and changes to auth/data/deploy/integrations. |
155
+ | `cross-repo-sync` | Workhorse | edit-capable | Multi-repo coordination, sibling-doc sync. |
156
+ | `backend-impact` | Workhorse | read-only | Backend change-impact analysis. |
157
+ | `frontend-impact` | Workhorse | read-only | Frontend change-impact analysis. |
158
+ | `queue-curator` | Frontier | read-only | Autonomous queue maintenance, refill, prioritization. |
159
+
160
+ The detailed contracts for each role follow.
161
+
162
+ ### `recon` (Recon, read-only)
163
+
164
+ Locate code: file patterns, symbol references, "where is X defined / which files reference Y". Use this before editing.
165
+
166
+ Output: bulleted list of `path:line` references plus a one-paragraph summary of what was found.
167
+
168
+ ### `backend-impact` (Workhorse, read-only)
169
+
170
+ Read backend code paths affected by a planned change. Identify route handlers, schema dependencies, mutex/lock points, rate-limit interactions, test coverage.
171
+
172
+ Output: affected files, risks, suggested edit order, tests that must be updated.
173
+
174
+ ### `frontend-impact` (Workhorse, read-only)
175
+
176
+ Same as `backend-impact` for the frontend layer. Identify pages/components affected, state interactions, asset references, accessibility considerations.
177
+
178
+ Output: same shape as `backend-impact`.
179
+
180
+ ### `doc-audit` (Recon, read-only)
181
+
182
+ After a rename or behavior change, sweep the docs for stale references. Searches for old name/path/endpoint strings across `docs/` and any code that documents itself.
183
+
184
+ Output: list of stale references with line numbers, grouped by file.
185
+
186
+ ### `test-strategist` (Workhorse, read-only)
187
+
188
+ Given a change, propose the test plan: which existing tests must change, which new tests are needed, which manual smoke checks are still required.
189
+
190
+ Output: existing-test diffs needed, new-test list with file paths, manual smoke checklist.
191
+
192
+ ### `cross-repo-sync` (Workhorse, edit-capable)
193
+
194
+ Apply a change consistently across sibling repos (palette swap, tool-link add, back-link insert). Always edit-capable; always uses worktree isolation.
195
+
196
+ Output: list of edits per repo, plus a verification command per repo.
197
+
198
+ ### `queue-curator` (Frontier, read-only, optionally background)
199
+
200
+ Audit `docs/AUTONOMOUS_QUEUE.md`, `docs/TODO.md`, `docs/ROADMAP.md`, `docs/CHANGELOG.md`, plus any sibling repos' equivalents to propose new entries for the queue. Useful when (a) the queue is running dry, (b) at the end of a long autonomous chain to scope next-session work, or (c) periodically as a hygiene pass to surface forgotten queue-able items.
201
+
202
+ The curator must NOT promote items to `[autonomy: safe]` that the author tagged otherwise — proposals respect the original autonomy tag. Items the curator considers but rejects go in a "Notes" section with a one-line reason so future curators don't re-evaluate.
203
+
204
+ **Run pattern:** spawn read-only with `run_in_background: true` so the curator works alongside active queue execution. The host surfaces proposals to the user at the end of the session; user picks which to add. Adding to `AUTONOMOUS_QUEUE.md` is a coordinator action, not a curator action — the queue stays a curated artifact.
205
+
206
+ Output: structured proposal list — for each entry, source `path:line`, why-safe rationale, acceptance criterion, and a queue-format-ready bullet line. Plus a Notes section for rejected items, plus a one-paragraph queue-health summary.
207
+
208
+ ### Frontier-tier roles (Opus / equivalent)
209
+
210
+ Spawn these when the host hits a subtask matching the escalation list at the top of this file. Pattern: the host stays at Workhorse tier and delegates the Frontier-tier subtask to a named Frontier subagent, then resumes implementation as Workhorse with the subagent's structured output as input. This is preferable to escalating the host itself: Frontier-tier spend stays bounded to the hard subtask, host context stays lean, and Frontier subagent output is structured and reviewable.
211
+
212
+ #### `planner` (Frontier, read-only)
213
+
214
+ Pre-implementation planning pass for medium/large features. Reads the spec (TODO/ROADMAP entry), relevant files, and prior `docs/DECISIONS.md`. Output: numbered plan with `path:line` references, alternatives considered with rationale, risk list, test plan. Plain markdown, no code.
215
+
216
+ When to spawn: any task tagged `[size: M|L]` or `[autonomy: review]`, work expected to span 5+ files, or anywhere multiple implementation paths are plausible. Run before implementation subagents fire.
217
+
218
+ #### `architect` (Frontier, read-only)
219
+
220
+ Architecture decisions destined for `docs/DECISIONS.md`. Reads project context, prior decisions, and relevant code. Output: structured decision record (`Status` / `Context` / `Decision` / `Consequences`) plus a 1-paragraph rationale per major alternative.
221
+
222
+ When to spawn: any work tagged `[autonomy: needs-human-collab]`, refactors that revise an existing decision, or any ambiguous-architecture sub-task surfaced by `planner`.
223
+
224
+ #### `security-reviewer` (Frontier, read-only)
225
+
226
+ Security review of auth/credential/access-control code. Reads changed code plus its callers, schema columns, and existing test coverage. Looks for OWASP-class issues: auth bypass, IDOR, credential leak, race conditions, rate-limit bypass, JWT verification edge cases.
227
+
228
+ Output: findings list with severity (`high` / `med` / `low` / `note`), `path:line`, brief description, suggested mitigation. If clean: explicit "no findings" with the surface area covered.
229
+
230
+ When to spawn: before merging any change to authn/authz, password handling, token issuance/verification, rate-limit middleware, or schema columns affecting access control.
231
+
232
+ #### `debugger` (Frontier, read-only)
233
+
234
+ Non-obvious failure investigation. Distinguished from `recon` by depth — `recon` locates code, `debugger` reasons about behavior. Reads test output, repro steps, error logs, then traces code paths to hypothesize root cause.
235
+
236
+ Output: ranked root-cause hypotheses with evidence supporting each, plus verification steps the host should run before fixing.
237
+
238
+ When to spawn: failures where the assertion doesn't reveal the cause, suspected race conditions, persistence corruption, or anywhere "I'm not sure why this is happening" is the host's honest state.
239
+
240
+ Add project-specific Frontier roles as needed (e.g. `compliance-review`, `migration-review`, `perf-review`, `dsp-design`). Each gets the same shape: tier, read-only-or-edit, when to spawn, required output structure.
241
+
242
+ #### When to escalate the host itself to a Frontier model (rare)
243
+
244
+ The subagent pattern is the default. Recommend a direct planning session — host running at Frontier tier, multi-turn dialogue with the user — when:
245
+
246
+ - The host can't write a good `planner` prompt (genuinely novel work; doesn't know what to ask). A subagent given a vague prompt produces a vague plan; a multi-turn Frontier host can ask clarifying questions.
247
+ - The user explicitly wants an interactive design conversation, not a one-shot plan.
248
+ - A `planner` pass already ran and the choice between alternatives is the actual hard problem — the decision needs dialogue, not another plan.
249
+ - The work needs simultaneous reasoning over several large docs where the host's full context window is the right tool.
250
+ - The work is tagged `[autonomy: needs-human-collab]` and the user has approved starting it.
251
+
252
+ **How.** Surface the recommendation to the user — don't switch tiers silently. End the current Workhorse session cleanly (commit in-flight work, leave a TODO note about the planning handoff). User starts a fresh Frontier session, then returns to Workhorse for implementation once the plan is in hand. `planner` / `architect` subagents resume normal use from there.
253
+
254
+ **Phrasing:** "I think this needs a direct planning session at Frontier tier, not a `planner` subagent, because [specific reason]. Want me to wrap up here so you can start a fresh Frontier session?"
255
+
256
+ **Cost discipline.** Frontier host time is several multiples of Workhorse host time per token, and planning conversations can run long. The user authorizes the tier switch — don't act as if the host were Frontier without an explicit handoff.
257
+
258
+ ### Subagent prompting rules
259
+
260
+ - State the goal in one sentence.
261
+ - List the relevant files or directories explicitly.
262
+ - State the model tier and the role name.
263
+ - State whether the subagent is read-only or edit-capable.
264
+ - Require the role's output shape verbatim.
265
+ - Subagents do not own commits, merges, or doc-completion-gate sign-off. The host reconciles results and performs git operations.
266
+
267
+ ## Project-Local Roles
268
+
269
+ The standard subagent set above ships with the framework. Project-specific roles — anything with content-safety rules, domain validation, custom approval signals, or workflow steps unique to this product — live under `docs/agents/<ROLE>.md` as per-role work contracts.
270
+
271
+ Each role file states:
272
+
273
+ - **Tier** — Frontier / Workhorse / Recon (cross-reference the tier table above).
274
+ - **Authority boundary** — what the role may do directly, what it must escalate, what it must not touch.
275
+ - **Drift-and-re-pitch rules** — when scope drift triggers a return to `docs/OPEN_DECISIONS.md` or a re-pitch to the funding/approval gate.
276
+ - **Content-safety rules** — domain-specific constraints (e.g. solvability checks, accessibility floors, license/IP limits, tone or audience guardrails).
277
+ - **Cleanup gate** — completion-gate steps the role owns before sign-off (CHANGELOG, TODO trim, decision logging, etc.).
278
+ - **Approval signals** — exact phrases or document states that unblock the role (e.g. `FUNDING_APPROVED` for a Founder, a sign-off line in `DECISIONS.md` for a paired-design item).
279
+
280
+ ### Index pattern
281
+
282
+ `docs/AGENTS.md` in each project lists its role files in a small table so the host can find them without crawling the directory:
283
+
284
+ | Role | File | One-line description |
285
+ |---|---|---|
286
+ | `<Role-Name>` | [`docs/agents/<ROLE>.md`](agents/<ROLE>.md) | <one-line summary of what the role owns> |
287
+ | `<Role-Name>` | [`docs/agents/<ROLE>.md`](agents/<ROLE>.md) | <one-line summary> |
288
+
289
+ Roles in this index are project-specific — the standard subagent set above does not need to be re-listed unless a project meaningfully overrides one of them (and an override of that kind belongs in "Local Overrides", below).
290
+
291
+ ### Local Overrides
292
+
293
+ When this repo inherits an `AGENTS.md` from a parent or upstream workspace (for example, a sibling product repo that defers to a workspace-level playbook), do not fork the whole playbook. Instead, add a "Local Overrides" subsection under `docs/AGENTS.md` here that annotates only the divergences:
294
+
295
+ - A different default tier for implementation work.
296
+ - Project-specific escalation rules (e.g. extra approval gates, additional `[autonomy: needs-human-collab]` items).
297
+ - Content-safety constraints unique to this product.
298
+ - Approval-signal renames (record any change as a durable decision in `docs/DECISIONS.md` before adopting).
299
+
300
+ This keeps the upstream playbook authoritative while making project-specific deltas visible at a glance. Drift between the override list and the upstream playbook should be reconciled at the next end-of-session cleanup.
301
+
302
+ ## Worktree Isolation
303
+
304
+ For any edit-capable subagent during autonomous runs, spawn with `isolation: "worktree"` so parallel work cannot conflict. The host reviews the worktree diff before merging.
305
+
306
+ ## Shell And Git Invocation
307
+
308
+ When running git in a sibling repo (or any directory other than the session's working directory), use `git -C "<absolute path>" <command>` instead of `cd "<path>" && git <command>`.
309
+
310
+ `git -C` changes git's working directory without changing the shell's. The harness gates `cd <dir> && <cmd>` with a permission prompt (the `cd` could expose untrusted hooks); `git -C` skips that gate and is the idiomatic flag for the case. It also keeps the shell stateless across calls (no lingering cwd change), which matters when parallel subagents run.
311
+
312
+ ```bash
313
+ # Preferred
314
+ git -C "/path/to/sibling-repo" status
315
+ git -C "/path/to/sibling-repo" log --oneline -5
316
+
317
+ # Avoid
318
+ cd "/path/to/sibling-repo" && git status # triggers permission prompt
319
+ ```
320
+
321
+ `cd` is still appropriate for non-git commands that genuinely need a different cwd (e.g. `cd <repo> && npm test` — `npm` has no `-C` equivalent and reads `package.json` from cwd). The rule is specifically about git.
322
+
323
+ ## Permission Allowlist Maintenance
324
+
325
+ The `git -C` rule above prevents most cross-repo prompts at the source. For patterns that legitimately need `cd` (e.g. `cd <repo> && npm test`), use the `fewer-permission-prompts` skill to add them to the workspace allowlist so they stop prompting on future runs.
326
+
327
+ The skill scans recent Claude Code transcripts, identifies Bash/MCP tool calls that triggered an Allow/Deny prompt, filters to read-only-by-pattern operations, and writes a prioritized allowlist into `.claude/settings.json`. Properties:
328
+
329
+ - Read-only operations only — destructive commands are never auto-allowed.
330
+ - Workspace-scoped (`.claude/settings.json`); applies to every agent in the project.
331
+ - Idempotent across runs; small reviewable diffs.
332
+
333
+ Run after any autonomous run that hit prompts (captures patterns while they're in transcript history), when prompt friction bites, or periodically as hygiene. Do not edit `.claude/settings.json` by hand for routine allowlisting — let the skill handle prioritization and de-duplication.
334
+
335
+ The `git -C` rule is **prevention** (correct agent behaviour avoids the prompt); the skill is **cure** (the harness allows the unavoidable patterns). Use both.
336
+
337
+ ## Autonomy Tags
338
+
339
+ Every entry in `docs/TODO.md` and `docs/ROADMAP.md` should carry one or more of these tags. The autonomous-host policy reads tags before deciding whether to pick up a task without user input.
340
+
341
+ - `[size: S|M|L]` — rough effort. S = under an hour focused work. M = a session. L = multi-session or design-required.
342
+ - `[tier: haiku|sonnet|opus]` — the model that should drive implementation.
343
+ - `[risk: low|med|high]` — blast radius if it goes wrong.
344
+ - `[scope: isolated|cross-repo]` — whether the task touches one repo or several.
345
+ - `[autonomy: safe|review|needs-human-collab]` — the human-gate level (see below).
346
+
347
+ A reasonable default for a small isolated TODO entry is `[size: S][tier: sonnet][risk: low][scope: isolated][autonomy: safe]`. Omit tags that match this default; only call out where the entry differs.
348
+
349
+ ### Autonomy Markers
350
+
351
+ The `[autonomy: …]` tag has three exact values. Use these spellings verbatim — downstream tooling and the autonomous-queue policy match on them.
352
+
353
+ - **`[autonomy: safe]`** — autonomous host can implement and merge unattended. Implementation lands on a branch; tests must pass; doc gates must be honored; the host may open and merge the PR without human review.
354
+ - **`[autonomy: review]`** — autonomous host implements but opens a **draft PR**; a human reviews before merge. Use for changes that are well-scoped but touch surfaces where a second pair of eyes is cheap insurance.
355
+ - **`[autonomy: needs-human-collab]`** — implementation is blocked until paired with a human; do not start. Use for high architectural stakes, unclear product direction, or risky integrations. Surface the task to the user and wait for explicit approval and a planning conversation.
356
+
357
+ These markers are referenced from `docs/TODO.md`, `docs/AUTONOMOUS_QUEUE.md`, `docs/ROADMAP.md`, and `docs/DECISIONS.md`. A change to a marker on an item that already shipped is itself a `DECISIONS.md`-worthy moment.
358
+
359
+ ### Currently `needs-human-collab` in this repo
360
+
361
+ List the project's current paired-session items here so they show up alongside the rule rather than being buried in TODO. Adding or removing items here is a `docs/DECISIONS.md`-worthy moment; do not unilaterally promote/demote.
362
+
363
+ - (none yet — populate as items get tagged)
364
+
365
+ ## Approved Autonomous Queue
366
+
367
+ Tasks pre-approved for unsupervised work belong in `docs/AUTONOMOUS_QUEUE.md` (a thin priority-ordered list of pointers into TODO.md / ROADMAP.md). The autonomous host pulls from the top of that queue. Anything not in the queue requires user input even if it is tagged `[autonomy: safe]`.
368
+
369
+ The queue is curated by the user (and optionally by Frontier-tier planning passes), not promoted automatically by Workhorse-tier implementers.
370
+
371
+ ## End-of-Session Cleanup (Mandatory)
372
+
373
+ Documentation hygiene is a completion gate. Before declaring a task complete, the host MUST:
374
+
375
+ 1. **Delete completed entries from `docs/TODO.md`** — once an item ships and is logged in `docs/CHANGELOG.md`, remove it from TODO. Do not leave `[x] DONE` entries lingering. TODO is a queue, not a log.
376
+ 2. **Update `docs/ROADMAP.md`** — mark completed items with ✅ or move them to a `## Completed` section so the planning surface stays current.
377
+ 3. **Add a `docs/CHANGELOG.md` entry** for any meaningful change.
378
+ 4. **Update `docs/DECISIONS.md`** when the work involved a non-obvious tradeoff.
379
+ 5. **Run `git diff -- docs`** before signing off; verify no stale names, endpoints, or paths remain.
380
+ 6. **Reflect on the playbook** — see "Playbook Improvement Loop" below. If this session revealed a real workflow-impact change, add an entry to `docs/PLAYBOOK_FEEDBACK.md`. Do NOT edit AGENTS / HANDOFF / etc. directly without user review.
381
+ 7. **Run `npx discipline-md lint`** and fix anything red before signing off.
382
+
383
+ "Completed" for the cleanup gate means: the change has landed on the default branch (or, for docs-only work, the edit is committed) AND its `docs/CHANGELOG.md` entry is written. Delete the TODO entry at that moment — not when tests pass, not when a PR is opened. The CHANGELOG entry and the TODO deletion happen in the same commit.
384
+
385
+ The chronological feature log lives in `docs/CHANGELOG.md`. The "what the system does today" surface lives in `docs/PROJECT_CONTEXT.md` (and per-area cold-path docs). Do not add a separate `FEATURES.md` — it would only duplicate one of those two.
386
+
387
+ ## Playbook Improvement Loop
388
+
389
+ The agent playbook should improve over time without becoming improvement-theater. After every non-trivial session, the host should ask: *did anything reveal that AGENTS / HANDOFF / TODO conventions or other workflow docs would work better with a change?*
390
+
391
+ Capture proposals in `docs/PLAYBOOK_FEEDBACK.md`, **not directly in the workflow docs**. Direct edits skip the user review gate.
392
+
393
+ ### Workflow-impact discipline (hard rule)
394
+
395
+ Only propose a change when at least one is true:
396
+
397
+ - A real friction point in this session would have been prevented or reduced by the change.
398
+ - A real friction point is foreseeable in upcoming work and the change prevents it.
399
+ - A pattern showed up multiple times and is worth codifying so future agents don't re-derive it.
400
+ - A subagent role's contract had to be bent to fit a real situation, suggesting the contract needs tuning.
401
+
402
+ **If none of these apply, do NOT propose.** Improvement-theater — adding aspirational language because language is easy — is the failure mode. Stale aspirational guidance is worse than absent guidance.
403
+
404
+ ### Two kinds of proposal
405
+
406
+ - **Local improvements** — edits to this repo's `docs/AGENTS.md`, `docs/HANDOFF.md`, TODO conventions, completion gate, named subagent role contracts, etc.
407
+ - **Template promotions** — a pattern that started project-local has proved general enough to push back to your framework's canonical `templates/`. Higher bar: the pattern should have shipped at least once in this project (ideally with measurable workflow improvement) before promotion.
408
+
409
+ ### Two improvement loops — don't conflate them
410
+
411
+ This section is the **playbook (meta) loop**: improving *how the agents work* (AGENTS / HANDOFF / conventions), captured in `PLAYBOOK_FEEDBACK.md`, gated on workflow-impact.
412
+
413
+ The **object-level loop** is separate: using subagents to recursively improve *the project's own code/features* (discover → execute → verify → evaluate → integrate → repeat). It is optional and lives in its own contracts — `IMPROVEMENT_LOOP.md` (the cycle, composing the queue + autonomy tags + the RECON / reviewer roles) and `VERIFICATION_GATE.md` (the machine-checkable ground-truth signal every iteration must pass). Install with `npx discipline-md add IMPROVEMENT_LOOP VERIFICATION_GATE`. The object-level loop **must not run on any step lacking a verification gate** — such a step is a judgment call; route it to `OPEN_DECISIONS.md` instead.
414
+
415
+ ### Lifecycle
416
+
417
+ 1. **Propose** — agent adds an entry to `docs/PLAYBOOK_FEEDBACK.md` with rationale + the specific friction point. State workflow impact concretely.
418
+ 2. **Review** — user accepts / rejects / asks for revision.
419
+ 3. **Apply** — on accept, agent edits the canonical doc, adds a CHANGELOG entry, moves the PLAYBOOK_FEEDBACK entry to "Applied (recent)".
420
+ 4. **Trim** — once CHANGELOG covers it, delete from PLAYBOOK_FEEDBACK on the next cleanup pass.
421
+ 5. **Verify (later, not the same session)** — the next session in the changed area checks whether the recorded friction is actually gone. If the rule is being ignored or the friction recurs, propose its removal or revision: the loop must be able to subtract, not only add. See `docs/PLAYBOOK_FEEDBACK.md`.
422
+
423
+ Rejected proposals get a brief note under "Rejected (recent)" so the same idea does not get re-proposed.
424
+
425
+ ### Examples of proposals that should NOT land
426
+
427
+ - "AGENTS.md could be clearer about X" — without a specific friction point from this session.
428
+ - "Maybe we should add a `<role>` subagent" — no work this session would have used it; speculative.
429
+ - Wording polish, prose tightening, formatting consistency — not workflow improvements.
430
+
431
+ If you reach for one of these, mention it in the end-of-session summary as an observation and let it die if it doesn't recur.
432
+
433
+ ## Human-Collaboration Gate
434
+
435
+ Some work is too high-stakes for autonomous-host execution even with passing tests. Tag those entries `[autonomy: needs-human-collab]` in TODO/ROADMAP. The agent's job in those cases is to:
436
+
437
+ 1. Surface the task to the user with current context.
438
+ 2. Wait for explicit approval and a planning conversation.
439
+ 3. Run a Frontier-tier planning pass (a `planner` subagent or a direct Frontier session) for the design pass.
440
+ 4. Implement only after the plan is reviewed.
441
+
442
+ Do not silently start work on `[autonomy: needs-human-collab]` items just because tests pass and the task looks scoped.
443
+
444
+ ## Quick Decision Tree
445
+
446
+ Tier names (Frontier / Workhorse / Recon) are used here for ecosystem neutrality — see Cross-Ecosystem Tier Framework above for the Claude / OpenAI / Google / local mapping.
447
+
448
+ ```
449
+ Is this task in docs/AUTONOMOUS_QUEUE.md?
450
+ ├── Yes → tag says safe → run as Workhorse host, open PR
451
+ ├── Yes → tag says review → run as Workhorse host, open DRAFT PR
452
+ └── No / tag says needs-human-collab
453
+ └── Surface to user, wait for approval, plan via Frontier subagent or direct Frontier session
454
+
455
+ For each subtask within the work:
456
+ ├── Recon / lookup → spawn Recon-tier `recon` subagent
457
+ ├── Doc audit / sweep → spawn Recon-tier `doc-audit` subagent
458
+ ├── Backend impact? → spawn Workhorse `backend-impact` subagent
459
+ ├── Frontend impact? → spawn Workhorse `frontend-impact` subagent
460
+ ├── Test plan? → spawn Workhorse `test-strategist` subagent
461
+ ├── Cross-repo edits → spawn Workhorse `cross-repo-sync` subagent (worktree)
462
+ └── Architecture call → spawn Frontier `architect` subagent or escalate the host itself; write decision into DECISIONS.md
463
+ ```