@tianhai/pi-workflow-kit 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/LICENSE +22 -0
  2. package/README.md +509 -0
  3. package/ROADMAP.md +16 -0
  4. package/agents/code-reviewer.md +18 -0
  5. package/agents/config.ts +5 -0
  6. package/agents/implementer.md +26 -0
  7. package/agents/spec-reviewer.md +13 -0
  8. package/agents/worker.md +17 -0
  9. package/banner.jpg +0 -0
  10. package/docs/developer-usage-guide.md +463 -0
  11. package/docs/oversight-model.md +49 -0
  12. package/docs/workflow-phases.md +71 -0
  13. package/extensions/constants.ts +9 -0
  14. package/extensions/lib/logging.ts +138 -0
  15. package/extensions/plan-tracker.ts +496 -0
  16. package/extensions/subagent/agents.ts +144 -0
  17. package/extensions/subagent/concurrency.ts +52 -0
  18. package/extensions/subagent/env.ts +47 -0
  19. package/extensions/subagent/index.ts +1116 -0
  20. package/extensions/subagent/lifecycle.ts +25 -0
  21. package/extensions/subagent/timeout.ts +13 -0
  22. package/extensions/workflow-monitor/debug-monitor.ts +98 -0
  23. package/extensions/workflow-monitor/git.ts +31 -0
  24. package/extensions/workflow-monitor/heuristics.ts +58 -0
  25. package/extensions/workflow-monitor/investigation.ts +52 -0
  26. package/extensions/workflow-monitor/reference-tool.ts +42 -0
  27. package/extensions/workflow-monitor/skip-confirmation.ts +19 -0
  28. package/extensions/workflow-monitor/tdd-monitor.ts +137 -0
  29. package/extensions/workflow-monitor/test-runner.ts +37 -0
  30. package/extensions/workflow-monitor/verification-monitor.ts +61 -0
  31. package/extensions/workflow-monitor/warnings.ts +81 -0
  32. package/extensions/workflow-monitor/workflow-handler.ts +358 -0
  33. package/extensions/workflow-monitor/workflow-tracker.ts +231 -0
  34. package/extensions/workflow-monitor/workflow-transitions.ts +55 -0
  35. package/extensions/workflow-monitor.ts +885 -0
  36. package/package.json +49 -0
  37. package/skills/brainstorming/SKILL.md +70 -0
  38. package/skills/dispatching-parallel-agents/SKILL.md +194 -0
  39. package/skills/executing-tasks/SKILL.md +247 -0
  40. package/skills/receiving-code-review/SKILL.md +196 -0
  41. package/skills/systematic-debugging/SKILL.md +170 -0
  42. package/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
  43. package/skills/systematic-debugging/condition-based-waiting.md +115 -0
  44. package/skills/systematic-debugging/defense-in-depth.md +122 -0
  45. package/skills/systematic-debugging/find-polluter.sh +63 -0
  46. package/skills/systematic-debugging/reference/rationalizations.md +61 -0
  47. package/skills/systematic-debugging/root-cause-tracing.md +169 -0
  48. package/skills/test-driven-development/SKILL.md +266 -0
  49. package/skills/test-driven-development/reference/examples.md +101 -0
  50. package/skills/test-driven-development/reference/rationalizations.md +67 -0
  51. package/skills/test-driven-development/reference/when-stuck.md +33 -0
  52. package/skills/test-driven-development/testing-anti-patterns.md +299 -0
  53. package/skills/using-git-worktrees/SKILL.md +231 -0
  54. package/skills/writing-plans/SKILL.md +149 -0
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "@tianhai/pi-workflow-kit",
3
+ "version": "0.4.1",
4
+ "description": "Workflow skills and enforcement extensions for pi",
5
+ "keywords": [
6
+ "pi-package"
7
+ ],
8
+ "scripts": {
9
+ "test": "vitest run",
10
+ "lint": "biome check .",
11
+ "check": "biome check . && vitest run"
12
+ },
13
+ "license": "MIT",
14
+ "author": "yinloo-ola",
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "https://github.com/yinloo-ola/pi-workflow-kit.git"
18
+ },
19
+ "files": [
20
+ "extensions/",
21
+ "agents/",
22
+ "skills/",
23
+ "docs/",
24
+ "banner.jpg",
25
+ "LICENSE",
26
+ "README.md",
27
+ "ROADMAP.md"
28
+ ],
29
+ "pi": {
30
+ "extensions": [
31
+ "extensions/plan-tracker.ts",
32
+ "extensions/workflow-monitor.ts",
33
+ "extensions/subagent/index.ts"
34
+ ],
35
+ "skills": [
36
+ "skills"
37
+ ]
38
+ },
39
+ "peerDependencies": {
40
+ "@mariozechner/pi-ai": "*",
41
+ "@mariozechner/pi-coding-agent": "*",
42
+ "@mariozechner/pi-tui": "*",
43
+ "@sinclair/typebox": "*"
44
+ },
45
+ "devDependencies": {
46
+ "@biomejs/biome": "^2.3.15",
47
+ "vitest": "^4.0.18"
48
+ }
49
+ }
@@ -0,0 +1,70 @@
1
+ ---
2
+ name: brainstorming
3
+ description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
4
+ ---
5
+
6
+ > **Related skills:** Consider `/skill:using-git-worktrees` to set up an isolated workspace, then `/skill:writing-plans` for implementation planning.
7
+
8
+ # Brainstorming Ideas Into Designs
9
+
10
+ ## Overview
11
+
12
+ Help turn ideas into fully formed designs and specs through natural collaborative dialogue.
13
+
14
+ Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design in small sections (200-300 words), checking after each section whether it looks right so far.
15
+
16
+ ## Boundaries
17
+ - Read code and docs: yes
18
+ - Write to docs/plans/: yes
19
+ - Edit or create any other files: no
20
+
21
+ ## The Process
22
+
23
+ **Before anything else — check git state:**
24
+ - Run `git status` and `git log --oneline -5`
25
+ - If on a feature branch with uncommitted or unmerged work, ask the user:
26
+ - "You're on `<branch>` with uncommitted changes. Want to finish/merge that first, stash it, or continue here?"
27
+ - Require exactly one of: finish prior work, stash, or explicitly continue here
28
+ - If the topic is new, suggest creating a new branch before brainstorming
29
+
30
+ **Understanding the idea:**
31
+ - Check out the current project state first (files, docs, recent commits)
32
+ - Check if the codebase or ecosystem already solves this before designing from scratch
33
+ - Ask questions one at a time to refine the idea
34
+ - Prefer multiple choice questions when possible, but open-ended is fine too
35
+ - Only one question per message - if a topic needs more exploration, break it into multiple questions
36
+ - Focus on understanding: purpose, constraints, success criteria
37
+
38
+ **Exploring approaches:**
39
+ - Propose 2-3 different approaches with trade-offs
40
+ - Present options conversationally with your recommendation and reasoning
41
+ - Lead with your recommended option and explain why
42
+
43
+ **Presenting the design:**
44
+ - Once you believe you understand what you're building, present the design
45
+ - Break it into sections of 200-300 words
46
+ - Ask after each section whether it looks right so far
47
+ - Cover: architecture, components, data flow, error handling, testing
48
+ - Be ready to go back and clarify if something doesn't make sense
49
+
50
+ ## After the Design
51
+
52
+ **Documentation:**
53
+ - Write the validated design to `docs/plans/YYYY-MM-DD-<topic>-design.md`
54
+ - Commit the design document to git
55
+ - The workflow monitor automatically tracks phase transitions when you invoke skills
56
+
57
+ **Implementation (if continuing):**
58
+ - Ask: "Ready to set up for implementation?"
59
+ - Set up isolated workspace — `/skill:using-git-worktrees` for larger work, or just create a branch for small changes
60
+ - Use `/skill:writing-plans` to create detailed implementation plan
61
+
62
+ ## Key Principles
63
+
64
+ - **One question at a time** - Don't overwhelm with multiple questions
65
+ - **Multiple choice preferred** - Easier to answer than open-ended when possible
66
+ - **YAGNI ruthlessly** - Remove unnecessary features from all designs
67
+ - **Design for testability** - Favor approaches with clear boundaries that are easy to verify with TDD
68
+ - **Explore alternatives** - Always propose 2-3 approaches before settling
69
+ - **Incremental validation** - Present design in sections, validate each
70
+ - **Be flexible** - Go back and clarify when something doesn't make sense
@@ -0,0 +1,194 @@
1
+ ---
2
+ name: dispatching-parallel-agents
3
+ description: Use when facing 2+ independent tasks that can be worked on without shared state or sequential dependencies
4
+ ---
5
+
6
+ > **Related skills:** Debug each problem with `/skill:systematic-debugging`. Verify all fixes with `/skill:executing-tasks`.
7
+
8
+ # Dispatching Parallel Agents
9
+
10
+ ## Overview
11
+
12
+ When you have multiple unrelated failures (different test files, different subsystems, different bugs), investigating them sequentially wastes time. Each investigation is independent and can happen in parallel.
13
+
14
+ **Core principle:** Dispatch one agent per independent problem domain. Let them work concurrently.
15
+
16
+ ## When to Use
17
+
18
+ ```dot
19
+ digraph when_to_use {
20
+ "Multiple failures?" [shape=diamond];
21
+ "Are they independent?" [shape=diamond];
22
+ "Single agent investigates all" [shape=box];
23
+ "One agent per problem domain" [shape=box];
24
+ "Can they work in parallel?" [shape=diamond];
25
+ "Sequential agents" [shape=box];
26
+ "Parallel dispatch" [shape=box];
27
+
28
+ "Multiple failures?" -> "Are they independent?" [label="yes"];
29
+ "Are they independent?" -> "Single agent investigates all" [label="no - related"];
30
+ "Are they independent?" -> "Can they work in parallel?" [label="yes"];
31
+ "Can they work in parallel?" -> "Parallel dispatch" [label="yes"];
32
+ "Can they work in parallel?" -> "Sequential agents" [label="no - shared state"];
33
+ }
34
+ ```
35
+
36
+ **Use when:**
37
+ - 3+ test files failing with different root causes
38
+ - Multiple subsystems broken independently
39
+ - Each problem can be understood without context from others
40
+ - No shared state between investigations
41
+
42
+ **Don't use when:**
43
+ - Failures are related (fix one might fix others)
44
+ - Need to understand full system state
45
+ - Agents would interfere with each other
46
+
47
+ ## The Pattern
48
+
49
+ ### 1. Identify Independent Domains
50
+
51
+ Group failures by what's broken:
52
+ - File A tests: Tool approval flow
53
+ - File B tests: Batch completion behavior
54
+ - File C tests: Abort functionality
55
+
56
+ Each domain is independent - fixing tool approval doesn't affect abort tests.
57
+
58
+ ### 2. Create Focused Agent Tasks
59
+
60
+ Each agent gets:
61
+ - **Specific scope:** One test file or subsystem
62
+ - **Clear goal:** Make these tests pass
63
+ - **Constraints:** Don't change other code
64
+ - **Expected output:** Summary of what you found and fixed
65
+
66
+ ### 3. Dispatch in Parallel
67
+
68
+ **How to dispatch:**
69
+
70
+ Use the `subagent` tool in parallel mode:
71
+
72
+ > **Agent scope:** The built-in agents (`worker`, `implementer`, `code-reviewer`, `spec-reviewer`)
73
+ > are bundled with this package. To use them, set `agentScope: "both"`. The default scope `"user"`
74
+ > only loads agents from `~/.pi/agent/agents/`.
75
+
76
+ ```ts
77
+ subagent({
78
+ agentScope: "both", // include bundled agents (worker, implementer, etc.)
79
+ tasks: [
80
+ { agent: "worker", task: "Fix agent-tool-abort.test.ts failures" },
81
+ { agent: "worker", task: "Fix batch-completion-behavior.test.ts failures" },
82
+ { agent: "worker", task: "Fix tool-approval-race-conditions.test.ts failures" },
83
+ ],
84
+ })
85
+ ```
86
+
87
+ ### 4. Review and Integrate
88
+
89
+ When agents return:
90
+ - Read each summary
91
+ - Verify fixes don't conflict
92
+ - Run full test suite
93
+ - Integrate all changes
94
+
95
+ **If agents edited the same files:** Review manually. Pick the correct version per hunk, or re-run one agent with the other's changes as context. Don't blindly merge.
96
+
97
+ **If some agents failed:** Integrate successful agents first (commit their work). Then retry the failed agent with fresh context that includes the integrated changes.
98
+
99
+ ## Agent Prompt Structure
100
+
101
+ Good agent prompts are:
102
+ 1. **Focused** - One clear problem domain
103
+ 2. **Self-contained** - All context needed to understand the problem
104
+ 3. **Specific about output** - What should the agent return?
105
+
106
+ ```markdown
107
+ Fix the 3 failing tests in src/agents/agent-tool-abort.test.ts:
108
+
109
+ 1. "should abort tool with partial output capture" - expects 'interrupted at' in message
110
+ 2. "should handle mixed completed and aborted tools" - fast tool aborted instead of completed
111
+ 3. "should properly track pendingToolCount" - expects 3 results but gets 0
112
+
113
+ These are timing/race condition issues. Your task:
114
+
115
+ 1. Read the test file and understand what each test verifies
116
+ 2. Identify root cause - timing issues or actual bugs?
117
+ 3. Fix by:
118
+ - Replacing arbitrary timeouts with event-based waiting
119
+ - Fixing bugs in abort implementation if found
120
+ - Adjusting test expectations if testing changed behavior
121
+
122
+ Do NOT just increase timeouts - find the real issue.
123
+
124
+ Return: Summary of what you found and what you fixed.
125
+ ```
126
+
127
+ ## Common Mistakes
128
+
129
+ **❌ Too broad:** "Fix all the tests" - agent gets lost
130
+ **✅ Specific:** "Fix agent-tool-abort.test.ts" - focused scope
131
+
132
+ **❌ No context:** "Fix the race condition" - agent doesn't know where
133
+ **✅ Context:** Paste the error messages and test names
134
+
135
+ **❌ No constraints:** Agent might refactor everything
136
+ **✅ Constraints:** "Do NOT change production code" or "Fix tests only"
137
+
138
+ **❌ Vague output:** "Fix it" - you don't know what changed
139
+ **✅ Specific:** "Return summary of root cause and changes"
140
+
141
+ ## When NOT to Use
142
+
143
+ **Related failures:** Fixing one might fix others - investigate together first
144
+ **Need full context:** Understanding requires seeing entire system
145
+ **Exploratory debugging:** You don't know what's broken yet
146
+ **Shared state:** Agents would interfere (editing same files, using same resources)
147
+
148
+ ## Real Example from Session
149
+
150
+ **Scenario:** 6 test failures across 3 files after major refactoring
151
+
152
+ **Failures:**
153
+ - agent-tool-abort.test.ts: 3 failures (timing issues)
154
+ - batch-completion-behavior.test.ts: 2 failures (tools not executing)
155
+ - tool-approval-race-conditions.test.ts: 1 failure (execution count = 0)
156
+
157
+ **Decision:** Independent domains - abort logic separate from batch completion separate from race conditions
158
+
159
+ **Dispatch:**
160
+ ```ts
161
+ subagent({
162
+ agentScope: "both", // include bundled agents (worker, implementer, etc.)
163
+ tasks: [
164
+ { agent: "worker", task: "Fix agent-tool-abort.test.ts" },
165
+ { agent: "worker", task: "Fix batch-completion-behavior.test.ts" },
166
+ { agent: "worker", task: "Fix tool-approval-race-conditions.test.ts" },
167
+ ],
168
+ })
169
+ ```
170
+
171
+ **Results:**
172
+ - Agent 1: Replaced timeouts with event-based waiting
173
+ - Agent 2: Fixed event structure bug (threadId in wrong place)
174
+ - Agent 3: Added wait for async tool execution to complete
175
+
176
+ **Integration:** All fixes independent, no conflicts, full suite green
177
+
178
+ **Time saved:** 3 problems solved in parallel instead of sequentially
179
+
180
+ ## Verification
181
+
182
+ After agents return:
183
+ 1. **Review each summary** - Understand what changed
184
+ 2. **Check for conflicts** - Did agents edit same code?
185
+ 3. **Run full suite** - Verify all fixes work together
186
+ 4. **Spot check** - Agents can make systematic errors
187
+
188
+ > **Integration-mode note:** When integrating parallel agent results, run `git stash` if
189
+ > needed before the integration test run to isolate any stash conflicts from true failures.
190
+ > If tests fail during integration, rule out merge conflicts first before treating it as a
191
+ > new bug. Only invoke `workflow_reference debug-rationalizations` if you have confirmed
192
+ > the failure is not from a merge conflict.
193
+
194
+
@@ -0,0 +1,247 @@
1
+ ---
2
+ name: executing-tasks
3
+ description: Use when you have an approved implementation plan to execute task-by-task with human gates and bounded retries
4
+ ---
5
+
6
+ # Executing Tasks
7
+
8
+ ## Overview
9
+
10
+ Execute an implementation plan task-by-task using a per-task lifecycle with human gates and bounded retry loops. Each task goes through: **define → approve → execute → verify → review → fix**.
11
+
12
+ **Announce at start:** "I'm using the executing-tasks skill to implement the plan."
13
+
14
+ ## Prerequisites
15
+
16
+ Before starting, verify:
17
+ - [ ] On the correct branch/worktree
18
+ - [ ] Plan file exists at `docs/plans/YYYY-MM-DD-<name>.md`
19
+ - [ ] Plan has been reviewed and approved
20
+
21
+ ## Initialization
22
+
23
+ 1. Read the plan file and extract all tasks, including each task's `Type:` field
24
+ 2. Initialize plan_tracker with structured task metadata:
25
+ ```
26
+ plan_tracker({
27
+ action: "init",
28
+ tasks: [
29
+ { name: "Task 1 name", type: "code" },
30
+ { name: "Task 2 name", type: "non-code" },
31
+ ],
32
+ })
33
+ ```
34
+ 3. Mark the execute phase as active
35
+
36
+ ## Per-Task Lifecycle
37
+
38
+ For each task in the plan:
39
+
40
+ ### 1. Define
41
+
42
+ **Code task →** Write actual test file(s) with assertions:
43
+ - Create test files that exercise the new/modified behavior
44
+ - Tests must be specific, deterministic, and fail before implementation
45
+ - Include edge cases and error conditions
46
+ - Apply TDD-specific guidance only to code tasks
47
+
48
+ **Non-code task →** Reuse and refine the plan's acceptance criteria:
49
+ - List specific, measurable conditions
50
+ - Each criterion must be independently verifiable
51
+ - Treat these criteria as the basis for approval and verification
52
+
53
+ Update plan_tracker:
54
+ ```
55
+ plan_tracker({ action: "update", index: N, phase: "define" })
56
+ ```
57
+
58
+ ### 2. Approve (Human Gate)
59
+
60
+ Present the test cases or acceptance criteria to the human:
61
+
62
+ **For code tasks:**
63
+ - Show the test files to be written
64
+ - Explain what each test verifies
65
+ - Ask: "Do these test cases cover the requirements? Approve, revise, or reject?"
66
+
67
+ **For non-code tasks:**
68
+ - Show the acceptance criteria list from the plan
69
+ - Ask: "Do these criteria capture the intent? Approve, revise, or reject?"
70
+
71
+ **No execution begins until approved.**
72
+
73
+ If revised → return to Define step.
74
+ If rejected → skip task and mark as blocked.
75
+
76
+ ```
77
+ plan_tracker({ action: "update", index: N, phase: "approve" })
78
+ ```
79
+
80
+ ### 3. Execute (max 3 attempts)
81
+
82
+ Implement the task following the plan's steps.
83
+
84
+ For each attempt:
85
+ 1. Write/modify code as specified in the plan
86
+ 2. Run tests or verify against acceptance criteria
87
+ 3. If all pass → move to Verify
88
+ 4. If failures:
89
+ - Analyze the failures
90
+ - Fix the implementation
91
+ - Increment executeAttempts
92
+ - If executeAttempts reaches 3 → **escalate to human**
93
+
94
+ ```
95
+ plan_tracker({ action: "update", index: N, phase: "execute" })
96
+ plan_tracker({ action: "update", index: N, attempts: 1 }) // after each attempt (routes to executeAttempts based on phase)
97
+ ```
98
+
99
+ **Escalation on budget exhaustion:**
100
+ > "I've attempted this task 3 times without success. Options:
101
+ > 1. Revise the scope or approach
102
+ > 2. Adjust the test cases / acceptance criteria
103
+ > 3. Abandon this task and move on
104
+ >
105
+ > What would you like to do?"
106
+
107
+ ### 4. Verify
108
+
109
+ Re-run all tests or check all acceptance criteria.
110
+
111
+ Report results to the human:
112
+ - ✅ Condition 1: passed
113
+ - ✅ Condition 2: passed
114
+ - ❌ Condition 3: failed — [description of failure]
115
+
116
+ **Does not auto-fix.** Flags failures to human for decision.
117
+
118
+ ```
119
+ plan_tracker({ action: "update", index: N, phase: "verify" })
120
+ ```
121
+
122
+ If failures detected:
123
+ > "Verification found issues. Options:
124
+ > 1. Go back to Execute for another attempt
125
+ > 2. Revise the tests/criteria
126
+ > 3. Accept as-is (mark partial)
127
+ >
128
+ > What would you like to do?"
129
+
130
+ ### 5. Review (two layers)
131
+
132
+ **Layer 1 — Subagent review:**
133
+ - Dispatch a subagent to review the implementation against the task spec
134
+ - Subagent checks: correctness, edge cases, code quality, test coverage
135
+ - Subagent reports findings
136
+
137
+ Use `agentScope: "both"` to access the bundled `code-reviewer` agent:
138
+ ```
139
+ subagent({ agent: "code-reviewer", task: "Review implementation of task N against spec", agentScope: "both" })
140
+ ```
141
+
142
+ **Layer 2 — Human sign-off:**
143
+ - Present the subagent review + test results to the human
144
+ - Summarize what was done, what passed, any concerns
145
+ - Ask: "Does this look good? Approve or request changes?"
146
+
147
+ ```
148
+ plan_tracker({ action: "update", index: N, phase: "review" })
149
+ ```
150
+
151
+ If issues found → move to Fix.
152
+
153
+ ### 6. Fix (max 3 loops, re-enters Verify → Review)
154
+
155
+ 1. Address the review feedback
156
+ 2. Re-enter Verify → Review cycle
157
+ 3. Increment fixAttempts after each fix round
158
+ 4. If fixAttempts reaches 3 → **escalate to human**
159
+
160
+ ```
161
+ plan_tracker({ action: "update", index: N, phase: "fix" })
162
+ plan_tracker({ action: "update", index: N, attempts: 1 }) // routes to fixAttempts based on phase
163
+ ```
164
+
165
+ **Escalation on budget exhaustion:**
166
+ > "I've attempted fixes 3 times. Options:
167
+ > 1. Proceed as-is despite remaining issues
168
+ > 2. Keep fixing (at your own risk)
169
+ > 3. Abandon this task and move on
170
+ >
171
+ > What would you like to do?"
172
+
173
+ ### Task Complete
174
+
175
+ When both reviewers are satisfied and all conditions pass:
176
+
177
+ ```
178
+ plan_tracker({ action: "update", index: N, status: "complete" })
179
+ ```
180
+
181
+ Commit the task:
182
+ ```bash
183
+ git add <relevant files>
184
+ git commit -m "feat(task N): <description>"
185
+ ```
186
+
187
+ ## Escalation Rules
188
+
189
+ | Event | Action |
190
+ |-------|--------|
191
+ | Execute 3 attempts exhausted | Escalate to human — never auto-skip |
192
+ | Fix loop 3 attempts exhausted | Escalate to human — never auto-skip |
193
+ | Verify fails | Flag to human — human decides next step |
194
+
195
+ **No silent skipping. Consistent escalation everywhere.**
196
+
197
+ ## Finalize Phase
198
+
199
+ After all tasks complete (or are explicitly accepted by human):
200
+
201
+ ### 1. Final Review
202
+ - Dispatch subagent to review the entire implementation holistically
203
+ - Check for integration issues, consistency across tasks, documentation gaps
204
+
205
+ ### 2. Create PR
206
+ ```bash
207
+ git push origin <branch>
208
+ gh pr create --title "feat: <feature summary>" --body "<task summary>"
209
+ ```
210
+
211
+ ### 3. Archive Planning Docs
212
+ ```bash
213
+ mkdir -p docs/plans/completed
214
+ mv docs/plans/<plan-file> docs/plans/completed/
215
+ ```
216
+
217
+ ### 4. Update Repo Docs
218
+ - Update CHANGELOG with feature summary
219
+ - Update README if API/surface changed
220
+ - Update inline documentation as needed
221
+
222
+ ### 5. Update Project Documentation
223
+ - Update README if project overview has changed
224
+ - Update CONTRIBUTING or architecture docs if structure changed
225
+ - Note any new patterns or conventions introduced
226
+
227
+ ### 6. Clean Up
228
+ - Remove worktree if one was used
229
+ - Mark finalize phase complete
230
+
231
+ ## Boundaries
232
+ - Read code, docs, and tests: yes
233
+ - Write tests and implementation code: yes (within current task scope)
234
+ - Write to docs/plans/completed/: yes (during finalize)
235
+ - Edit files outside task scope: no (unless human explicitly approves)
236
+
237
+ ## Remember
238
+ - Always present test cases/criteria for human approval before executing
239
+ - Extract each task's `Type:` from the plan and preserve it in `plan_tracker`
240
+ - Track per-task phase and attempts in plan_tracker
241
+ - Code tasks use TDD; non-code tasks use acceptance criteria during define, approve, and verify
242
+ - Escalate immediately on budget exhaustion — never silently skip or continue
243
+ - Verify does not auto-fix — always flag to human
244
+ - Review has two layers (subagent first, then human)
245
+ - Fix loops re-enter verify → review (max 3 fix loops)
246
+ - Execute has separate budget (max 3 attempts)
247
+ - Total max cycles per task: 3 execute + 3 fix = 6