@oh-my-pi/pi-coding-agent 12.19.2 → 13.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +53 -0
  2. package/package.json +7 -7
  3. package/src/commit/prompts/analysis-system.md +3 -3
  4. package/src/commit/prompts/analysis-user.md +14 -14
  5. package/src/commit/prompts/changelog-system.md +4 -4
  6. package/src/commit/prompts/changelog-user.md +4 -4
  7. package/src/commit/prompts/file-observer-system.md +2 -2
  8. package/src/commit/prompts/file-observer-user.md +2 -2
  9. package/src/commit/prompts/reduce-system.md +4 -4
  10. package/src/commit/prompts/reduce-user.md +6 -6
  11. package/src/commit/prompts/summary-system.md +4 -4
  12. package/src/commit/prompts/summary-user.md +6 -6
  13. package/src/config/settings-schema.ts +0 -11
  14. package/src/discovery/helpers.ts +13 -1
  15. package/src/internal-urls/docs-index.generated.ts +2 -2
  16. package/src/internal-urls/index.ts +8 -3
  17. package/src/internal-urls/local-protocol.ts +223 -0
  18. package/src/internal-urls/{docs-protocol.ts → pi-protocol.ts} +12 -12
  19. package/src/internal-urls/router.ts +1 -1
  20. package/src/internal-urls/types.ts +1 -1
  21. package/src/ipy/executor.ts +4 -32
  22. package/src/main.ts +0 -1
  23. package/src/memories/index.ts +1 -1
  24. package/src/modes/components/settings-defs.ts +0 -5
  25. package/src/modes/controllers/event-controller.ts +4 -4
  26. package/src/modes/interactive-mode.ts +84 -64
  27. package/src/modes/types.ts +11 -3
  28. package/src/modes/utils/ui-helpers.ts +5 -3
  29. package/src/patch/hashline.ts +42 -42
  30. package/src/patch/index.ts +24 -21
  31. package/src/patch/shared.ts +21 -43
  32. package/src/plan-mode/approved-plan.ts +55 -0
  33. package/src/prompts/agents/designer.md +6 -6
  34. package/src/prompts/agents/explore.md +4 -4
  35. package/src/prompts/agents/frontmatter.md +1 -0
  36. package/src/prompts/agents/init.md +10 -10
  37. package/src/prompts/agents/plan.md +6 -6
  38. package/src/prompts/agents/reviewer.md +4 -3
  39. package/src/prompts/agents/task.md +10 -10
  40. package/src/prompts/compaction/branch-summary.md +3 -3
  41. package/src/prompts/compaction/compaction-short-summary.md +7 -7
  42. package/src/prompts/compaction/compaction-summary-context.md +1 -1
  43. package/src/prompts/compaction/compaction-summary.md +5 -5
  44. package/src/prompts/compaction/compaction-turn-prefix.md +3 -3
  45. package/src/prompts/compaction/compaction-update-summary.md +11 -11
  46. package/src/prompts/memories/consolidation.md +5 -5
  47. package/src/prompts/memories/read-path.md +11 -0
  48. package/src/prompts/memories/stage_one_input.md +1 -1
  49. package/src/prompts/memories/stage_one_system.md +5 -5
  50. package/src/prompts/review-request.md +4 -4
  51. package/src/prompts/system/agent-creation-architect.md +17 -17
  52. package/src/prompts/system/agent-creation-user.md +2 -2
  53. package/src/prompts/system/custom-system-prompt.md +6 -6
  54. package/src/prompts/system/plan-mode-active.md +20 -20
  55. package/src/prompts/system/plan-mode-approved.md +9 -7
  56. package/src/prompts/system/plan-mode-reference.md +2 -2
  57. package/src/prompts/system/plan-mode-subagent.md +8 -8
  58. package/src/prompts/system/subagent-submit-reminder.md +5 -5
  59. package/src/prompts/system/subagent-system-prompt.md +9 -9
  60. package/src/prompts/system/subagent-user-prompt.md +3 -5
  61. package/src/prompts/system/summarization-system.md +1 -1
  62. package/src/prompts/system/system-prompt.md +109 -84
  63. package/src/prompts/system/title-system.md +2 -2
  64. package/src/prompts/system/ttsr-interrupt.md +2 -2
  65. package/src/prompts/system/web-search.md +16 -16
  66. package/src/prompts/tools/ask.md +6 -6
  67. package/src/prompts/tools/bash.md +9 -9
  68. package/src/prompts/tools/browser.md +5 -5
  69. package/src/prompts/tools/cancel-job.md +2 -2
  70. package/src/prompts/tools/exit-plan-mode.md +13 -10
  71. package/src/prompts/tools/find.md +2 -2
  72. package/src/prompts/tools/gemini-image.md +7 -7
  73. package/src/prompts/tools/grep.md +4 -3
  74. package/src/prompts/tools/hashline.md +37 -39
  75. package/src/prompts/tools/patch.md +5 -5
  76. package/src/prompts/tools/poll-jobs.md +1 -1
  77. package/src/prompts/tools/python.md +8 -10
  78. package/src/prompts/tools/read.md +2 -12
  79. package/src/prompts/tools/replace.md +6 -6
  80. package/src/prompts/tools/ssh.md +2 -7
  81. package/src/prompts/tools/task.md +34 -23
  82. package/src/prompts/tools/todo-write.md +65 -49
  83. package/src/prompts/tools/web-search.md +2 -2
  84. package/src/prompts/tools/write.md +4 -3
  85. package/src/sdk.ts +11 -9
  86. package/src/session/agent-session.ts +92 -51
  87. package/src/session/artifacts.ts +1 -1
  88. package/src/session/messages.ts +1 -0
  89. package/src/task/agents.ts +1 -0
  90. package/src/task/index.ts +2 -1
  91. package/src/task/render.ts +2 -2
  92. package/src/task/types.ts +1 -0
  93. package/src/tools/bash-interactive.ts +1 -1
  94. package/src/tools/bash-skill-urls.ts +3 -2
  95. package/src/tools/bash.ts +38 -19
  96. package/src/tools/exit-plan-mode.ts +30 -2
  97. package/src/tools/grep.ts +131 -75
  98. package/src/tools/index.ts +13 -3
  99. package/src/tools/path-utils.ts +2 -1
  100. package/src/tools/plan-mode-guard.ts +8 -8
  101. package/src/tools/python.ts +0 -2
  102. package/src/tools/read.ts +2 -2
  103. package/src/tools/todo-write.ts +276 -146
  104. package/src/internal-urls/plan-protocol.ts +0 -95
  105. package/src/modes/components/todo-display.ts +0 -114
  106. package/src/prompts/memories/read_path.md +0 -11
@@ -3,19 +3,17 @@
3
3
  Apply precise file edits using `LINE#ID` tags, anchoring to the file content.
4
4
 
5
5
  <workflow>
6
- 1. `read` the target range to capture current `LINE#ID` tags.
7
- 2. Pick the smallest operation per change site (line/range/insert/content-replace).
8
- 3. Direction-lock every edit: exact current text → intended text.
9
- 4. Submit one `edit` call per file containing all operations.
10
- 5. If another edit is needed in that file, re-read first (hashes changed).
11
- 6. Output tool calls only; no prose.
6
+ 1. You MUST `read` the target range to capture current `LINE#ID` tags.
7
+ 2. You MUST pick the smallest operation per change site (line/range/insert/content-replace).
8
+ 3. You MUST direction-lock every edit: exact current text → intended text.
9
+ 4. You MUST submit one `edit` call per file containing all operations.
10
+ 5. If another edit is needed in that file, you MUST re-read first (hashes changed).
11
+ 6. You MUST output tool calls only; no prose.
12
12
  </workflow>
13
13
 
14
14
  <operations>
15
- - **Single line replace/delete**
16
- - `{ op: "set", tag: "N#ID", content: […] }`
17
- - `content: null` deletes the line; `content: [""]` keeps a blank line.
18
- - **Range replace/delete**
15
+ - **Line or range replace/delete**
16
+ - `{ op: "replace", tag: "N#ID", content: […] }`
19
17
  - `{ op: "replace", first: "N#ID", last: "N#ID", content: […] }`
20
18
  - Use for swaps, block rewrites, or deleting a full span (`content: null`).
21
19
  - **Insert** (new content)
@@ -29,33 +27,33 @@ Apply precise file edits using `LINE#ID` tags, anchoring to the file content.
29
27
  </operations>
30
28
 
31
29
  <rules>
32
- 1. **Minimize scope:** one logical mutation site per operation.
33
- 2. **Preserve formatting:** keep indentation, punctuation, line breaks, trailing commas, brace style.
34
- 3. **Prefer insertion over neighbor rewrites:** anchor on structural boundaries (`}`, `]`, `},`) not interior property lines.
35
- 4. **No no-ops:** replacement content must differ from current content.
36
- 5. **Touch only requested code:** avoid incidental edits.
37
- 6. **Use exact current tokens:** never rewrite approximately; mutate the token that exists now.
38
- 7. **For swaps/moves:** prefer one range operation over multiple single-line operations.
30
+ 1. **Minimize scope:** You MUST use one logical mutation site per operation.
31
+ 2. **Preserve formatting:** You MUST keep indentation, punctuation, line breaks, trailing commas, brace style.
32
+ 3. **Prefer insertion over neighbor rewrites:** You SHOULD anchor on structural boundaries (`}`, `]`, `},`) not interior property lines.
33
+ 4. **No no-ops:** replacement content MUST differ from current content.
34
+ 5. **Touch only requested code:** You MUST NOT make incidental edits.
35
+ 6. **Use exact current tokens:** You MUST NOT rewrite approximately; mutate the token that exists now.
36
+ 7. **For swaps/moves:** You SHOULD prefer one range operation over multiple single-line operations.
39
37
  </rules>
40
38
 
41
- <op_choice>
42
- - One wrong line → `set`
43
- - Adjacent block changed → `insert`
44
- - Missing line/block → insert with `append`/`prepend`
45
- </op_choice>
39
+ <op-choice>
40
+ - One wrong line → MUST use `replace`
41
+ - Adjacent block changed → MUST use `insert`
42
+ - Missing line/block → MUST use `append`/`prepend`
43
+ </op-choice>
46
44
 
47
- <tag_choice>
48
- - Copy tags exactly from the prefix of the `read` or error output.
49
- - Never guess tags.
50
- - For inserts, prefer `insert`>`append`/`prepend` when both boundaries are known.
51
- - Re-read after each successful edit call before issuing another on same file.
52
- </tag_choice>
45
+ <tag-choice>
46
+ - You MUST copy tags exactly from the prefix of the `read` or error output.
47
+ - You MUST NOT guess tags.
48
+ - For inserts, you SHOULD prefer `insert` > `append`/`prepend` when both boundaries are known.
49
+ - You MUST re-read after each successful edit call before issuing another on same file.
50
+ </tag-choice>
53
51
 
54
52
  <recovery>
55
53
  **Tag mismatch (`>>>`)**
56
- - Retry with the updated tags shown in error output.
57
- - Re-read only if required tags are missing from error snippet.
58
- - If mismatch repeats, stop and re-read the exact block.
54
+ - You MUST retry with the updated tags shown in error output.
55
+ - You MUST re-read only if required tags are missing from error snippet.
56
+ - If mismatch repeats, you MUST stop and re-read the exact block.
59
57
  </recovery>
60
58
 
61
59
  <example name="fix a value or type">
@@ -63,7 +61,7 @@ Apply precise file edits using `LINE#ID` tags, anchoring to the file content.
63
61
  {{hlinefull 23 " const timeout: number = 5000;"}}
64
62
  ```
65
63
  ```
66
- op: "set"
64
+ op: "replace"
67
65
  tag: "{{hlineref 23 " const timeout: number = 5000;"}}"
68
66
  content: [" const timeout: number = 30_000;"]
69
67
  ```
@@ -75,7 +73,7 @@ content: [" const timeout: number = 30_000;"]
75
73
  {{hlinefull 8 "const data = fetchSync(url);"}}
76
74
  ```
77
75
  ```
78
- op: "set"
76
+ op: "replace"
79
77
  tag: "{{hlineref 7 "// @ts-ignore"}}"
80
78
  content: null
81
79
  ```
@@ -86,7 +84,7 @@ content: null
86
84
  {{hlinefull 14 " placeholder: \"DO NOT SHIP\","}}
87
85
  ```
88
86
  ```
89
- op: "set"
87
+ op: "replace"
90
88
  tag: "{{hlineref 14 " placeholder: \"DO NOT SHIP\","}}"
91
89
  content: [""]
92
90
  ```
@@ -207,10 +205,10 @@ content: ["function validate() {", …, "}"]
207
205
  </example>
208
206
 
209
207
  <critical>
210
- Ensure:
208
+ You MUST ensure:
211
209
  - Payload shape is `{ "path": string, "edits": [operation, …], "delete"?: boolean, "rename"?: string }`
212
- - Every edit matches exactly one variant
213
- - Every tag has been copied EXACTLY from a tool result as `N#ID`
214
- - Scope is minimal and formatting is preserved except targeted token changes
210
+ - Every edit MUST match exactly one variant
211
+ - Every tag MUST be copied EXACTLY from a tool result as `N#ID`
212
+ - Scope MUST be minimal and formatting MUST be preserved except targeted token changes
215
213
  </critical>
216
- **Final reminder:** tags are immutable references to the last read snapshot. Re-read when state changes, then edit.
214
+ **Final reminder:** tags are immutable references to the last read snapshot. You MUST re-read when state changes, then edit.
@@ -43,11 +43,11 @@ Returns success/failure; on failure, error message indicates:
43
43
  </output>
44
44
 
45
45
  <critical>
46
- - Always read target file before editing
47
- - Copy anchors and context lines verbatim (including whitespace)
48
- - Never use anchors as comments (no line numbers, location labels, placeholders like `@@ @@`)
49
- - Do not place new lines outside intended block
50
- - If edit fails or breaks structure, re-read file and produce new patch from current content—do not retry same diff
46
+ - You MUST read the target file before editing
47
+ - You MUST copy anchors and context lines verbatim (including whitespace)
48
+ - You MUST NOT use anchors as comments (no line numbers, location labels, placeholders like `@@ @@`)
49
+ - You MUST NOT place new lines outside the intended block
50
+ - If edit fails or breaks structure, you MUST re-read the file and produce a new patch from current content—you MUST NOT retry the same diff
51
51
  - **NEVER** use edit to fix indentation, whitespace, or reformat code. Formatting is a single command run once at the end (`bun fmt`, `cargo fmt`, `prettier --write`, etc.)—not N individual edits. If you see inconsistent indentation after an edit, leave it; the formatter will fix all of it in one pass.
52
52
  </critical>
53
53
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  Block until one or more background jobs complete, fail, or are cancelled.
4
4
 
5
- Use this instead of polling `read jobs://` in a loop when you need to wait for background task or bash results before continuing.
5
+ You MUST use this instead of polling `read jobs://` in a loop when you need to wait for background task or bash results before continuing.
6
6
 
7
7
  Returns the status and results of all watched jobs once at least one finishes.
@@ -5,13 +5,13 @@ Runs Python cells sequentially in persistent IPython kernel.
5
5
  <instruction>
6
6
  Kernel persists across calls and cells; **imports, variables, and functions survive—use this.**
7
7
  **Work incrementally:**
8
- - One logical step per cell (imports, define function, test it, use it)
9
- - Pass multiple small cells in one call
10
- - Define small functions you can reuse and debug individually
11
- - Put explanations in assistant message or cell title, **not** in code
8
+ - You SHOULD use one logical step per cell (imports, define function, test it, use it)
9
+ - You SHOULD pass multiple small cells in one call
10
+ - You SHOULD define small functions you can reuse and debug individually
11
+ - You MUST put explanations in assistant message or cell title; you MUST NOT put them in code
12
12
  **When something fails:**
13
13
  - Errors tell you which cell failed (e.g., "Cell 3 failed")
14
- - Resubmit only fixed cell (or fixed cell + remaining cells)
14
+ - You SHOULD resubmit only the fixed cell (or fixed cell + remaining cells)
15
15
  </instruction>
16
16
 
17
17
  <prelude>
@@ -34,23 +34,21 @@ All helpers auto-print results and return values for chaining.
34
34
  </prelude>
35
35
 
36
36
  <output>
37
- Streams in real time, truncated after 100KB; if truncated, full output stored under $ARTIFACTS and referenced as `artifact://<id>` in metadata.
38
-
39
37
  User sees output like Jupyter notebook; rich displays render fully:
40
38
  - `display(JSON(data))` → interactive JSON tree
41
39
  - `display(HTML(...))` → rendered HTML
42
40
  - `display(Markdown(...))` → formatted markdown
43
41
  - `plt.show()` → inline figures
44
- **You will see object repr** (e.g., `<IPython.core.display.JSON object>`). Trust `display()`; do not assume user sees only repr.
42
+ **You will see object repr** (e.g., `<IPython.core.display.JSON object>`). Trust `display()`; you MUST NOT assume user sees only repr.
45
43
  </output>
46
44
 
47
45
  <caution>
48
46
  - Per-call mode uses fresh kernel each call
49
- - Use `reset: true` to clear state when session mode active
47
+ - You MUST use `reset: true` to clear state when session mode active
50
48
  </caution>
51
49
 
52
50
  <critical>
53
- - Use `run()` for shell commands; never raw `subprocess`
51
+ - You MUST use `run()` for shell commands; you MUST NOT use raw `subprocess`
54
52
  </critical>
55
53
 
56
54
  <example name="good">
@@ -1,6 +1,6 @@
1
1
  # Read
2
2
 
3
- Reads files from local filesystem or internal URLs.
3
+ Reads files from local filesystem or harness URLs.
4
4
 
5
5
  <instruction>
6
6
  - Reads up to {{DEFAULT_MAX_LINES}} lines default
@@ -14,17 +14,7 @@ Reads files from local filesystem or internal URLs.
14
14
  {{/if}}
15
15
  - Supports images (PNG, JPG) and PDFs
16
16
  - For directories, returns formatted listing with modification times
17
- - Parallelize reads when exploring related files
18
- - Supports internal URLs:
19
- - `skill://<name>` - read SKILL.md for a skill
20
- - `skill://<name>/<path>` - read relative path within skill directory
21
- - `rule://<name>` - read rule content
22
- - `memory://root` - read memory summary (`memory_summary.md`)
23
- - `memory://root/<path>` - read relative path within project memory root
24
- - `agent://<id>` - read agent output artifact
25
- - `agent://<id>/<path>` or `agent://<id>?q=<query>` - extract JSON from agent output
26
- - `docs://` - list available pi documentation files
27
- - `docs://<file>.md` - read a specific pi documentation file
17
+ - You SHOULD parallelize reads when exploring related files
28
18
  </instruction>
29
19
 
30
20
  <output>
@@ -3,10 +3,10 @@
3
3
  String replacements in files with fuzzy whitespace matching.
4
4
 
5
5
  <instruction>
6
- - Use smallest edit that uniquely identifies change
7
- - If `old_text` not unique, expand to include more context or use `all: true` to replace all occurrences
6
+ - You MUST use the smallest edit that uniquely identifies the change
7
+ - If `old_text` not unique, you MUST expand to include more context or use `all: true` to replace all occurrences
8
8
  - Fuzzy matching handles minor whitespace/indentation differences automatically
9
- - Prefer editing existing files over creating new ones
9
+ - You SHOULD prefer editing existing files over creating new ones
10
10
  </instruction>
11
11
 
12
12
  <output>
@@ -14,10 +14,10 @@ Returns success/failure status. On success, file modified in place with replacem
14
14
  </output>
15
15
 
16
16
  <critical>
17
- - Must read file at least once in conversation before editing. Tool errors if you attempt edit without reading file first.
17
+ - You MUST read the file at least once in the conversation before editing. Tool errors if you attempt edit without reading file first.
18
18
  </critical>
19
19
 
20
- <bash*alternatives>
20
+ <bash-alternatives>
21
21
  Replace for content-addressed changes—you identify _what_ to change by its text.
22
22
 
23
23
  For position-addressed or pattern-addressed changes, bash more efficient:
@@ -35,4 +35,4 @@ For position-addressed or pattern-addressed changes, bash more efficient:
35
35
 
36
36
  Use Replace when _content itself_ identifies location.
37
37
  Use bash when _position_ or _pattern_ identifies what to change.
38
- </bash_alternatives>
38
+ </bash-alternatives>
@@ -3,7 +3,7 @@
3
3
  Run commands on remote hosts.
4
4
 
5
5
  <instruction>
6
- Build commands from reference below
6
+ You MUST build commands from the reference below
7
7
  </instruction>
8
8
 
9
9
  <commands>
@@ -23,13 +23,8 @@ Build commands from reference below
23
23
  - Navigation: `cd`, `echo %CD%`
24
24
  </commands>
25
25
 
26
- <output>
27
- stdout/stderr combined, truncated at 50KB; exit code captured.
28
- If truncated, full output stored under $ARTIFACTS as `artifact://<id>`.
29
- </output>
30
-
31
26
  <critical>
32
- Verify shell type from "Available hosts", use matching commands.
27
+ You MUST verify the shell type from "Available hosts" and use matching commands.
33
28
  </critical>
34
29
 
35
30
  <example name="linux">
@@ -2,12 +2,12 @@
2
2
 
3
3
  Launch subagents to execute parallel, well-scoped tasks.
4
4
  {{#if asyncEnabled}}
5
- Use `read jobs://` to inspect background task state and `read jobs://<job_id>` for detailed status/output when needed.
6
- When you need to wait for async results before continuing, call `poll_jobs` — it blocks until jobs complete. Do NOT poll `read jobs://` in a loop or yield and hope for delivery.
5
+ Use `read jobs://` to inspect background task state and `read jobs://<job-id>` for detailed status/output when needed.
6
+ When you need to wait for async results before continuing, call `poll_jobs` — it blocks until jobs complete. You MUST NOT poll `read jobs://` in a loop or yield and hope for delivery.
7
7
  {{/if}}
8
8
 
9
9
  ## What subagents inherit automatically
10
- Subagents receive the **full system prompt**, including AGENTS.md, context files, and skills. Do NOT repeat project rules, coding conventions, or style guidelines in `context` — they already have them.
10
+ Subagents receive the **full system prompt**, including AGENTS.md, context files, and skills. You MUST NOT repeat project rules, coding conventions, or style guidelines in `context` — they already have them.
11
11
 
12
12
  ## What subagents do NOT have
13
13
  Subagents have no access to your conversation history. They don't know:
@@ -17,6 +17,8 @@ Subagents have no access to your conversation history. They don't know:
17
17
  - Requirements the user stated only in conversation
18
18
 
19
19
  Subagents CAN grep the parent conversation file for supplementary details.
20
+
21
+ For large intermediate outputs (long traces, JSON payloads, temporary analysis snapshots), you SHOULD write them to `local://<path>` and pass the path in task context instead of inlining bulky text.
20
22
  ---
21
23
 
22
24
  ## Parameters
@@ -30,9 +32,9 @@ Agent type for all tasks in this batch.
30
32
  Shared background prepended verbatim to every task `assignment`. Use only for session-specific information subagents lack.
31
33
 
32
34
  <critical>
33
- Do NOT include project rules, coding conventions, or style guidelines — subagents already have AGENTS.md and context files in their system prompt. Repeating them wastes tokens and inflates context. Restating any rule from AGENTS.md in `context` is a bug — treat it like a lint error.
35
+ You MUST NOT include project rules, coding conventions, or style guidelines — subagents already have AGENTS.md and context files in their system prompt. Repeating them wastes tokens and inflates context. Restating any rule from AGENTS.md in `context` is a bug — treat it like a lint error.
34
36
  </critical>
35
- **Before writing each line of context, ask:** "Would this sentence be true for ANY task in this repo, or only for THIS specific batch?" If it applies to any task → it's a project rule → the subagent already has it → delete the line.
37
+ **Before writing each line of context, ask:** "Would this sentence be true for ANY task in this repo, or only for THIS specific batch?" If it applies to any task → it's a project rule → the subagent already has it → you MUST delete the line.
36
38
 
37
39
  WRONG — restating project rules the subagent already has:
38
40
  ```
@@ -42,7 +44,7 @@ WRONG — restating project rules the subagent already has:
42
44
  - Run the formatter after changes
43
45
  - Follow the logging convention
44
46
  ```
45
- Every line above restates a project convention. The subagent reads AGENTS.md. Delete them all.
47
+ Every line above restates a project convention. The subagent reads AGENTS.md. You MUST delete them all.
46
48
 
47
49
  RIGHT — only session-specific decisions the subagent cannot infer from project files:
48
50
  ```
@@ -99,7 +101,7 @@ Run in isolated git worktree; returns patches. Use when tasks edit overlapping f
99
101
  {{/if}}
100
102
  ### `schema` (optional — recommended for structured output)
101
103
 
102
- JTD schema defining expected response structure. Use typed properties. If you care about parsing result, define here — **never describe output format in `context` or `assignment`**.
104
+ JTD schema defining expected response structure. Use typed properties. If you care about parsing result, define here — you MUST NOT describe output format in `context` or `assignment`.
103
105
 
104
106
  <caution>
105
107
  **Schema vs agent mismatch causes null output.** Agents with `output="structured"` (e.g., `explore`) have a built-in schema. If you also pass `schema`, yours takes precedence — but if you describe output format in `context`/`assignment` instead, the agent's built-in schema wins. The agent gets confused trying to fit your requested format into its schema shape and submits `null`. Either: (1) use `schema` to override the built-in one, (2) use `task` agent which has no built-in schema, or (3) match your instructions to the agent's expected output shape.
@@ -110,7 +112,7 @@ JTD schema defining expected response structure. Use typed properties. If you ca
110
112
 
111
113
  <critical>## Task scope
112
114
 
113
- `assignment` must contain enough info for agent to act **without asking a clarifying question**.
115
+ `assignment` MUST contain enough info for agent to act **without asking a clarifying question**.
114
116
  **Minimum bar:** assignment under ~8 lines or missing acceptance criteria = too vague. One-liners guaranteed failure.
115
117
 
116
118
  Use this structure for every assignment:
@@ -135,7 +137,7 @@ Use structure every assignment:
135
137
  - DO NOT include project-wide build/test/lint commands (see below)
136
138
  ```
137
139
 
138
- `context` carries shared background. `assignment` carries only delta: file-specific instructions, local edge cases, per-task acceptance checks. Never duplicate shared constraints across assignments.
140
+ `context` carries shared background. `assignment` carries only delta: file-specific instructions, local edge cases, per-task acceptance checks. You MUST NOT duplicate shared constraints across assignments.
139
141
 
140
142
  ### Anti-patterns (ban these)
141
143
  **Vague assignments** — agent guesses wrong or stalls:
@@ -156,9 +158,9 @@ If a constraint appears in AGENTS.md, it MUST NOT appear in `context`. The subag
156
158
 
157
159
  If tempted to write above, expand using templates.
158
160
  **Output format in prose instead of `schema`** — agent returns null:
159
- Structured agents (`explore`, `reviewer`) have built-in output schemas. Describing a different output format in `context`/`assignment` without overriding via `schema` creates a mismatch — the agent can't reconcile your prose instructions with its schema and submits null data. Always use `schema` for output structure, or pick an agent whose built-in schema matches your needs.
161
+ Structured agents (`explore`, `reviewer`) have built-in output schemas. Describing a different output format in `context`/`assignment` without overriding via `schema` creates a mismatch — the agent can't reconcile your prose instructions with its schema and submits null data. You MUST use `schema` for output structure, or pick an agent whose built-in schema matches your needs.
160
162
  **Test/lint commands in parallel tasks** — edit wars:
161
- Parallel agents share working tree. If two agents run `bun check` or `bun test` concurrently, they see each other's half-finished edits, "fix" phantom errors, loop. **Never tell parallel tasks run project-wide build/test/lint commands.** Each task edits, stops. Caller verifies after all tasks complete.
163
+ Parallel agents share working tree. If two agents run `bun check` or `bun test` concurrently, they see each other's half-finished edits, "fix" phantom errors, loop. You MUST NOT tell parallel tasks to run project-wide build/test/lint commands. Each task edits, stops. Caller verifies after all tasks complete.
162
164
  **If you can't specify scope yet**, create **Discovery task** first: enumerate files, find callsites, list candidates. Then fan out with explicit paths.
163
165
 
164
166
  ### Delegate intent, not keystrokes
@@ -247,12 +249,12 @@ Do not touch TS bindings or downstream consumers — separate phase.
247
249
 
248
250
  ## Task scope
249
251
 
250
- Each task small, well-defined scope — **at most 3–5 files**.
252
+ Each task MUST have small, well-defined scope — **at most 3–5 files**.
251
253
  **Signs task too broad:**
252
254
  - File paths use globs (`src/**/*.ts`) instead of explicit names
253
255
  - Assignment says "update all" / "migrate everything" / "refactor across"
254
256
  - Scope covers entire package or directory tree
255
- **Fix:** enumerate files first (grep/glob discovery), then fan out one task per file or small cluster.
257
+ **Fix:** You MUST enumerate files first (grep/glob discovery), then fan out one task per file or small cluster.
256
258
  ---
257
259
 
258
260
  ## Parallelization
@@ -278,23 +280,32 @@ Each task small, well-defined scope — **at most 3–5 files**.
278
280
 
279
281
  ### Phased execution
280
282
 
283
+ <caution>
284
+ **Parallel agents share the working tree.** They see each other's half-finished edits in real time. This is why:
285
+ - Parallel tasks MUST NOT run project-wide build/test/lint — they will collide on phantom errors
286
+ - Tasks editing overlapping files MUST use `isolated: true` (worktree isolation) or be made sequential
287
+ - The caller MUST run verification after all tasks complete, not inside any individual task
288
+ </caution>
289
+
281
290
  Layered work with dependencies:
282
- **Phase 1 — Foundation** (do yourself or single task): define interfaces, create scaffolds, establish API shape. Never fan out until contract known.
291
+ **Phase 1 — Foundation** (caller MUST do this, MUST NOT delegate): define interfaces, create scaffolds, establish API shape. You MUST NOT fan out until contract is known.
283
292
  **Phase 2 — Parallel implementation**: fan out tasks consuming same known interface. Include Phase 1 API contract in `context`.
284
- **Phase 3 — Integration** (do yourself): wire modules, fix mismatches, verify builds.
293
+ **Phase 3 — Integration** (caller MUST do this, MUST NOT delegate): wire modules, fix mismatches, verify builds.
285
294
  **Phase 4 — Dependent layer**: fan out tasks consuming Phase 2 outputs.
286
295
  ---
287
296
 
288
297
  ## Pre-flight checklist
289
298
 
290
- Before calling tool, verify:
291
- - [ ] `context` includes only session-specific info not already in AGENTS.md/context files
292
- - [ ] Each `assignment` follows assignment template not one-liner
293
- - [ ] Each `assignment` includes edge cases / "don’t break" items
294
- - [ ] Tasks truly parallel (no hidden dependencies)
295
- - [ ] Scope small, file paths explicit (no globs)
296
- - [ ] No task runs project-wide build/test/lint you do after all tasks complete
297
- - [ ] `schema` used if you expect information
299
+ <critical>
300
+ Before calling tool, verify each item:
301
+ - [ ] `context` MUST include only session-specific info not already in AGENTS.md/context files
302
+ - [ ] Each `assignment` MUST follow the assignment template — one-liners are PROHIBITED
303
+ - [ ] Each `assignment` MUST include edge cases / "don't break" items
304
+ - [ ] Tasks MUST be truly parallel — you MUST be able to articulate why no task depends on another's output
305
+ - [ ] Scope MUST be small; file paths MUST be explicit (no globs)
306
+ - [ ] Tasks MUST NOT run project-wide build/test/lint — caller MUST verify after all tasks complete
307
+ - [ ] `schema` MUST be used if you expect structured output
308
+ </critical>
298
309
  ---
299
310
 
300
311
  ## Agents
@@ -1,65 +1,81 @@
1
1
  # Todo Write
2
2
 
3
- Create/manage structured task list for coding session.
3
+ Manage a phased task list. Submit an `ops` array — each op mutates state incrementally.
4
+ **Primary op: `update`.** Use it to mark tasks `in_progress` or `completed`. Only reach for other ops when the structure itself needs to change.
5
+
6
+ <critical>
7
+ You MUST call this tool twice per task:
8
+ 1. Before beginning — `{op: "update", id: "task-N", status: "in_progress"}`
9
+ 2. Immediately after finishing — `{op: "update", id: "task-N", status: "completed"}`
10
+
11
+ You MUST keep exactly one task `in_progress` at all times. Mark `completed` immediately — no batching.
12
+ </critical>
4
13
 
5
14
  <conditions>
6
- Use proactively:
7
- 1. Complex multi-step tasks requiring 3+ steps/actions
8
- 2. User requests todo list
9
- 3. User provides multiple tasks (numbered/comma-separated)
10
- 4. After new instructions—capture requirements as todos
11
- 5. Starting task—mark in_progress BEFORE beginning
12
- 6. After completing—mark completed, add follow-up tasks found
15
+ Create a todo list when:
16
+ 1. Task requires 3+ distinct steps
17
+ 2. User explicitly requests one
18
+ 3. User provides a set of tasks to complete
19
+ 4. New instructions arrive mid-task — capture before proceeding
13
20
  </conditions>
14
21
 
15
22
  <protocol>
16
- 1. **Task States**:
17
- - pending: not started
18
- - in_progress: working
19
- - completed: finished
20
- 2. **Task Management**:
21
- - Update status in real time
22
- - Mark complete IMMEDIATELY after finishing (no batching)
23
- - Keep exactly ONE task in_progress at a time
24
- - Remove tasks no longer relevant
25
- - Complete tasks in list order (do not mark later tasks completed while earlier tasks remain incomplete)
26
- 3. **Task Completion Requirements**:
27
- - ONLY mark completed when FULLY accomplished
28
- - On errors/blockers/inability to finish, keep in_progress
29
- - When blocked, create task describing what needs resolving
30
- 4. **Task Breakdown**:
31
- - Create specific, actionable items
32
- - Keep each todo scoped to one logical unit of work; split unrelated work into separate items
33
- - Break complex tasks into smaller steps
34
- - Use clear, descriptive names
35
- </protocol>
23
+ ## Operations
36
24
 
37
- <output>
38
- Returns confirmation todo list updated.
39
- </output>
25
+ |op|When to use|
26
+ |---|---|
27
+ |`update`|Mark a task in_progress / completed / abandoned, or edit content/notes|
28
+ |`replace`|Initial setup, or full restructure when the plan changes significantly|
29
+ |`add_phase`|Add a new phase of work discovered mid-task|
30
+ |`add_task`|Add a task to an existing phase|
31
+ |`remove_task`|Remove a task that is no longer relevant|
40
32
 
41
- <caution>
42
- When in doubt, use this.
43
- </caution>
33
+ ## Statuses
34
+
35
+ |Status|Meaning|
36
+ |---|---|
37
+ |`pending`|Not started|
38
+ |`in_progress`|Currently working — exactly one at a time|
39
+ |`completed`|Fully done|
40
+ |`abandoned`|Dropped intentionally|
41
+
42
+ ## Rules
43
+ - You MUST mark `in_progress` **before** starting work, not after
44
+ - You MUST mark `completed` **immediately** — never defer
45
+ - You MUST keep exactly **one** task `in_progress`
46
+ - You MUST complete phases in order — do not mark later tasks `completed` while earlier ones are `pending`
47
+ - On blockers: keep `in_progress`, add a new task describing the blocker
48
+ - Multiple ops can be batched in one call (e.g., complete current + start next)
49
+ </protocol>
44
50
 
45
- <example name="use-dark-mode">
46
- User: Add dark mode toggle to settings. Run tests when done.
47
- Creates todos: toggle component, state management, theme styles, update components, run tests
51
+ <avoid>
52
+ - Single-step tasks — act directly
53
+ - Conversational or informational requests
54
+ - Tasks completable in under 3 trivial steps
55
+ </avoid>
56
+
57
+ <example name="start-task">
58
+ Mark task-2 in_progress before beginning work:
59
+ ops: [{op: "update", id: "task-2", status: "in_progress"}]
48
60
  </example>
49
61
 
50
- <example name="use-features">
51
- User: Implement user registration, product catalog, shopping cart, checkout.
52
- Creates todos per feature with subtasks
62
+ <example name="complete-and-advance">
63
+ Finish task-2 and start task-3 in one call:
64
+ ops: [
65
+ {op: "update", id: "task-2", status: "completed"},
66
+ {op: "update", id: "task-3", status: "in_progress"}
67
+ ]
53
68
  </example>
54
69
 
55
- <example name="skip">
56
- User: Run npm install / Add a comment to this function / What does git status do?
57
- Do directly. Single-step/informational tasks need no tracking.
70
+ <example name="initial-setup">
71
+ Replace is for setup only. Prefer add_phase / add_task for incremental additions.
72
+ ops: [{op: "replace", phases: [
73
+ {name: "Investigation", tasks: [{content: "Read source"}, {content: "Map callsites"}]},
74
+ {name: "Implementation", tasks: [{content: "Apply fix"}, {content: "Run tests"}]}
75
+ ]}]
58
76
  </example>
59
77
 
60
- <avoid>
61
- Skip when:
62
- 1. Single straightforward task
63
- 2. Task completable in <3 trivial steps
64
- 3. Task purely conversational/informational
65
- </avoid>
78
+ <example name="skip">
79
+ User: "What does this function do?" / "Add a comment" / "Run npm install"
80
+ → Do it directly. No list needed.
81
+ </example>
@@ -3,8 +3,8 @@
3
3
  Search the web for up-to-date information beyond Claude's knowledge cutoff.
4
4
 
5
5
  <instruction>
6
- - Prefer primary sources (papers, official docs) and corroborate key claims with multiple sources
7
- - Include links for cited sources in the final response
6
+ - You SHOULD prefer primary sources (papers, official docs) and corroborate key claims with multiple sources
7
+ - You MUST include links for cited sources in the final response
8
8
  </instruction>
9
9
 
10
10
  <output>
@@ -5,6 +5,7 @@ Creates or overwrites file at specified path.
5
5
  <conditions>
6
6
  - Creating new files explicitly required by task
7
7
  - Replacing entire file contents when editing would be more complex
8
+ - Prefer `local://<path>` for large temporary artifacts, subagent handoff payloads, and reusable planning artifacts that should survive within the session
8
9
  </conditions>
9
10
 
10
11
  <output>
@@ -12,7 +13,7 @@ Confirmation of file creation/write with path. When LSP available, content may b
12
13
  </output>
13
14
 
14
15
  <critical>
15
- - Prefer Edit tool for modifying existing files (more precise, preserves formatting)
16
- - Create documentation files (*.md, README) only when explicitly requested
17
- - No emojis unless requested
16
+ - You SHOULD use Edit tool for modifying existing files (more precise, preserves formatting)
17
+ - You MUST NOT create documentation files (*.md, README) unless explicitly requested
18
+ - You MUST NOT use emojis unless requested
18
19
  </critical>