@seanyao/roll 0.5.0 → 2.602.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/CHANGELOG.md +717 -0
  2. package/LICENSE +21 -0
  3. package/README.md +65 -165
  4. package/bin/dream-test-quality-scan +110 -0
  5. package/bin/roll +14897 -815
  6. package/conventions/config.yaml +17 -1
  7. package/conventions/global/AGENTS.md +146 -100
  8. package/conventions/global/CLAUDE.md +1 -21
  9. package/conventions/global/GEMINI.md +8 -22
  10. package/conventions/global/project_rules.md +9 -0
  11. package/conventions/templates/backend-service/AGENTS.md +30 -81
  12. package/conventions/templates/backend-service/GEMINI.md +3 -3
  13. package/conventions/templates/backend-service/project_rules.md +16 -0
  14. package/conventions/templates/cli/AGENTS.md +31 -58
  15. package/conventions/templates/cli/CLAUDE.md +3 -5
  16. package/conventions/templates/cli/GEMINI.md +3 -3
  17. package/conventions/templates/cli/project_rules.md +16 -0
  18. package/conventions/templates/frontend-only/AGENTS.md +29 -64
  19. package/conventions/templates/frontend-only/GEMINI.md +3 -3
  20. package/conventions/templates/frontend-only/project_rules.md +14 -0
  21. package/conventions/templates/fullstack/AGENTS.md +31 -79
  22. package/conventions/templates/fullstack/CLAUDE.md +1 -1
  23. package/conventions/templates/fullstack/GEMINI.md +3 -3
  24. package/conventions/templates/fullstack/project_rules.md +15 -0
  25. package/lib/README.md +42 -0
  26. package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
  27. package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
  28. package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
  29. package/lib/__pycache__/loop_unstick.cpython-314.pyc +0 -0
  30. package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
  31. package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
  32. package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
  33. package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
  34. package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
  35. package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
  36. package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
  37. package/lib/agent_usage/README.md +49 -0
  38. package/lib/agent_usage/__init__.py +108 -0
  39. package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
  40. package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
  41. package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
  42. package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
  43. package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
  44. package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
  45. package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
  46. package/lib/agent_usage/gemini.py +127 -0
  47. package/lib/agent_usage/kimi.py +278 -0
  48. package/lib/agent_usage/kimi_emit.py +123 -0
  49. package/lib/agent_usage/openai.py +126 -0
  50. package/lib/agent_usage/pi.py +200 -0
  51. package/lib/agent_usage/pi_emit.py +135 -0
  52. package/lib/agent_usage/qwen.py +128 -0
  53. package/lib/backfill-pi-usage.py +243 -0
  54. package/lib/changelog_audit.py +155 -0
  55. package/lib/changelog_generate.py +263 -0
  56. package/lib/context_feed_budget.sh +194 -0
  57. package/lib/github_sync.py +876 -0
  58. package/lib/i18n/README.md +54 -0
  59. package/lib/i18n/agent.sh +75 -0
  60. package/lib/i18n/alert.sh +20 -0
  61. package/lib/i18n/backlog.sh +96 -0
  62. package/lib/i18n/brief.sh +5 -0
  63. package/lib/i18n/changelog.sh +5 -0
  64. package/lib/i18n/ci.sh +15 -0
  65. package/lib/i18n/debug.sh +0 -0
  66. package/lib/i18n/doctor.sh +44 -0
  67. package/lib/i18n/dream.sh +0 -0
  68. package/lib/i18n/init.sh +91 -0
  69. package/lib/i18n/lang.sh +10 -0
  70. package/lib/i18n/loop.sh +140 -0
  71. package/lib/i18n/migrate.sh +74 -0
  72. package/lib/i18n/offboard.sh +31 -0
  73. package/lib/i18n/onboard.sh +0 -0
  74. package/lib/i18n/peer.sh +41 -0
  75. package/lib/i18n/peer_help.sh +25 -0
  76. package/lib/i18n/peer_reset.sh +7 -0
  77. package/lib/i18n/peer_status.sh +5 -0
  78. package/lib/i18n/prices.sh +3 -0
  79. package/lib/i18n/prices_refresh.sh +17 -0
  80. package/lib/i18n/prices_show.sh +7 -0
  81. package/lib/i18n/propose.sh +0 -0
  82. package/lib/i18n/release.sh +0 -0
  83. package/lib/i18n/research.sh +0 -0
  84. package/lib/i18n/review_pr.sh +0 -0
  85. package/lib/i18n/sentinel.sh +0 -0
  86. package/lib/i18n/setup.sh +3 -0
  87. package/lib/i18n/shared.sh +157 -0
  88. package/lib/i18n/skills/roll-brief.sh +47 -0
  89. package/lib/i18n/skills/roll-build.sh +97 -0
  90. package/lib/i18n/skills/roll-design.sh +18 -0
  91. package/lib/i18n/skills/roll-fix.sh +53 -0
  92. package/lib/i18n/skills/roll-loop.sh +28 -0
  93. package/lib/i18n/skills/roll-onboard.sh +33 -0
  94. package/lib/i18n/skills_catalog.sh +30 -0
  95. package/lib/i18n/slides.sh +3 -0
  96. package/lib/i18n/slides_build.sh +38 -0
  97. package/lib/i18n/slides_delete.sh +19 -0
  98. package/lib/i18n/slides_list.sh +14 -0
  99. package/lib/i18n/slides_logs.sh +12 -0
  100. package/lib/i18n/slides_new.sh +15 -0
  101. package/lib/i18n/slides_preview.sh +14 -0
  102. package/lib/i18n/slides_templates.sh +7 -0
  103. package/lib/i18n/status.sh +21 -0
  104. package/lib/i18n/update.sh +24 -0
  105. package/lib/i18n.sh +211 -0
  106. package/lib/loop-exit-summary.py +393 -0
  107. package/lib/loop-fmt.py +589 -0
  108. package/lib/loop_pick_agent.py +316 -0
  109. package/lib/loop_result_eval.py +469 -0
  110. package/lib/loop_unstick.py +180 -0
  111. package/lib/model_prices.py +186 -0
  112. package/lib/prices/README.md +35 -0
  113. package/lib/prices/snapshot-2026-05-22.json +22 -0
  114. package/lib/prices/snapshot-2026-05-23-deepseek.json +15 -0
  115. package/lib/prices/snapshot-2026-05-23-kimi.json +14 -0
  116. package/lib/prices_fetcher.py +285 -0
  117. package/lib/roll-backlog.py +225 -0
  118. package/lib/roll-brief.py +286 -0
  119. package/lib/roll-help.py +158 -0
  120. package/lib/roll-home.py +556 -0
  121. package/lib/roll-init.py +156 -0
  122. package/lib/roll-loop-status.py +1683 -0
  123. package/lib/roll-loop-story.py +191 -0
  124. package/lib/roll-onboard-render.py +378 -0
  125. package/lib/roll-peer.py +252 -0
  126. package/lib/roll-plan-validate.py +386 -0
  127. package/lib/roll-setup.py +102 -0
  128. package/lib/roll-status.py +367 -0
  129. package/lib/roll_git.py +41 -0
  130. package/lib/roll_render.py +414 -0
  131. package/lib/slides/components/README.md +123 -0
  132. package/lib/slides/components/cards-2.html +9 -0
  133. package/lib/slides/components/cards-3.html +9 -0
  134. package/lib/slides/components/cards-4.html +9 -0
  135. package/lib/slides/components/compare.html +22 -0
  136. package/lib/slides/components/highlight.html +9 -0
  137. package/lib/slides/components/pipeline.html +12 -0
  138. package/lib/slides/components/plain.html +7 -0
  139. package/lib/slides/components/quote.html +4 -0
  140. package/lib/slides/components/timeline.html +9 -0
  141. package/lib/slides/templates/introduction-v3.html +571 -0
  142. package/lib/slides/templates/pitch.html +0 -0
  143. package/lib/slides-render.py +778 -0
  144. package/lib/slides-validate.py +357 -0
  145. package/lib/test_quality_gate.py +143 -0
  146. package/package.json +8 -7
  147. package/skills/roll-.changelog/SKILL.md +406 -33
  148. package/skills/roll-.clarify/SKILL.md +5 -2
  149. package/skills/roll-.dream/SKILL.md +374 -0
  150. package/skills/roll-.echo/SKILL.md +5 -2
  151. package/skills/roll-.qa/SKILL.md +57 -3
  152. package/skills/roll-.review/SKILL.md +42 -3
  153. package/skills/roll-brief/SKILL.md +209 -0
  154. package/skills/roll-build/SKILL.md +308 -63
  155. package/skills/roll-debug/SKILL.md +341 -162
  156. package/skills/roll-debug/injectable-bb.js +263 -0
  157. package/skills/roll-deck/SKILL.md +296 -0
  158. package/skills/roll-design/ENGINEERING_CHECKLIST.md +1 -1
  159. package/skills/roll-design/SKILL.md +727 -94
  160. package/skills/roll-doc/SKILL.md +595 -0
  161. package/skills/roll-doctor/SKILL.md +192 -0
  162. package/skills/roll-fix/SKILL.md +149 -32
  163. package/skills/{roll-jot → roll-idea}/SKILL.md +18 -10
  164. package/skills/roll-loop/SKILL.md +578 -0
  165. package/skills/roll-notes/SKILL.md +103 -0
  166. package/skills/roll-onboard/SKILL.md +234 -0
  167. package/skills/roll-peer/SKILL.md +336 -0
  168. package/skills/roll-propose/SKILL.md +157 -0
  169. package/skills/roll-review-pr/SKILL.md +58 -0
  170. package/skills/roll-sentinel/SKILL.md +11 -2
  171. package/skills/roll-spar/SKILL.md +8 -6
  172. package/template/.github/workflows/ci.yml +5 -2
  173. package/template/AGENTS.md +20 -74
  174. package/skills/roll-research/SKILL.md +0 -307
  175. package/skills/roll-research/references/schema.json +0 -162
  176. package/skills/roll-research/scripts/md_to_pdf.py +0 -289
  177. package/tools/roll-fetch/SKILL.md +0 -182
  178. package/tools/roll-fetch/package.json +0 -15
  179. package/tools/roll-fetch/smart-web-fetch.js +0 -558
  180. package/tools/roll-probe/SKILL.md +0 -84
  181. /package/template/{BACKLOG.md → .roll/backlog.md} +0 -0
@@ -1,5 +1,7 @@
1
1
  ---
2
2
  name: roll-build
3
+ license: MIT
4
+ allowed-tools: "Read, Edit, Write, Glob, Grep, Bash, Skill, Agent"
3
5
  description: "Universal delivery skill. Handles any input: a US-XXX ID executes from BACKLOG via TCR; a FIX-XXX redirects to roll-fix; any other text auto-clarifies, designs, and ships as a new Story."
4
6
  ---
5
7
 
@@ -17,7 +19,7 @@ One entry point. Any input. Full delivery.
17
19
  Input received
18
20
  ├── matches "US-[A-Z]+-[0-9]+" → Story mode: read BACKLOG → TCR workflow
19
21
  ├── matches "FIX-[A-Z]+-[0-9]+" → redirect to $roll-fix
20
- ├── matches "IDEA-[0-9]+" → redirect to $roll-jot (lookup and expand)
22
+ ├── matches "IDEA-[0-9]+" → redirect to $roll-idea (lookup and expand)
21
23
  └── anything else → Fly mode: clarify → design → execute
22
24
  ```
23
25
 
@@ -51,9 +53,70 @@ Do not use for:
51
53
 
52
54
  Activate when input is a `US-[A-Z]+-[0-9]+` identifier.
53
55
 
56
+ ### Step 0: Pre-flight self-check (US-AGENT-007)
57
+
58
+ Before reading the Story in depth or splitting actions, **read the Agent profile** from the story's feature md and decide whether this cycle can realistically deliver it. The check is mechanical and turns on a single axis — the story's `est_min` estimate (US-AGENT-022 retired the old three-dimension type/est/risk routing; there is no per-agent capacity range, risk zone, or history threshold anymore):
59
+
60
+ ```
61
+ inputs:
62
+ story.est_min (from **Agent profile:** block, US-AGENT-001)
63
+ story.chain_depth (0 unless already a downgrade product)
64
+
65
+ complexity tier (lib/loop_pick_agent.py, single source of truth):
66
+ est_min <= 8 → easy
67
+ 8 < est_min <= 20 → default
68
+ est_min > 20 → hard
69
+ missing / illegal → default
70
+
71
+ verdict:
72
+ too_big when:
73
+ story.est_min is large enough that even the `hard` tier won't fit one
74
+ cycle — i.e. the work plainly composes too many files / behaviours to
75
+ land green in a single cycle — AND story.chain_depth == 0
76
+ (still have downgrade budget; don't burn a cycle on a guaranteed miss).
77
+ ok otherwise
78
+ ```
79
+
80
+ Output the verdict as the first line of the cycle response:
81
+
82
+ ```yaml
83
+ verdict: ok # or: too_big
84
+ reason: <one short line — which condition triggered, with numbers>
85
+ ```
86
+
87
+ When `verdict: ok` → continue to Step 1 normally.
88
+ When `verdict: too_big` → go to **US-AGENT-008 self-downgrade path**, **but** first run the **US-AGENT-009 chain_depth cap check**:
89
+
90
+ ```bash
91
+ # 0a. Cap check: refuse the third consecutive auto-split.
92
+ # exit 0 → split allowed; exit 1 → cap hit, take cap-hit path instead.
93
+ if ! bash -c 'source "$(command -v roll)"; _loop_chain_depth_cap_check US-XXX-NNN'; then
94
+ # Cap hit (chain_depth ≥ 2): hold + ALERT, exit cleanly.
95
+ bash -c 'source "$(command -v roll)"; _loop_split_cap_hit US-XXX-NNN "depth >= 2, human triage required"'
96
+ exit 0
97
+ fi
98
+
99
+ # 1. Invoke roll-design to re-split the story into smaller sub-stories.
100
+ # Each sub-story carries chain_depth = (parent.chain_depth + 1).
101
+ # Sub-stories land as 📋 Todo with depends-on:<parent> chained.
102
+ Skill("roll-design", "--from-story US-XXX-NNN")
103
+
104
+ # 2. After the sub-stories are written to BACKLOG, flip the parent
105
+ # to 🚫 Hold and emit the downgrade event. The helper handles ALERT.
106
+ bash -c 'source "$(command -v roll)"; _loop_self_downgrade US-XXX-NNN "too_big: <reason from verdict>" "US-XXX-NNNa,US-XXX-NNNb"'
107
+
108
+ # 3. Exit cleanly — no TCR commits this cycle. The next loop cycle picks
109
+ # up the first sub-story (which is smaller and should pass pre-flight).
110
+ exit 0
111
+ ```
112
+
113
+ If `roll-design` cannot produce ≥2 sub-stories (story is already irreducible), fall through to **US-AGENT-009 cap-hit path** by invoking `_loop_split_cap_hit` directly. The cap is purely about stopping infinite split chains; even on the first re-split, if the design step gives up, the cap-hit handler raises ALERT for human triage.
114
+
115
+ > Pre-flight is honest, not paranoid: a small story (est_min ≤ 8 — the `easy` tier — with chain_depth=0) should almost always go `ok`. The check pays off on the long tail — stories with a large `est_min` that, on inspection, plainly compose far more files and behaviours than one cycle can land green.
116
+
54
117
  ### Step 1: Read the Story
55
118
 
56
- 1. Open `BACKLOG.md`, find the US row, follow the link to `docs/features/<feature>.md`
119
+ 1. Open `.roll/backlog.md`, find the US row, follow the link to `.roll/features/<feature>.md`
57
120
  2. Read the full AC / Files / Dependencies section
58
121
  3. If a plan doc (`<feature>-plan.md`) exists, read it for context
59
122
 
@@ -63,6 +126,16 @@ Activate when input is a `US-[A-Z]+-[0-9]+` identifier.
63
126
  - Pick the smallest shippable Action first
64
127
  - **Granularity constraint**: Each Action completable in 2–5 minutes; split if larger
65
128
  - **No placeholders**: Action descriptions must be specific and directly executable
129
+ - **Test-quality self-check (US-QA-011)** — for every Action that adds tests:
130
+ 1. Tests call project functions / public command entry points; do NOT inline
131
+ external-tool behaviour (`sed`/`awk`/`grep`/`find`/`cut` pipelines that
132
+ duplicate logic already in `lib/` or `bin/`) — rubric ❼.
133
+ 2. Tests sandbox filesystem state via `BATS_TMPDIR` (or equivalent); do NOT
134
+ touch or assert on paths outside this repo (`~/.codex`, `~/.kimi`,
135
+ `~/.roll/`, `/etc/...`) — rubric ❽.
136
+ 3. If you can't satisfy (1) or (2), reshape the Action: extract a project
137
+ helper, redirect the env var to a tmp dir, or move the test to an
138
+ integration tier where the boundary is intentional and documented.
66
139
 
67
140
  #### 2.5 Parallel Dispatch (auto-determined)
68
141
 
@@ -89,16 +162,16 @@ git worktree add .worktrees/{action-id} -b dispatch/{action-id}
89
162
  **Status notifications (required):**
90
163
 
91
164
  ```
92
- 🔀 Parallel Dispatch: N Actions running in parallel
165
+ 🔀 $(msg build.parallel_dispatch N)
93
166
 
94
- Agent 1 [Action: ...] ⏳ Running...
95
- Agent 2 [Action: ...] ⏳ Running...
167
+ $(msg build.agent_running 1 "...")
168
+ $(msg build.agent_running 2 "...")
96
169
 
97
- Agent 1 [Action: ...] ✅ Done (N TCR commits)
98
- Agent 2 [Action: ...] ✅ Done (N TCR commits)
170
+ $(msg build.agent_done 1 "..." N)
171
+ $(msg build.agent_done 2 "..." N)
99
172
 
100
- 🔀 Merge: N/N succeeded, merging...
101
- 🧪 Integration tests: running...
173
+ 🔀 $(msg build.merge_summary N N)
174
+ 🧪 $(msg build.integration_tests)
102
175
  ```
103
176
 
104
177
  When parallel conditions are not met, execute Actions sequentially.
@@ -122,9 +195,9 @@ Activate when input does not match any `US-XXX` / `FIX-XXX` pattern, or when no
122
195
  Before any code, assess clarity:
123
196
 
124
197
  ```
125
- 🎯 Clarified Goal: {1-2 sentences capturing user intent}
126
- 📏 Complexity Assessment: {small|medium|large}
127
- 🔍 Uncertainty Areas: {list what needs investigation/decision}
198
+ 🎯 $(msg build.clarified_goal): {1-2 sentences capturing user intent}
199
+ 📏 $(msg build.complexity_assessment): {small|medium|large}
200
+ 🔍 $(msg build.uncertainty_areas): {list what needs investigation/decision}
128
201
  ```
129
202
 
130
203
  **If uncertainty areas are non-empty or the request is vague, auto-trigger `$roll-.clarify`:**
@@ -132,6 +205,22 @@ Before any code, assess clarity:
132
205
  - Follow with 3–5 targeted questions
133
206
  - Stop and wait for user answers before proceeding
134
207
 
208
+ **Approach Confirmation (required for UX / format / automation decisions):**
209
+
210
+ If the request involves any of: output format, layout, automation level (manual vs automatic), or architecture structure — output a confirmation block **before writing any code**:
211
+
212
+ ```
213
+ 📐 $(msg build.approach_confirmation)
214
+
215
+ 1. $(msg build.what_changes): {what will be built or modified}
216
+ 2. $(msg build.the_approach): {specific format / automation level / structure chosen}
217
+ 3. $(msg build.files_touched): {list of files}
218
+
219
+ Proceeding unless you say otherwise.
220
+ ```
221
+
222
+ Wait for the user's response before editing files. If the user does not object within one exchange, proceed.
223
+
135
224
  **Complexity Rules (AI coding time):**
136
225
 
137
226
  | Level | Scope | Action |
@@ -143,8 +232,8 @@ Before any code, assess clarity:
143
232
  ### Phase 2: Create US / Actions
144
233
 
145
234
  - Use `$roll-design` to split vague request into INVEST-compliant User Stories
146
- - Insert US into `BACKLOG.md` under the relevant Epic > Feature group
147
- - If a new `docs/features/<feature>.md` is needed, create it
235
+ - Insert US into `.roll/backlog.md` under the relevant Epic > Feature group
236
+ - If a new `.roll/features/<feature>.md` is needed, create it
148
237
 
149
238
  After creation, switch to **Story mode** and execute the first US immediately.
150
239
 
@@ -156,19 +245,46 @@ Proceed to the **Shared TCR Workflow** (Phase 4 onward).
156
245
 
157
246
  The following phases apply to both Story mode and Fly mode after planning is complete.
158
247
 
248
+ ### Phase 3.5: Peer Review Gate
249
+
250
+ After planning is complete, before entering Test Design Review, assess whether the plan warrants peer review:
251
+
252
+ **Auto-trigger `$roll-peer` when any of the following is true:**
253
+ - Plan affects **>3 files** or **crosses modules**
254
+ - **Architecture decisions** or non-obvious trade-offs are involved
255
+ - **Destructive / irreversible operations** (deletions, migrations, production deploys)
256
+ - **High-risk signal words** detected in user request ("critical / important / don't break / 关键 / 别搞砸")
257
+ - User explicitly requests peer review ("/peer", "叫上 peer")
258
+
259
+ **With 10s opt-out:**
260
+ ```
261
+ Plan affects N files across M modules. Estimated peer review: 2–3 rounds, ~X tokens.
262
+ Press Enter to launch peer review, or type 'n' to skip. Auto-executing in 10s...
263
+ ```
264
+
265
+ **After peer review result:**
266
+ - **AGREE** → proceed to Phase 4 (Test Design Review)
267
+ - **REFINE** → incorporate feedback, regenerate plan, re-run Phase 3.5
268
+ - **OBJECT** → consider alternative plan, re-run Phase 3.5 with revised proposal
269
+ - **ESCALATE** → present both proposals to user for final decision before proceeding
270
+
271
+ **Never trigger:**
272
+ - Single-file changes or well-defined fixes
273
+ - Plans with no cross-module impact and no architecture decisions
274
+
159
275
  ### Phase 4: Test Design Review
160
276
 
161
277
  Before writing implementation code:
162
278
 
163
279
  ```
164
- 🧪 Test Design for Action: {Action name}
280
+ 🧪 $(msg build.test_design): {Action name}
165
281
 
166
- Scenarios:
282
+ $(msg build.scenarios):
167
283
  ├── {Happy path scenario}
168
284
  ├── {Edge case scenario}
169
285
  └── {Failure/regression scenario}
170
286
 
171
- Test Types:
287
+ $(msg build.test_types):
172
288
  ├── Unit tests for: {logic components}
173
289
  ├── Integration tests for: {API/data flows}
174
290
  └── Manual verification for: {UI/visual elements}
@@ -187,10 +303,10 @@ Reference `$roll-.qa` for coverage requirements and test pyramid strategy.
187
303
 
188
304
  ```
189
305
  ┌────────────────────────────────────────────────────────────┐
190
- TCR CYCLE (Test && Commit || Revert)
306
+ $(msg build.tcr_cycle)
191
307
  └────────────────────────────────────────────────────────────┘
192
308
 
193
- MICRO-STEP {N}: {description of smallest testable change}
309
+ $(msg build.micro_step {N} "{description of smallest testable change}")
194
310
 
195
311
  Step 1: Write/Update Test
196
312
  └── Run test → Confirm RED (expected failure)
@@ -219,6 +335,81 @@ MICRO-STEP {N}: {description of smallest testable change}
219
335
 
220
336
  Accumulate 3–5 micro-commits per Action. Each commit is a guaranteed working state.
221
337
 
338
+ #### Architectural Friction Signal (non-blocking)
339
+
340
+ While implementing, watch for these signals:
341
+
342
+ - This Action requires touching code in 3+ unrelated modules
343
+ - The existing module boundary has to be bent or bypassed to make this work
344
+ - A data structure or interface needs to change in a way that ripples across contexts
345
+ - The implementation feels "wrong" even when the test passes
346
+
347
+ When any signal appears, **do not stop — flag it**:
348
+
349
+ ```bash
350
+ # 1. Append to .roll/backlog.md under ## ♻️ Refactor
351
+ # REFACTOR-XXX | <one-line description> | 📋 Todo
352
+
353
+ # 2. Append a brief entry to .roll/features/autonomous-evolution/refactor-log.md
354
+ ```
355
+
356
+ **REFACTOR entry format in .roll/backlog.md:**
357
+
358
+ ```markdown
359
+ | REFACTOR-001 | {one-line plain-language description} | 📋 Todo |
360
+ ```
361
+
362
+ 描述写法:参见 AGENTS.md "Backlog descriptions" 规则。说清楚"什么需要改"以及"不改会怎样",技术细节写在 `.roll/features/autonomous-evolution/refactor-log.md`。
363
+
364
+ **refactor-log.md entry format:**
365
+
366
+ ```markdown
367
+ ## REFACTOR-001 Extract payment boundary
368
+
369
+ **Flagged**: {YYYY-MM-DD} during US-XXX
370
+ **Signal**: {which friction signal triggered this}
371
+ **Observation**: {1–3 sentences describing what felt wrong}
372
+ **Suggested scope**: {rough sense of what a fix would touch}
373
+ ```
374
+
375
+ Then continue implementing the current Story normally.
376
+
377
+ **Event emission** — after all TCR micro-steps for a Story complete, emit a `build` event so the cycle event stream reflects the work done:
378
+
379
+ ```bash
380
+ # _tcr_count = number of "tcr:" prefix commits made during this Story
381
+ _loop_event build "$US_ID" "${_tcr_count} commits" "" 2>/dev/null || true
382
+ ```
383
+
384
+ ### Phase 5.5: E2E Deposit
385
+
386
+ After TCR micro-steps pass, deposit an E2E test for this Story's core user flow.
387
+
388
+ ```
389
+ E2E DEPOSIT
390
+
391
+ Step 1: Detect
392
+ └── Read project's existing E2E infrastructure
393
+ (test directories, config files, framework, naming conventions)
394
+
395
+ Step 2: Write
396
+ └── One E2E test covering the Story's golden path
397
+ (the critical user journey this Story delivers)
398
+
399
+ Step 3: Run
400
+ └── Execute the new E2E test
401
+
402
+ Step 4: TCR
403
+ ├── ✅ GREEN → git commit -m "tcr: e2e deposit for {story}"
404
+ └── ❌ RED → Fix via TCR cycle until green
405
+ ```
406
+
407
+ **Rules:**
408
+ - Follow whatever E2E patterns the project already uses — framework, directory, naming
409
+ - If no E2E infrastructure exists, reference `$roll-.qa` "Missing Test Infrastructure" section to bootstrap minimally, then deposit
410
+ - One test per Story — covers the golden path, not exhaustive edge cases (those are unit/integration from Phase 5)
411
+ - Each deposited E2E becomes a replayable case: CI runs it on every push, Sentinel can sample it against production
412
+
222
413
  ### Phase 6: Pre-Push CI Gate
223
414
 
224
415
  After all micro-steps, run full CI locally before pushing:
@@ -259,33 +450,51 @@ EOF
259
450
  chmod +x .git/hooks/pre-push
260
451
  ```
261
452
 
262
- ### Phase 7: Pre-Push Code Review
453
+ ### Phase 7: Pre-Push Code Review (Three-Axis Deep Review)
454
+
455
+ This phase runs **once per Story** (not per micro-step) on the full accumulated diff.
456
+ Per-micro-step review uses the `$roll-.review staged` inline checklist (zero extra cost).
457
+
458
+ **Phase 3.5 vs Phase 7 split**: Phase 3.5 (Peer Review) focuses on architectural direction
459
+ and approach before coding begins. Phase 7 focuses on implementation quality after all
460
+ micro-steps are done — catching issues that only appear at diff scale (parameter sprawl
461
+ across files, copy-paste patterns, cross-file N+1, etc.).
263
462
 
264
463
  ```bash
265
- $roll-.review staged
464
+ # Capture full Story diff
465
+ git diff main...HEAD
266
466
  ```
267
467
 
268
- **Review output:**
468
+ **Launch three review agents in parallel** (each receives the full diff):
469
+
269
470
  ```
270
- 🔍 Self Review Report
271
- ├── Scope: X files (+Y/-Z lines)
272
- ├── 🔴 Critical: N issues (must fix)
273
- ├── 🟡 Warnings: N issues (should fix)
274
- ├── 🟢 Suggestions: N items (optional)
275
- └── ✅ Passed dimensions: [Quality, Design, Scope, ...]
471
+ Agent 1: Reuse Review
472
+ → Search for existing utilities / helpers the new code could use instead
473
+ → Flag any new function that duplicates existing functionality
474
+ → Flag inline logic replaceable by existing tools
475
+
476
+ Agent 2: Quality Review
477
+ → Redundant state, Parameter sprawl, Copy-paste near-duplicate,
478
+ Leaky abstraction, Stringly-typed, JSX nesting,
479
+ Nested conditionals ≥3 deep, Unnecessary comments
480
+
481
+ Agent 3: Efficiency Review
482
+ → Redundant computation / N+1, Missed concurrency,
483
+ Hot-path bloat, Loop no-op updates, TOCTOU existence pre-check,
484
+ Memory leaks, Overly broad operations
276
485
  ```
277
486
 
278
- **Review dimensions** (correctness guaranteed by TCR):
279
- - 🎯 **Quality**: Naming clarity, DRY, function size, readability
280
- - 📐 **Design**: Architecture, abstraction level, separation of concerns
281
- - ⚠️ **Scope**: No opportunistic changes
282
- - 📝 **Documentation**: Comments where needed
487
+ Wait for all three agents to complete. Aggregate findings → fix each issue
488
+ (false positives: note and skip, no debate) → summarize what was fixed.
489
+
490
+ **Fallback**: If parallel agent invocation fails, run `$roll-.review staged` on
491
+ the full diff as a single-pass fallback — do not skip review entirely.
283
492
 
284
493
  **Decision:**
285
494
  ```
286
495
  🔴 Critical > 0 → Fix via new TCR cycle → Re-review
287
496
  🟡 Warnings > 0 → Fix if quick (< 5 min) or document
288
- 🟢 Suggestions / ✅ All clear → Proceed to push
497
+ 🟢 Suggestions / ✅ All clear → Proceed to Phase 8
289
498
  ```
290
499
 
291
500
  ### Phase 8: Commit & Push
@@ -348,17 +557,17 @@ Follow the repo's deployment path (Vercel / Railway / etc.) and record the deplo
348
557
  **Before marking as DONE, fresh evidence must be provided.**
349
558
 
350
559
  ```
351
- 🚦 Verification Gate
560
+ 🚦 $(msg build.verification_gate)
352
561
 
353
- Evidence checklist (each item must have actual output):
354
- ├── [ ] Tests passed: paste actual test run output
355
- ├── [ ] Build succeeded: paste build output
356
- ├── [ ] Online verification: screenshot / curl output / log snippet
357
- └── [ ] No regression: verify at least one existing feature still works
562
+ $(msg build.evidence_checklist):
563
+ ├── [ ] $(msg build.tests_passed)
564
+ ├── [ ] $(msg build.build_succeeded)
565
+ ├── [ ] $(msg build.online_verification)
566
+ └── [ ] $(msg build.no_regression)
358
567
 
359
- Gate Decision:
360
- ├── ✅ All items have evidence → Can mark as DONE
361
- └── ❌ Any item missing evidence → Gather evidence before passing the gate
568
+ $(msg build.gate_decision):
569
+ ├── ✅ $(msg build.gate_pass)
570
+ └── ❌ $(msg build.gate_fail)
362
571
  ```
363
572
 
364
573
  **Hard Rule**: "I confirmed the tests passed" does not count as evidence. Must be **freshly run** command output from this session.
@@ -367,16 +576,16 @@ Follow the repo's deployment path (Vercel / Railway / etc.) and record the deplo
367
576
 
368
577
  Both locations must be updated — neither can be skipped:
369
578
 
370
- **① Update BACKLOG.md index row (Status column):**
579
+ **① Update .roll/backlog.md index row (Status column):**
371
580
 
372
581
  ```markdown
373
- | [US-{ID}](docs/features/<feature>.md#us-{id}) | {Title} | ✅ Done |
582
+ | [US-{ID}](.roll/features/<feature>.md#us-{id}) | {Title} | ✅ Done |
374
583
  ```
375
584
 
376
- Change the Status from `📋 Todo` to `✅ Done`.
585
+ Change the Status from `📋 Todo` or `🔨 In Progress` (whichever the row currently shows) to `✅ Done`. When invoked by `roll-loop`, the row will already be `🔨 In Progress` — that is the expected starting state, and the transition is the same Edit operation.
377
586
  For Fly mode: first append an index row under the appropriate Epic > Feature group, then mark it done.
378
587
 
379
- **② Update `docs/features/<feature>.md` US section:**
588
+ **② Update `.roll/features/<feature>.md` US section:**
380
589
 
381
590
  ```markdown
382
591
  ## US-{ID} {Story Title} ✅
@@ -399,8 +608,15 @@ For Fly mode: first append an index row under the appropriate Epic > Feature gro
399
608
 
400
609
  If the US section does not yet exist, create the full section (AC / Files / Dependencies).
401
610
 
611
+ **Before committing, run `$roll-.changelog`** to stage CHANGELOG.md — then include
612
+ it in the completion commit so no separate changelog commit is created.
613
+
402
614
  ```bash
403
- git add BACKLOG.md docs/features/
615
+ # 1. Stage changelog (roll-.changelog stages CHANGELOG.md only, does not commit)
616
+ $roll-.changelog
617
+
618
+ # 2. Commit BACKLOG + feature doc + CHANGELOG.md together
619
+ git add .roll/backlog.md .roll/features/ CHANGELOG.md
404
620
  git commit -m "docs: mark {US-ID} as completed"
405
621
  git push
406
622
  ```
@@ -408,19 +624,20 @@ git push
408
624
  ### Phase 12: Report & Celebrate
409
625
 
410
626
  ```
411
- Pushed to GitHub: origin/main
412
- 🚀 Deployed: <url>
413
- Verified: <what was checked>
414
- 📦 Changes: <summary>
415
- 🔢 Commits: <count> micro-commits via TCR
416
- 🧪 Tests: <what tests were added/modified>
417
- 📊 TCR Stats: <success rate, revert count if any>
418
- 📋 Review Gate: <self-review findings summary>
419
- 📝 BACKLOG: <US-ID> marked ✅ Done
627
+ $(msg build.pushed_to)
628
+ 🚀 $(msg build.deployed): <url>
629
+ $(msg build.verified): <what was checked>
630
+ 📦 $(msg build.changes_summary): <summary>
631
+ 🔢 $(msg build.commits_count): <count> micro-commits via TCR
632
+ 🧪 $(msg build.tests_added): <what tests were added/modified>
633
+ 📊 $(msg build.tcr_stats): <success rate, revert count if any>
634
+ 📋 $(msg build.review_gate): <self-review findings summary>
635
+ 📝 $(msg build.backlog_updated "<US-ID>")
636
+ 📄 $(msg build.changelog_bundled)
420
637
 
421
- 🎉 Shipped.
638
+ 🎉 $(msg build.shipped)
422
639
 
423
- 🔄 Next Options:
640
+ 🔄 $(msg build.next_options):
424
641
  1. Continue to next Action (if Story has more)
425
642
  2. Start next US (if Fly mode created multiple)
426
643
  3. Done (if all completed)
@@ -469,7 +686,7 @@ Before creating any file or directory:
469
686
  - No "while I'm here" refactors unless in a separate TCR cycle
470
687
 
471
688
  7. **Always update BACKLOG status**
472
- - BACKLOG.md index row and `docs/features/<feature>.md` US section are both required
689
+ - `.roll/backlog.md` index row and `.roll/features/<feature>.md` US section are both required
473
690
  - Neither can be skipped
474
691
 
475
692
  ---
@@ -479,6 +696,7 @@ Before creating any file or directory:
479
696
  - [ ] Story and Action clearly defined
480
697
  - [ ] Test design reviewed and approved
481
698
  - [ ] **TCR cycles completed** (all micro-steps via Test && Commit)
699
+ - [ ] **E2E deposited** (golden path test for this Story, committed via TCR)
482
700
  - [ ] All commits are green states (no broken commits)
483
701
  - [ ] Local CI checks passed (format + lint + build + test)
484
702
  - [ ] Self-code-review passed, blocking issues fixed via TCR
@@ -487,10 +705,36 @@ Before creating any file or directory:
487
705
  - [ ] Deployed to production
488
706
  - [ ] Online verification performed
489
707
  - [ ] **Verification Gate passed** (fresh evidence for tests, build, deploy, no regression)
490
- - [ ] **BACKLOG.md index status updated** (📋 → ✅, REQUIRED)
491
- - [ ] **`docs/features/<feature>.md` US section updated** (Completed date + [x] ACs, REQUIRED)
708
+ - [ ] **`.roll/backlog.md` index status updated** (📋 → ✅, REQUIRED)
709
+ - [ ] **`.roll/features/<feature>.md` US section updated** (Completed date + [x] ACs, REQUIRED)
710
+ - [ ] **CHANGELOG.md staged and bundled** into completion commit via `$roll-.changelog` in Phase 11 (REQUIRED)
711
+ - [ ] **Self-score note written (US-SKILL-010 / US-SKILL-012)** — see "Self-score" subsection below
492
712
  - [ ] Summary reported to user
493
713
 
714
+ ### Self-score (US-SKILL-012)
715
+
716
+ Before reporting completion to the user, write one self-score note. The
717
+ `_skill_write_self_score` helper writes the note to `.roll/notes/<date>-roll-build-<US-id>-<epoch>.md`
718
+ with YAML frontmatter so trend analysis (US-SKILL-014) can aggregate later:
719
+
720
+ ```bash
721
+ bash -c 'source "$(command -v roll)"; \
722
+ _skill_write_self_score roll-build US-XXX-NNN <score 1..10> <good|ok|regression> "<rationale>"'
723
+ ```
724
+
725
+ Score guidance (integer 1..10):
726
+ - **9..10** — story shipped cleanly: AC fully met, TCR rhythm tight, no
727
+ re-tries from `verdict: too_big`, peer review concerns addressed inline.
728
+ - **6..8** — shipped with caveats: re-tries on red, edge case left to a
729
+ follow-up FIX, documentation lagged behind code by one cycle, etc.
730
+ - **1..5** — shipped but at low confidence: AC partially met (note which),
731
+ TCR rhythm broken (multiple revert iterations), or `regression` verdict.
732
+
733
+ Verdict values:
734
+ - `good` — story fully delivered; AC met; no concerning signal.
735
+ - `ok` — shipped but with at least one documented trade-off (use rationale).
736
+ - `regression` — story landed but another behaviour broke (rare; open a FIX).
737
+
494
738
  ---
495
739
 
496
740
  ## TCR Recovery Patterns
@@ -538,7 +782,7 @@ When complex state management is error-prone → consider full reset + re-initia
538
782
  roll-build → ship anything (new idea, US-ID, free-text request)
539
783
  roll-fix → fix a specific known bug (FIX-XXX / BUG-XXX)
540
784
  roll-design → plan and design before building (no code output)
541
- roll-jot → fast capture a bug or idea into BACKLOG.md
785
+ roll-idea → fast capture a bug or idea into .roll/backlog.md
542
786
  roll-.clarify → passive scope clarification for vague build requests
543
787
  ```
544
788
 
@@ -555,5 +799,6 @@ The agent must explicitly produce (in text) before or during execution:
555
799
  - **Test Design**: scenarios, edge cases, test types
556
800
  - **Test Design Review**: coverage validation result
557
801
  - **TCR Log**: micro-step descriptions and commit count
802
+ - **E2E Deposit**: golden path E2E test file for this Story
558
803
  - **Quality Review**: post-TCR code review result
559
804
  - **Deployment target**: where it will be verified