@exaudeus/workrail 3.67.0 → 3.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/dist/application/services/compiler/template-registry.js +10 -1
  2. package/dist/cli/commands/worktrain-init.js +1 -1
  3. package/dist/console-ui/assets/{index-tOl8Vowf.js → index-CyzltI6D.js} +1 -1
  4. package/dist/console-ui/index.html +1 -1
  5. package/dist/coordinators/modes/full-pipeline.js +4 -4
  6. package/dist/coordinators/modes/implement-shared.js +5 -5
  7. package/dist/coordinators/modes/implement.js +4 -4
  8. package/dist/coordinators/pr-review.js +4 -4
  9. package/dist/daemon/workflow-runner.d.ts +1 -0
  10. package/dist/daemon/workflow-runner.js +1 -0
  11. package/dist/manifest.json +25 -25
  12. package/dist/mcp/handlers/v2-workflow.js +1 -1
  13. package/dist/mcp/workflow-protocol-contracts.js +2 -2
  14. package/docs/authoring-v2.md +4 -4
  15. package/docs/changelog-recent.md +3 -3
  16. package/docs/configuration.md +1 -1
  17. package/docs/design/adaptive-coordinator-context-candidates.md +1 -1
  18. package/docs/design/adaptive-coordinator-context.md +1 -1
  19. package/docs/design/adaptive-coordinator-routing-candidates.md +18 -18
  20. package/docs/design/adaptive-coordinator-routing-review.md +1 -1
  21. package/docs/design/adaptive-coordinator-routing.md +34 -34
  22. package/docs/design/agent-cascade-protocol.md +2 -2
  23. package/docs/design/console-daemon-separation-discovery.md +323 -0
  24. package/docs/design/context-assembly-design-candidates.md +1 -1
  25. package/docs/design/context-assembly-implementation-plan.md +1 -1
  26. package/docs/design/context-assembly-layer.md +2 -2
  27. package/docs/design/context-assembly-review-findings.md +1 -1
  28. package/docs/design/coordinator-access-audit.md +293 -0
  29. package/docs/design/coordinator-architecture-audit.md +62 -0
  30. package/docs/design/coordinator-error-handling-audit.md +240 -0
  31. package/docs/design/coordinator-testability-audit.md +426 -0
  32. package/docs/design/daemon-architecture-discovery.md +1 -1
  33. package/docs/design/daemon-console-separation-discovery.md +242 -0
  34. package/docs/design/daemon-memory-audit.md +203 -0
  35. package/docs/design/design-candidates-console-daemon-separation.md +256 -0
  36. package/docs/design/design-candidates-discovery-loop-fix.md +141 -0
  37. package/docs/design/design-review-findings-console-daemon-separation.md +106 -0
  38. package/docs/design/design-review-findings-discovery-loop-fix.md +81 -0
  39. package/docs/design/discovery-loop-fix-candidates.md +161 -0
  40. package/docs/design/discovery-loop-fix-design-review.md +106 -0
  41. package/docs/design/discovery-loop-fix-validation.md +258 -0
  42. package/docs/design/discovery-loop-investigation-A.md +188 -0
  43. package/docs/design/discovery-loop-investigation-B.md +287 -0
  44. package/docs/design/exploration-workflow-candidates.md +205 -0
  45. package/docs/design/exploration-workflow-design-review.md +166 -0
  46. package/docs/design/exploration-workflow-discovery.md +443 -0
  47. package/docs/design/ide-context-files-candidates.md +231 -0
  48. package/docs/design/ide-context-files-design-review.md +85 -0
  49. package/docs/design/ide-context-files.md +615 -0
  50. package/docs/design/implementation-plan-discovery-loop-fix.md +199 -0
  51. package/docs/design/implementation-plan-queue-poll-rotation.md +102 -0
  52. package/docs/design/in-process-http-audit.md +190 -0
  53. package/docs/design/layer3b-ghost-nodes-design-candidates.md +2 -2
  54. package/docs/design/loadSessionNotes-candidates.md +108 -0
  55. package/docs/design/loadSessionNotes-test-coverage-discovery.md +297 -0
  56. package/docs/design/loadSessionNotes-test-coverage-session4.md +209 -0
  57. package/docs/design/loadSessionNotes-test-coverage-v3.md +321 -0
  58. package/docs/design/probe-session-design-candidates.md +261 -0
  59. package/docs/design/probe-session-phase0.md +490 -0
  60. package/docs/design/routines-guide.md +7 -7
  61. package/docs/design/session-metrics-attribution-candidates.md +250 -0
  62. package/docs/design/session-metrics-attribution-design-review.md +115 -0
  63. package/docs/design/session-metrics-attribution-discovery.md +319 -0
  64. package/docs/design/session-metrics-candidates.md +227 -0
  65. package/docs/design/session-metrics-design-review.md +104 -0
  66. package/docs/design/session-metrics-discovery.md +454 -0
  67. package/docs/design/spawn-session-debug.md +202 -0
  68. package/docs/design/trigger-validator-candidates.md +214 -0
  69. package/docs/design/trigger-validator-review.md +109 -0
  70. package/docs/design/trigger-validator-shaping-phase0.md +239 -0
  71. package/docs/design/trigger-validator.md +454 -0
  72. package/docs/design/v2-core-design-locks.md +2 -2
  73. package/docs/design/workflow-extension-points.md +15 -15
  74. package/docs/design/workflow-id-validation-at-startup.md +1 -1
  75. package/docs/design/workflow-id-validation-implementation-plan.md +2 -2
  76. package/docs/design/workflow-trigger-lifecycle-audit.md +175 -0
  77. package/docs/design/worktrain-task-queue-candidates.md +5 -5
  78. package/docs/design/worktrain-task-queue.md +4 -4
  79. package/docs/discovery/coordinator-script-design.md +1 -1
  80. package/docs/discovery/coordinator-ux-discovery.md +3 -3
  81. package/docs/discovery/simulation-report.md +1 -1
  82. package/docs/discovery/workflow-modernization-discovery.md +326 -0
  83. package/docs/discovery/workflow-selection-for-discovery-tasks.md +33 -33
  84. package/docs/discovery/worktrain-status-briefing.md +1 -1
  85. package/docs/discovery/wr-discovery-goal-reframing.md +1 -1
  86. package/docs/docker.md +1 -1
  87. package/docs/ideas/backlog.md +227 -0
  88. package/docs/ideas/third-party-workflow-setup-design-thinking.md +1 -1
  89. package/docs/integrations/claude-code.md +5 -5
  90. package/docs/integrations/firebender.md +1 -1
  91. package/docs/plans/agentic-orchestration-roadmap.md +2 -2
  92. package/docs/plans/mr-review-workflow-redesign.md +9 -9
  93. package/docs/plans/ui-ux-workflow-design-candidates.md +4 -4
  94. package/docs/plans/ui-ux-workflow-discovery.md +2 -2
  95. package/docs/plans/workflow-categories-candidates.md +8 -8
  96. package/docs/plans/workflow-categories-discovery.md +4 -4
  97. package/docs/plans/workflow-modernization-design.md +430 -0
  98. package/docs/plans/workflow-staleness-detection-candidates.md +11 -11
  99. package/docs/plans/workflow-staleness-detection-review.md +4 -4
  100. package/docs/plans/workflow-staleness-detection.md +9 -9
  101. package/docs/plans/workrail-platform-vision.md +3 -3
  102. package/docs/reference/agent-context-cleaner-snippet.md +1 -1
  103. package/docs/reference/agent-context-guidance.md +4 -4
  104. package/docs/reference/context-optimization.md +2 -2
  105. package/docs/roadmap/now-next-later.md +2 -2
  106. package/docs/roadmap/open-work-inventory.md +16 -16
  107. package/docs/workflows.md +31 -31
  108. package/package.json +1 -1
  109. package/spec/workflow-tags.json +47 -47
  110. package/workflows/adaptive-ticket-creation.json +16 -16
  111. package/workflows/architecture-scalability-audit.json +22 -22
  112. package/workflows/bug-investigation.agentic.v2.json +3 -3
  113. package/workflows/classify-task-workflow.json +1 -1
  114. package/workflows/coding-task-workflow-agentic.json +6 -6
  115. package/workflows/cross-platform-code-conversion.v2.json +8 -8
  116. package/workflows/document-creation-workflow.json +8 -8
  117. package/workflows/documentation-update-workflow.json +8 -8
  118. package/workflows/intelligent-test-case-generation.json +2 -2
  119. package/workflows/learner-centered-course-workflow.json +2 -2
  120. package/workflows/mr-review-workflow.agentic.v2.json +4 -4
  121. package/workflows/personal-learning-materials-creation-branched.json +8 -8
  122. package/workflows/presentation-creation.json +5 -5
  123. package/workflows/production-readiness-audit.json +1 -1
  124. package/workflows/relocation-workflow-us.json +31 -31
  125. package/workflows/routines/context-gathering.json +1 -1
  126. package/workflows/routines/design-review.json +1 -1
  127. package/workflows/routines/execution-simulation.json +1 -1
  128. package/workflows/routines/feature-implementation.json +3 -3
  129. package/workflows/routines/final-verification.json +1 -1
  130. package/workflows/routines/hypothesis-challenge.json +1 -1
  131. package/workflows/routines/ideation.json +1 -1
  132. package/workflows/routines/parallel-work-partitioning.json +3 -3
  133. package/workflows/routines/philosophy-alignment.json +2 -2
  134. package/workflows/routines/plan-analysis.json +1 -1
  135. package/workflows/routines/plan-generation.json +1 -1
  136. package/workflows/routines/tension-driven-design.json +6 -6
  137. package/workflows/scoped-documentation-workflow.json +26 -26
  138. package/workflows/ui-ux-design-workflow.json +14 -14
  139. package/workflows/workflow-diagnose-environment.json +1 -1
  140. package/workflows/workflow-for-workflows.json +1 -1
@@ -11,8 +11,8 @@
11
11
  "converting code between platforms or languages"
12
12
  ],
13
13
  "examples": [
14
- "coding-task-workflow-agentic",
15
- "cross-platform-code-conversion"
14
+ "wr.coding-task",
15
+ "wr.cross-platform-code-conversion"
16
16
  ]
17
17
  },
18
18
  {
@@ -24,8 +24,8 @@
24
24
  "checking architecture for scalability issues"
25
25
  ],
26
26
  "examples": [
27
- "mr-review-workflow-agentic",
28
- "production-readiness-audit"
27
+ "wr.mr-review",
28
+ "wr.production-readiness-audit"
29
29
  ]
30
30
  },
31
31
  {
@@ -36,8 +36,8 @@
36
36
  "diagnosing tool, environment, or MCP server issues"
37
37
  ],
38
38
  "examples": [
39
- "bug-investigation-agentic",
40
- "workflow-diagnose-environment"
39
+ "wr.bug-investigation",
40
+ "wr.diagnose-environment"
41
41
  ]
42
42
  },
43
43
  {
@@ -50,7 +50,7 @@
50
50
  "shaping a fuzzy problem into a bounded, implementation-ready pitch before coding begins"
51
51
  ],
52
52
  "examples": [
53
- "ui-ux-design-workflow",
53
+ "wr.ui-ux-design",
54
54
  "wr.discovery",
55
55
  "wr.shaping"
56
56
  ]
@@ -64,8 +64,8 @@
64
64
  "writing documentation for a single bounded component or concept"
65
65
  ],
66
66
  "examples": [
67
- "document-creation-workflow",
68
- "documentation-update-workflow"
67
+ "wr.document-creation",
68
+ "wr.documentation-update"
69
69
  ]
70
70
  },
71
71
  {
@@ -77,7 +77,7 @@
77
77
  "generating test cases from ticket requirements"
78
78
  ],
79
79
  "examples": [
80
- "adaptive-ticket-creation",
80
+ "wr.adaptive-ticket-creation",
81
81
  "ticket-grooming"
82
82
  ]
83
83
  },
@@ -90,8 +90,8 @@
90
90
  "making a major personal decision like where to relocate"
91
91
  ],
92
92
  "examples": [
93
- "personal-learning-materials-creation-branched",
94
- "presentation-creation"
93
+ "wr.personal-learning-materials",
94
+ "wr.presentation-creation"
95
95
  ]
96
96
  },
97
97
  {
@@ -104,8 +104,8 @@
104
104
  "running a final verification pass"
105
105
  ],
106
106
  "examples": [
107
- "routine-context-gathering",
108
- "routine-hypothesis-challenge"
107
+ "wr.routine-context-gathering",
108
+ "wr.routine-hypothesis-challenge"
109
109
  ]
110
110
  },
111
111
  {
@@ -116,144 +116,144 @@
116
116
  "modernizing or updating an existing workflow"
117
117
  ],
118
118
  "examples": [
119
- "workflow-for-workflows"
119
+ "wr.workflow-for-workflows"
120
120
  ]
121
121
  }
122
122
  ],
123
123
  "workflows": {
124
- "adaptive-ticket-creation": {
124
+ "wr.adaptive-ticket-creation": {
125
125
  "tags": [
126
126
  "tickets"
127
127
  ]
128
128
  },
129
- "architecture-scalability-audit": {
129
+ "wr.architecture-scalability-audit": {
130
130
  "tags": [
131
131
  "review_audit"
132
132
  ]
133
133
  },
134
- "bug-investigation-agentic": {
134
+ "wr.bug-investigation": {
135
135
  "tags": [
136
136
  "investigation"
137
137
  ]
138
138
  },
139
- "coding-task-workflow-agentic": {
139
+ "wr.coding-task": {
140
140
  "tags": [
141
141
  "coding"
142
142
  ]
143
143
  },
144
- "cross-platform-code-conversion": {
144
+ "wr.cross-platform-code-conversion": {
145
145
  "tags": [
146
146
  "coding"
147
147
  ]
148
148
  },
149
- "document-creation-workflow": {
149
+ "wr.document-creation": {
150
150
  "tags": [
151
151
  "documentation"
152
152
  ]
153
153
  },
154
- "documentation-update-workflow": {
154
+ "wr.documentation-update": {
155
155
  "tags": [
156
156
  "documentation"
157
157
  ]
158
158
  },
159
- "intelligent-test-case-generation": {
159
+ "wr.intelligent-test-case-generation": {
160
160
  "tags": [
161
161
  "tickets",
162
162
  "coding"
163
163
  ]
164
164
  },
165
- "mr-review-workflow-agentic": {
165
+ "wr.mr-review": {
166
166
  "tags": [
167
167
  "review_audit"
168
168
  ]
169
169
  },
170
- "personal-learning-course-design": {
170
+ "wr.personal-learning-course-design": {
171
171
  "tags": [
172
172
  "learning"
173
173
  ]
174
174
  },
175
- "personal-learning-materials-creation-branched": {
175
+ "wr.personal-learning-materials": {
176
176
  "tags": [
177
177
  "learning"
178
178
  ]
179
179
  },
180
- "presentation-creation": {
180
+ "wr.presentation-creation": {
181
181
  "tags": [
182
182
  "learning"
183
183
  ]
184
184
  },
185
- "production-readiness-audit": {
185
+ "wr.production-readiness-audit": {
186
186
  "tags": [
187
187
  "review_audit"
188
188
  ]
189
189
  },
190
- "relocation-workflow-us": {
190
+ "wr.relocation-us": {
191
191
  "tags": [
192
192
  "learning"
193
193
  ]
194
194
  },
195
- "routine-context-gathering": {
195
+ "wr.routine-context-gathering": {
196
196
  "tags": [
197
197
  "routines"
198
198
  ]
199
199
  },
200
- "routine-design-review": {
200
+ "wr.routine-design-review": {
201
201
  "tags": [
202
202
  "routines"
203
203
  ]
204
204
  },
205
- "routine-execution-simulation": {
205
+ "wr.routine-execution-simulation": {
206
206
  "tags": [
207
207
  "routines"
208
208
  ]
209
209
  },
210
- "routine-feature-implementation": {
210
+ "wr.routine-feature-implementation": {
211
211
  "tags": [
212
212
  "routines",
213
213
  "coding"
214
214
  ]
215
215
  },
216
- "routine-final-verification": {
216
+ "wr.routine-final-verification": {
217
217
  "tags": [
218
218
  "routines"
219
219
  ]
220
220
  },
221
- "routine-hypothesis-challenge": {
221
+ "wr.routine-hypothesis-challenge": {
222
222
  "tags": [
223
223
  "routines"
224
224
  ]
225
225
  },
226
- "routine-ideation": {
226
+ "wr.routine-ideation": {
227
227
  "tags": [
228
228
  "routines"
229
229
  ]
230
230
  },
231
- "routine-parallel-work-partitioning": {
231
+ "wr.routine-parallel-work-partitioning": {
232
232
  "tags": [
233
233
  "routines"
234
234
  ]
235
235
  },
236
- "routine-philosophy-alignment": {
236
+ "wr.routine-philosophy-alignment": {
237
237
  "tags": [
238
238
  "routines"
239
239
  ]
240
240
  },
241
- "routine-plan-analysis": {
241
+ "wr.routine-plan-analysis": {
242
242
  "tags": [
243
243
  "routines"
244
244
  ]
245
245
  },
246
- "routine-plan-generation": {
246
+ "wr.routine-plan-generation": {
247
247
  "tags": [
248
248
  "routines"
249
249
  ]
250
250
  },
251
- "routine-tension-driven-design": {
251
+ "wr.routine-tension-driven-design": {
252
252
  "tags": [
253
253
  "routines"
254
254
  ]
255
255
  },
256
- "scoped-documentation-workflow": {
256
+ "wr.scoped-documentation": {
257
257
  "tags": [
258
258
  "documentation"
259
259
  ]
@@ -263,17 +263,17 @@
263
263
  "tickets"
264
264
  ]
265
265
  },
266
- "ui-ux-design-workflow": {
266
+ "wr.ui-ux-design": {
267
267
  "tags": [
268
268
  "design"
269
269
  ]
270
270
  },
271
- "workflow-diagnose-environment": {
271
+ "wr.diagnose-environment": {
272
272
  "tags": [
273
273
  "investigation"
274
274
  ]
275
275
  },
276
- "workflow-for-workflows": {
276
+ "wr.workflow-for-workflows": {
277
277
  "tags": [
278
278
  "authoring"
279
279
  ]
@@ -284,7 +284,7 @@
284
284
  "investigation"
285
285
  ]
286
286
  },
287
- "classify-task-workflow": {
287
+ "wr.classify-task": {
288
288
  "tags": [
289
289
  "routines",
290
290
  "coding"
@@ -315,4 +315,4 @@
315
315
  ]
316
316
  }
317
317
  }
318
- }
318
+ }
@@ -1,5 +1,5 @@
1
1
  {
2
- "id": "adaptive-ticket-creation",
2
+ "id": "wr.adaptive-ticket-creation",
3
3
  "name": "Adaptive Ticket Creation Workflow",
4
4
  "version": "1.0.0",
5
5
  "metricsProfile": "ticket",
@@ -16,9 +16,9 @@
16
16
  "Agent has file system access for loading team preferences and persisting rules."
17
17
  ],
18
18
  "metaGuidance": [
19
- "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria not user-story paraphrases.",
19
+ "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria \u2014 not user-story paraphrases.",
20
20
  "EXPLORE FIRST: use tools to gather context before asking the user anything. Ask only for information you genuinely cannot determine with tools or from the request itself.",
21
- "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path complex sessions are where durable conventions emerge and where the investment pays off.",
21
+ "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path \u2014 complex sessions are where durable conventions emerge and where the investment pays off.",
22
22
  "AUTONOMOUS TRIAGE: decide pathComplexity (Simple / Standard / Epic) yourself from the request. Surface your reasoning, then wait for confirmation.",
23
23
  "QUALITY FLOOR: every ticket must have a context-rich description, checkbox-style acceptance criteria that are objectively testable, and an effort estimate."
24
24
  ],
@@ -29,7 +29,7 @@
29
29
  "promptBlocks": {
30
30
  "goal": "Analyze the request, gather available context, and select the right complexity path before doing any ticket work.",
31
31
  "constraints": [
32
- "Decide the path yourself do not ask the user to choose.",
32
+ "Decide the path yourself \u2014 do not ask the user to choose.",
33
33
  "Load ./.workflow_rules/ticket_creation.md if it exists and let it influence your triage. If the file does not exist, note this explicitly in your output so the user knows team conventions were not applied.",
34
34
  "Set pathComplexity to exactly one of: Simple, Standard, or Epic."
35
35
  ],
@@ -37,7 +37,7 @@
37
37
  "Read any attached documents, linked PRDs, or referenced specs.",
38
38
  "Identify complexity signals: scope breadth, number of distinct deliverables, cross-team dependencies, technical unknowns, and estimated ticket count.",
39
39
  "Apply the triage rubric: Simple = single ticket, clear requirements, no blocking unknowns, minimal dependencies. Standard = multiple related tickets, moderate scope, some analysis needed. Epic = complex feature requiring decomposition, multiple teams or significant unknowns, likely 6+ tickets.",
40
- "Upgrade triggers escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
40
+ "Upgrade triggers \u2014 escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
41
41
  "State your selected path and the top three reasons. Capture pathComplexity in context."
42
42
  ],
43
43
  "outputRequired": {
@@ -61,7 +61,7 @@
61
61
  "promptBlocks": {
62
62
  "goal": "Generate one complete, developer-ready Jira ticket for this request.",
63
63
  "constraints": [
64
- "Acceptance criteria must be phrased as observable, testable conditions not user-story restatements.",
64
+ "Acceptance criteria must be phrased as observable, testable conditions \u2014 not user-story restatements.",
65
65
  "Follow any team conventions from ./.workflow_rules/ticket_creation.md.",
66
66
  "Include all fields a developer needs to start work without asking follow-up questions."
67
67
  ],
@@ -111,7 +111,7 @@
111
111
  "Load ./.workflow_rules/ticket_creation.md and note any relevant team conventions.",
112
112
  "Identify: key stakeholders, team dependencies, technical constraints, known risks, and any conflicting requirements.",
113
113
  "Classify each gap as: Critical (blocks planning), Important (affects scope), or Nice-to-have (can proceed without it).",
114
- "For Critical and Important gaps that tools cannot resolve, ask the user in a single consolidated question block, not one at a time.",
114
+ "For Critical and Important gaps that tools cannot resolve, ask the user \u2014 in a single consolidated question block, not one at a time.",
115
115
  "After receiving answers, check whether any response reveals scope that would change `pathComplexity` (e.g. the user confirms three teams are involved, or the feature is narrower than initially assessed). If so, state the new classification and reasoning, and ask the user to confirm before continuing to Phase 2."
116
116
  ],
117
117
  "outputRequired": {
@@ -143,16 +143,16 @@
143
143
  "promptBlocks": {
144
144
  "goal": "Produce a structured plan that will drive ticket generation. This plan is the source of truth for scope.",
145
145
  "constraints": [
146
- "Be explicit about scope boundaries ambiguous scope will produce ambiguous tickets.",
146
+ "Be explicit about scope boundaries \u2014 ambiguous scope will produce ambiguous tickets.",
147
147
  "Success criteria must be measurable, not just descriptive.",
148
148
  "For Standard path: this plan feeds directly into batch ticket generation."
149
149
  ],
150
150
  "procedure": [
151
151
  "Write: Project Summary (2-3 sentences, what is being built and why).",
152
152
  "Write: Key Deliverables (bulleted list of distinct components or features).",
153
- "Write: In-Scope (explicit list prevents scope creep).",
154
- "Write: Out-of-Scope (explicit exclusions prevents misunderstandings).",
155
- "Write: Success Criteria (measurable definition of done each item verifiable).",
153
+ "Write: In-Scope (explicit list \u2014 prevents scope creep).",
154
+ "Write: Out-of-Scope (explicit exclusions \u2014 prevents misunderstandings).",
155
+ "Write: Success Criteria (measurable definition of done \u2014 each item verifiable).",
156
156
  "Write: High-Level Timeline (phases or milestones with rough sizing).",
157
157
  "Review: does every deliverable map clearly to implementable work? Is anything in scope that should be out?"
158
158
  ],
@@ -178,7 +178,7 @@
178
178
  "goal": "Break the approved plan into a logical work hierarchy that development teams can execute.",
179
179
  "constraints": [
180
180
  "Every item in the plan's In-Scope list must map to at least one work item in the hierarchy.",
181
- "Dependencies must be explicit not implied by ordering alone.",
181
+ "Dependencies must be explicit \u2014 not implied by ordering alone.",
182
182
  "Oversized stories (more than one sprint of work) should be split."
183
183
  ],
184
184
  "procedure": [
@@ -210,7 +210,7 @@
210
210
  "promptBlocks": {
211
211
  "goal": "Add effort estimates, risk assessments, and team assignments to each story in the hierarchy.",
212
212
  "constraints": [
213
- "Conservative estimates are better than optimistic ones note uncertainty explicitly.",
213
+ "Conservative estimates are better than optimistic ones \u2014 note uncertainty explicitly.",
214
214
  "Justify each estimate with one sentence of reasoning.",
215
215
  "Flag stories on the critical path."
216
216
  ],
@@ -220,7 +220,7 @@
220
220
  "Assign priority: must-have for MVP, should-have, nice-to-have.",
221
221
  "Note suggested team or skill area for each story.",
222
222
  "Identify critical path: which stories block the most downstream work? Surface these explicitly.",
223
- "Flag any stories whose estimates feel uncertain surface the unknowns rather than hiding them in a range."
223
+ "Flag any stories whose estimates feel uncertain \u2014 surface the unknowns rather than hiding them in a range."
224
224
  ],
225
225
  "outputRequired": {
226
226
  "notesMarkdown": "Total story point estimate, critical path items, high-risk stories."
@@ -285,7 +285,7 @@
285
285
  "promptBlocks": {
286
286
  "goal": "Extract actionable team preferences from this session and persist them so future runs use them automatically.",
287
287
  "constraints": [
288
- "Only write rules that are genuinely reusable across future tickets skip one-off project specifics.",
288
+ "Only write rules that are genuinely reusable across future tickets \u2014 skip one-off project specifics.",
289
289
  "Keep rules concise and actionable, not narrative.",
290
290
  "Append to ./.workflow_rules/ticket_creation.md rather than replacing it."
291
291
  ],
@@ -293,7 +293,7 @@
293
293
  "Review what conventions, preferences, or requirements emerged during this session.",
294
294
  "Identify patterns worth preserving: naming conventions, field usage, AC format preferences, estimation approach, labeling rules.",
295
295
  "Draft new rules as short, imperative statements (e.g., 'Use T-shirt sizing not Fibonacci', 'Always include a Figma link in design tickets').",
296
- "Check against existing rules avoid duplicates or contradictions.",
296
+ "Check against existing rules \u2014 avoid duplicates or contradictions.",
297
297
  "Append new rules to ./.workflow_rules/ticket_creation.md, creating the file if it does not exist."
298
298
  ],
299
299
  "outputRequired": {
@@ -1,5 +1,5 @@
1
1
  {
2
- "id": "architecture-scalability-audit",
2
+ "id": "wr.architecture-scalability-audit",
3
3
  "name": "Architecture Scalability Audit",
4
4
  "version": "0.1.0",
5
5
  "metricsProfile": "research",
@@ -28,7 +28,7 @@
28
28
  "DEFAULT BEHAVIOR: self-execute with tools. Ask only for true scope or dimension decisions you cannot resolve yourself.",
29
29
  "V2 DURABILITY: keep workflow truth in output.notesMarkdown and explicit context fields. Human-facing markdown artifacts are optional companions only.",
30
30
  "OWNERSHIP: the main agent owns the fact packet, synthesis, verdict calibration, and final handoff. Delegated dimension audits are evidence, not authority.",
31
- "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant surface them as advisory notes instead.",
31
+ "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant \u2014 surface them as advisory notes instead.",
32
32
  "EVIDENCE FIRST: every risk or will_break finding must cite a specific file, class, method, or pattern in the codebase. Technology name alone is not evidence.",
33
33
  "GROWTH SCENARIO: every concern must name a growth scenario (e.g. 10x traffic, 100x records, 3x team size). Generic 'won't scale' findings are not acceptable.",
34
34
  "VERDICT TIERS: use will_break / risk / fine. Do not force a cleaner answer than the evidence supports.",
@@ -52,10 +52,10 @@
52
52
  ],
53
53
  "procedure": [
54
54
  "Read the codebase to understand the architecture: key components, entry points, data flows, and main patterns within the declared scope.",
55
- "Present the five scalability dimensions and ask the user to select which apply: (1) load handles more requests, users, or throughput; (2) data_volume handles more records, storage, or query size; (3) team_org more teams or developers working on this scope; (4) feature_extensibility more features added without rearchitecting; (5) operational more deployments, environments, or operational complexity.",
56
- "Ask the user to confirm the scope boundary what is explicitly in and explicitly out.",
57
- "Classify audit complexity: Simple (1–2 dimensions, small scope), Medium (2–3 dimensions, moderate scope), Complex (4–5 dimensions or large scope).",
58
- "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1–3. If any score is 1, gather more context before advancing."
55
+ "Present the five scalability dimensions and ask the user to select which apply: (1) load \u2014 handles more requests, users, or throughput; (2) data_volume \u2014 handles more records, storage, or query size; (3) team_org \u2014 more teams or developers working on this scope; (4) feature_extensibility \u2014 more features added without rearchitecting; (5) operational \u2014 more deployments, environments, or operational complexity.",
56
+ "Ask the user to confirm the scope boundary \u2014 what is explicitly in and explicitly out.",
57
+ "Classify audit complexity: Simple (1\u20132 dimensions, small scope), Medium (2\u20133 dimensions, moderate scope), Complex (4\u20135 dimensions or large scope).",
58
+ "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1\u20133. If any score is 1, gather more context before advancing."
59
59
  ],
60
60
  "outputRequired": {
61
61
  "notesMarkdown": "Scope boundary (in and out), declared dimensions with rationale, audit complexity classification, and any open boundary questions.",
@@ -113,7 +113,7 @@
113
113
  "procedure": [
114
114
  "Create a neutral `scalabilityFactPacket` containing: scope boundary (in and out), declared dimensions, key architectural patterns found, main components and their roles, data flow and storage patterns, concurrency and state management approach, dependency boundaries and coupling, deployment and runtime assumptions, and explicit open unknowns.",
115
115
  "Include realism signals: code that looks scalable at a glance but may have hidden limits (e.g. in-memory state, synchronous choke points, missing pagination, tight coupling between components).",
116
- "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
116
+ "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness \u2014 check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability \u2014 check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction \u2014 specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added \u2014 specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs \u2014 specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
117
117
  "Set selectedReviewerFamilies to the list of assigned families (one per declared dimension). Set contradictionCount and blindSpotCount to 0."
118
118
  ],
119
119
  "outputRequired": {
@@ -132,7 +132,7 @@
132
132
  "var": "auditComplexity",
133
133
  "equals": "Simple"
134
134
  },
135
- "text": "For a Simple audit, keep the fact packet compact scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
135
+ "text": "For a Simple audit, keep the fact packet compact \u2014 scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
136
136
  }
137
137
  ],
138
138
  "requireConfirmation": false
@@ -157,11 +157,11 @@
157
157
  ],
158
158
  "Each reviewer family uses scalabilityFactPacket as primary truth.",
159
159
  "Reviewer-family outputs are raw evidence. The main agent owns synthesis and verdict assignment.",
160
- "Each reviewer family audits only its declared dimension no cross-dimension scope creep."
160
+ "Each reviewer family audits only its declared dimension \u2014 no cross-dimension scope creep."
161
161
  ],
162
162
  "procedure": [
163
163
  "Before investigating, restate your scalabilityHypothesis and name which dimension is most likely to challenge it.",
164
- "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
164
+ "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references \u2014 not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
165
165
  "After completing all dimension investigations, synthesize explicitly: what was confirmed, what was genuinely new, what looks weak or overstated, and what changed your current hypothesis.",
166
166
  "Build dimensionFindings keyed by dimension containing: findings list, verdict summary, evidence quality assessment, and open questions.",
167
167
  "Identify cross-cutting concerns: architectural patterns or components that appear in findings from multiple dimensions."
@@ -252,10 +252,10 @@
252
252
  "This is a structured four-item check, not a free-form review."
253
253
  ],
254
254
  "procedure": [
255
- "Check 1 Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
256
- "Check 2 Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
257
- "Check 3 Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
258
- "Check 4 Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
255
+ "Check 1 \u2014 Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
256
+ "Check 2 \u2014 Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
257
+ "Check 3 \u2014 Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
258
+ "Check 4 \u2014 Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
259
259
  "Set blindSpotCount to the number of blind spots found across all four checks."
260
260
  ],
261
261
  "outputRequired": {
@@ -307,11 +307,11 @@
307
307
  "Do not advance to handoff with known hard gate failures."
308
308
  ],
309
309
  "procedure": [
310
- "Verdict aggregation derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
311
- "Hard gate 1 Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or downgrading to risk with an evidence-needed note.",
312
- "Hard gate 2 Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
313
- "Hard gate 3 Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
314
- "Hard gate 4 Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
310
+ "Verdict aggregation \u2014 derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
311
+ "Hard gate 1 \u2014 Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or, failing that, recording the finding as risk (downgrading it if it was will_break) with an evidence-needed note.",
312
+ "Hard gate 2 \u2014 Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
313
+ "Hard gate 3 \u2014 Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
314
+ "Hard gate 4 \u2014 Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
315
315
  "Set hardGatesPassed = true only when the verdict aggregation and all four gates pass. Set hardGateFailures to the list of any that needed fixing."
316
316
  ],
317
317
  "outputRequired": {
@@ -335,13 +335,13 @@
335
335
  "Do not drift into implementation planning or remediation design unless the user explicitly asks."
336
336
  ],
337
337
  "procedure": [
338
- "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale name the specific dimension and finding that drove it.",
338
+ "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale \u2014 name the specific dimension and finding that drove it.",
339
339
  "For each declared dimension, give: dimension name, verdict tier (will_break / risk / fine), top finding with specific code reference, growth scenario, and severity.",
340
340
  "List cross-cutting concerns: patterns that create scalability risk across multiple dimensions.",
341
341
  "Revisit scalabilityHypothesis from Phase 1: was it confirmed or revised? What evidence changed your view?",
342
342
  "Give a prioritized concern list ordered by: (1) will_break findings first, (2) risk findings by severity, (3) cross-cutting concerns, (4) fine findings worth noting as already solid.",
343
343
  "Surface any advisory notes for undeclared dimensions that may be worth considering.",
344
- "State what is already well-designed for scale not everything should be a concern."
344
+ "State what is already well-designed for scale \u2014 not everything should be a concern."
345
345
  ],
346
346
  "outputRequired": {
347
347
  "notesMarkdown": "Decision-ready scalability handoff: overall verdict, per-dimension summary with code references, prioritized concerns, cross-cutting concerns, hypothesis outcome, and what is already solid."
@@ -350,7 +350,7 @@
350
350
  "The handoff is verdict-first and evidence-grounded.",
351
351
  "Every concern is tied to a specific code reference and growth scenario.",
352
352
  "The hypothesis from Phase 1 is explicitly addressed.",
353
- "What is already well-designed is stated not just the concerns."
353
+ "What is already well-designed is stated \u2014 not just the concerns."
354
354
  ]
355
355
  },
356
356
  "requireConfirmation": false
@@ -1,5 +1,5 @@
1
1
  {
2
- "id": "bug-investigation-agentic",
2
+ "id": "wr.bug-investigation",
3
3
  "name": "Bug Investigation",
4
4
  "version": "2.0.0",
5
5
  "description": "Use this to diagnose a bug or unexpected behavior in code. Builds a hypothesis, gathers evidence, and proves or disproves the root cause before concluding.",
@@ -58,7 +58,7 @@
58
58
  "steps": [
59
59
  {
60
60
  "id": "phase-0-triage-and-intake",
61
- "title": "Phase 0: Triage (Bug Intake Risk Mode)",
61
+ "title": "Phase 0: Triage (Bug Intake \u2022 Risk \u2022 Mode)",
62
62
  "prompt": "Understand the bug report and choose the right rigor.\n\nCapture:\n- `bugSummary`: concise statement of the issue\n- `reproSummary`: repro steps, symptoms, expected behavior, environment notes\n- `investigationComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `maxParallelism`: 0 / 2 / 3\n\nDecision guidance:\n- QUICK: clear repro, narrow surface area, low ambiguity\n- STANDARD: moderate ambiguity, moderate system breadth, or meaningful risk\n- THOROUGH: high ambiguity, high-risk production impact, broad surface area, or multiple plausible causes\n\nSet context variables:\n- `bugSummary`\n- `reproSummary`\n- `investigationComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `maxParallelism`\n- `reproducibilityConfidence` (High / Medium / Low)\n\nAsk for confirmation only if the chosen rigor materially affects expectations or if critical repro details are still missing.",
63
63
  "requireConfirmation": true
64
64
  },
@@ -141,7 +141,7 @@
141
141
  {
142
142
  "id": "phase-4b-loop-decision",
143
143
  "title": "Evidence Loop Decision",
144
- "prompt": "Decide whether the evidence loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` continue\n- else if `unresolvedEvidenceGapCount > 0` continue\n- else if `hasStrongAlternative = true` and the alternative is not meaningfully weaker continue\n- else if `diagnosisType = inconclusive_but_narrowed` and further evidence is not realistically available stop with bounded uncertainty\n- else stop\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
144
+ "prompt": "Decide whether the evidence loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` \u2192 continue\n- else if `unresolvedEvidenceGapCount > 0` \u2192 continue\n- else if `hasStrongAlternative = true` and the alternative is not meaningfully weaker \u2192 continue\n- else if `diagnosisType = inconclusive_but_narrowed` and further evidence is not realistically available \u2192 stop with bounded uncertainty\n- else \u2192 stop\n\nOutput exactly this shape (`decision` must be `continue` or `stop`, per the rules above):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
145
145
  "requireConfirmation": true,
146
146
  "outputContract": {
147
147
  "contractRef": "wr.contracts.loop_control"
@@ -1,5 +1,5 @@
1
1
  {
2
- "id": "classify-task-workflow",
2
+ "id": "wr.classify-task",
3
3
  "name": "Classify Task",
4
4
  "version": "0.1.0",
5
5
  "metricsProfile": "none",
@@ -1,5 +1,5 @@
1
1
  {
2
- "id": "coding-task-workflow-agentic",
2
+ "id": "wr.coding-task",
3
3
  "name": "Agentic Task Dev Workflow",
4
4
  "version": "1.2.0",
5
5
  "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
@@ -143,7 +143,7 @@
143
143
  "SUBAGENT SYNTHESIS: treat subagent output as evidence, not conclusions. State your hypothesis before delegating, then interrogate what came back: what was missed, wrong, or new? Say what changed your mind or what you still reject, and why.",
144
144
  "PARALLELISM: when reads, audits, or delegations are independent, run them in parallel inside the phase. Parallelize cognition; serialize synthesis and canonical writes.",
145
145
  "PHILOSOPHY LENS: apply the user's coding philosophy (from active session rules) as the evaluation lens. Flag violations by principle name, not as generic feedback. If principles conflict, surface the tension explicitly instead of silently choosing.",
146
- "VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness in that order of reliability.",
146
+ "VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness \u2014 in that order of reliability.",
147
147
  "DRIFT HANDLING: when reality diverges from the plan, update the plan artifact and re-audit deliberately rather than accumulating undocumented drift.",
148
148
  "NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS.",
149
149
  "SLICE DISCIPLINE: Phase 6 is a loop -- implement ONE slice per iteration. Do not implement multiple slices at once. The verification loop exists to catch drift per slice, not retroactively."
@@ -218,7 +218,7 @@
218
218
  },
219
219
  {
220
220
  "id": "phase-1b-design-deep",
221
- "title": "Phase 1b: Design Generation (Injected Routine Tension-Driven Design)",
221
+ "title": "Phase 1b: Design Generation (Injected Routine \u2014 Tension-Driven Design)",
222
222
  "runCondition": {
223
223
  "and": [
224
224
  {
@@ -257,7 +257,7 @@
257
257
  }
258
258
  ]
259
259
  },
260
- "prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` chosen design with rationale tied to tensions\n- `runnerUpApproach` next-best option and why it lost\n- `architectureRationale` tensions resolved vs accepted\n- `pivotTriggers` conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
260
+ "prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` \u2014 chosen design with rationale tied to tensions\n- `runnerUpApproach` \u2014 next-best option and why it lost\n- `architectureRationale` \u2014 tensions resolved vs accepted\n- `pivotTriggers` \u2014 conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` \u2014 failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
261
261
  "promptFragments": [
262
262
  {
263
263
  "id": "phase-1c-challenge-standard",
@@ -421,7 +421,7 @@
421
421
  "var": "taskComplexity",
422
422
  "not_equals": "Small"
423
423
  },
424
- "prompt": "Turn the decision into a plan someone else could execute without guessing.\n\n**Open questions gate:** check `openQuestions` from Phase 0. If any remain unanswered and would materially affect implementation quality, either resolve them now with tools or record them in the risk register with an explicit decision about how to proceed without them. Do not silently carry unanswered questions into implementation.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` count of open questions that would materially affect implementation quality\n- `planConfidenceBand` Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
424
+ "prompt": "Turn the decision into a plan someone else could execute without guessing.\n\n**Open questions gate:** check `openQuestions` from Phase 0. If any remain unanswered and would materially affect implementation quality, either resolve them now with tools or record them in the risk register with an explicit decision about how to proceed without them. Do not silently carry unanswered questions into implementation.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` \u2014 count of open questions that would materially affect implementation quality\n- `planConfidenceBand` \u2014 Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
425
425
  "assessmentRefs": [
426
426
  "plan-completeness-gate",
427
427
  "invariant-clarity-gate",
@@ -535,7 +535,7 @@
535
535
  {
536
536
  "id": "phase-4b-loop-decision",
537
537
  "title": "Loop Exit Decision",
538
- "prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
538
+ "prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop \u2014 but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
539
539
  "requireConfirmation": true,
540
540
  "outputContract": {
541
541
  "contractRef": "wr.contracts.loop_control"