@exaudeus/workrail 3.39.0 → 3.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/init.js +0 -3
- package/dist/cli-worktrain.js +58 -26
- package/dist/cli.js +0 -18
- package/dist/config/app-config.d.ts +0 -16
- package/dist/config/app-config.js +0 -14
- package/dist/config/config-file.js +0 -3
- package/dist/console-ui/assets/index-CQt4UhPB.js +28 -0
- package/dist/console-ui/assets/index-DGj8EsFR.css +1 -0
- package/dist/console-ui/index.html +2 -2
- package/dist/coordinators/pr-review.d.ts +23 -1
- package/dist/coordinators/pr-review.js +224 -5
- package/dist/daemon/daemon-events.d.ts +9 -1
- package/dist/daemon/soul-template.d.ts +2 -2
- package/dist/daemon/soul-template.js +11 -1
- package/dist/daemon/workflow-runner.d.ts +17 -3
- package/dist/daemon/workflow-runner.js +401 -28
- package/dist/di/container.js +1 -25
- package/dist/di/tokens.d.ts +0 -3
- package/dist/di/tokens.js +0 -3
- package/dist/engine/engine-factory.js +0 -1
- package/dist/infrastructure/console-defaults.d.ts +1 -0
- package/dist/infrastructure/console-defaults.js +4 -0
- package/dist/infrastructure/session/index.d.ts +0 -1
- package/dist/infrastructure/session/index.js +1 -3
- package/dist/manifest.json +124 -124
- package/dist/mcp/handlers/session.d.ts +1 -0
- package/dist/mcp/handlers/session.js +61 -13
- package/dist/mcp/output-schemas.d.ts +10 -10
- package/dist/mcp/server.js +1 -18
- package/dist/mcp/tools.d.ts +12 -12
- package/dist/mcp/transports/http-entry.js +0 -2
- package/dist/mcp/transports/stdio-entry.js +1 -2
- package/dist/mcp/types.d.ts +0 -2
- package/dist/trigger/daemon-console.d.ts +2 -0
- package/dist/trigger/daemon-console.js +1 -1
- package/dist/trigger/trigger-listener.d.ts +2 -0
- package/dist/trigger/trigger-listener.js +3 -1
- package/dist/trigger/trigger-router.d.ts +4 -3
- package/dist/trigger/trigger-router.js +13 -5
- package/dist/trigger/trigger-store.js +17 -4
- package/dist/types/workflow-source.d.ts +0 -1
- package/dist/types/workflow-source.js +3 -6
- package/dist/types/workflow.d.ts +1 -1
- package/dist/types/workflow.js +1 -2
- package/dist/v2/durable-core/domain/artifact-contract-validator.js +66 -0
- package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +25 -0
- package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.js +31 -0
- package/dist/v2/durable-core/schemas/artifacts/index.d.ts +3 -1
- package/dist/v2/durable-core/schemas/artifacts/index.js +14 -1
- package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +41 -0
- package/dist/v2/durable-core/schemas/artifacts/review-verdict.js +30 -0
- package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +236 -236
- package/dist/v2/durable-core/schemas/session/events.d.ts +50 -50
- package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
- package/dist/v2/durable-core/schemas/session/manifest.d.ts +4 -4
- package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
- package/dist/v2/usecases/console-routes.d.ts +2 -1
- package/dist/v2/usecases/console-routes.js +207 -5
- package/dist/v2/usecases/console-service.js +14 -0
- package/dist/v2/usecases/console-types.d.ts +1 -0
- package/docs/authoring.md +16 -16
- package/docs/design/coordinator-artifact-protocol-design-candidates.md +155 -0
- package/docs/design/coordinator-artifact-protocol-design-review.md +103 -0
- package/docs/design/coordinator-artifact-protocol-implementation-plan.md +259 -0
- package/docs/design/coordinator-message-queue-drain-plan.md +241 -0
- package/docs/design/coordinator-message-queue-drain-review.md +120 -0
- package/docs/design/coordinator-message-queue-drain.md +289 -0
- package/docs/design/shaping-workflow-external-research.md +119 -0
- package/docs/discovery/late-bound-goals-impl-plan.md +147 -0
- package/docs/discovery/late-bound-goals-review.md +82 -0
- package/docs/discovery/late-bound-goals.md +118 -0
- package/docs/discovery/steer-endpoint-design-candidates.md +288 -0
- package/docs/discovery/steer-endpoint-design-review-findings.md +104 -0
- package/docs/discovery/steer-endpoint-implementation-plan.md +284 -0
- package/docs/ideas/backlog.md +447 -97
- package/docs/ideas/design-candidates-console-session-tree-impl.md +64 -0
- package/docs/ideas/design-candidates-session-tree-view.md +196 -0
- package/docs/ideas/design-review-findings-console-session-tree-impl.md +75 -0
- package/docs/ideas/design-review-findings-session-tree-view.md +88 -0
- package/docs/ideas/implementation_plan_session_tree_view.md +238 -0
- package/package.json +2 -1
- package/spec/authoring-spec.json +16 -16
- package/spec/shape.schema.json +178 -0
- package/spec/workflow-tags.json +232 -47
- package/workflows/coding-task-workflow-agentic.json +491 -480
- package/workflows/mr-review-workflow.agentic.v2.json +5 -1
- package/workflows/wr.shaping.json +182 -0
- package/dist/console-ui/assets/index-3oXZ_A9m.js +0 -28
- package/dist/console-ui/assets/index-8dh0Psu-.css +0 -1
- package/dist/infrastructure/session/DashboardHeartbeat.d.ts +0 -8
- package/dist/infrastructure/session/DashboardHeartbeat.js +0 -39
- package/dist/infrastructure/session/DashboardLockRelease.d.ts +0 -2
- package/dist/infrastructure/session/DashboardLockRelease.js +0 -29
- package/dist/infrastructure/session/HttpServer.d.ts +0 -60
- package/dist/infrastructure/session/HttpServer.js +0 -912
- package/workflows/coding-task-workflow-agentic.lean.v2.json +0 -648
- package/workflows/coding-task-workflow-agentic.v2.json +0 -324
|
@@ -1,648 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"id": "coding-task-workflow-agentic",
|
|
3
|
-
"name": "Agentic Task Dev Workflow (Lean)",
|
|
4
|
-
"version": "1.1.0",
|
|
5
|
-
"description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
|
|
6
|
-
"about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
|
|
7
|
-
"examples": [
|
|
8
|
-
"Implement JWT refresh token rotation in the auth service",
|
|
9
|
-
"Fix the race condition in the cache invalidation path when concurrent writes occur",
|
|
10
|
-
"Refactor the payment flow to use a Result type instead of throwing exceptions",
|
|
11
|
-
"Add pagination support to the messaging inbox API endpoint"
|
|
12
|
-
],
|
|
13
|
-
"recommendedPreferences": {
|
|
14
|
-
"recommendedAutonomy": "guided",
|
|
15
|
-
"recommendedRiskPolicy": "conservative"
|
|
16
|
-
},
|
|
17
|
-
"assessments": [
|
|
18
|
-
{
|
|
19
|
-
"id": "design-soundness-gate",
|
|
20
|
-
"purpose": "The selected design approach is committed with rationale. No unresolved ambiguity remains about what to build.",
|
|
21
|
-
"dimensions": [
|
|
22
|
-
{
|
|
23
|
-
"id": "design_soundness",
|
|
24
|
-
"purpose": "Design decision is made, tradeoffs are recorded, and there is no remaining ambiguity about the chosen approach.",
|
|
25
|
-
"levels": [
|
|
26
|
-
"low",
|
|
27
|
-
"high"
|
|
28
|
-
]
|
|
29
|
-
}
|
|
30
|
-
]
|
|
31
|
-
},
|
|
32
|
-
{
|
|
33
|
-
"id": "design-gaps-gate",
|
|
34
|
-
"purpose": "A deliberate scan for unconsidered alternatives, unhandled edge cases, or untracked risks has been completed.",
|
|
35
|
-
"dimensions": [
|
|
36
|
-
{
|
|
37
|
-
"id": "design_gaps",
|
|
38
|
-
"purpose": "Active scan completed: either no material gaps were found, or any found were addressed or explicitly filed.",
|
|
39
|
-
"levels": [
|
|
40
|
-
"low",
|
|
41
|
-
"high"
|
|
42
|
-
]
|
|
43
|
-
}
|
|
44
|
-
]
|
|
45
|
-
},
|
|
46
|
-
{
|
|
47
|
-
"id": "plan-completeness-gate",
|
|
48
|
-
"purpose": "Every slice has a defined scope and verifiable acceptance criterion. No slice is vague or open-ended.",
|
|
49
|
-
"dimensions": [
|
|
50
|
-
{
|
|
51
|
-
"id": "plan_completeness",
|
|
52
|
-
"purpose": "Slices have clear boundaries and acceptance criteria. The agent knows what done looks like for each.",
|
|
53
|
-
"levels": [
|
|
54
|
-
"low",
|
|
55
|
-
"high"
|
|
56
|
-
]
|
|
57
|
-
}
|
|
58
|
-
]
|
|
59
|
-
},
|
|
60
|
-
{
|
|
61
|
-
"id": "invariant-clarity-gate",
|
|
62
|
-
"purpose": "Invariants and non-goals are explicit enough to verify against during and after implementation.",
|
|
63
|
-
"dimensions": [
|
|
64
|
-
{
|
|
65
|
-
"id": "invariant_clarity",
|
|
66
|
-
"purpose": "Named invariants are checkable in the implementation. Non-goals are stated and will prevent scope creep.",
|
|
67
|
-
"levels": [
|
|
68
|
-
"low",
|
|
69
|
-
"high"
|
|
70
|
-
]
|
|
71
|
-
}
|
|
72
|
-
]
|
|
73
|
-
},
|
|
74
|
-
{
|
|
75
|
-
"id": "plan-gaps-gate",
|
|
76
|
-
"purpose": "A deliberate scan for missing slices, untracked risks, or acceptance criteria mismatches has been completed.",
|
|
77
|
-
"dimensions": [
|
|
78
|
-
{
|
|
79
|
-
"id": "plan_gaps",
|
|
80
|
-
"purpose": "Active scan completed: either no material gaps were found, or any found were addressed or explicitly filed.",
|
|
81
|
-
"levels": [
|
|
82
|
-
"low",
|
|
83
|
-
"high"
|
|
84
|
-
]
|
|
85
|
-
}
|
|
86
|
-
]
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
"id": "build-correctness-gate",
|
|
90
|
-
"purpose": "The implementation compiles and passes all relevant tests.",
|
|
91
|
-
"dimensions": [
|
|
92
|
-
{
|
|
93
|
-
"id": "build_correctness",
|
|
94
|
-
"purpose": "Build succeeds and tests pass. No compilation errors or failing assertions.",
|
|
95
|
-
"levels": [
|
|
96
|
-
"low",
|
|
97
|
-
"high"
|
|
98
|
-
]
|
|
99
|
-
}
|
|
100
|
-
]
|
|
101
|
-
},
|
|
102
|
-
{
|
|
103
|
-
"id": "invariant-preservation-gate",
|
|
104
|
-
"purpose": "Invariants identified during planning still hold in the implemented code.",
|
|
105
|
-
"dimensions": [
|
|
106
|
-
{
|
|
107
|
-
"id": "invariant_preservation",
|
|
108
|
-
"purpose": "Each named invariant from the plan has been verified in the implementation.",
|
|
109
|
-
"levels": [
|
|
110
|
-
"low",
|
|
111
|
-
"high"
|
|
112
|
-
]
|
|
113
|
-
}
|
|
114
|
-
]
|
|
115
|
-
},
|
|
116
|
-
{
|
|
117
|
-
"id": "implementation-gaps-gate",
|
|
118
|
-
"purpose": "A deliberate scan for gaps, issues, or improvements surfaced during implementation has been completed.",
|
|
119
|
-
"dimensions": [
|
|
120
|
-
{
|
|
121
|
-
"id": "implementation_gaps",
|
|
122
|
-
"purpose": "Active scan completed: gaps found are either fixed inline, filed as follow-up tickets, or explicitly deferred with rationale.",
|
|
123
|
-
"levels": [
|
|
124
|
-
"low",
|
|
125
|
-
"high"
|
|
126
|
-
]
|
|
127
|
-
}
|
|
128
|
-
]
|
|
129
|
-
}
|
|
130
|
-
],
|
|
131
|
-
"preconditions": [
|
|
132
|
-
"User provides a task description or equivalent objective.",
|
|
133
|
-
"Agent has codebase read access and can run the tools needed for analysis and validation.",
|
|
134
|
-
"A deterministic validation path exists (tests, build, or an explicit verification strategy).",
|
|
135
|
-
"If the task touches critical paths, rollback or containment strategy can be defined."
|
|
136
|
-
],
|
|
137
|
-
"metaGuidance": [
|
|
138
|
-
"DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for business decisions, missing external artifacts, or permissions you cannot resolve.",
|
|
139
|
-
"V2 DURABILITY: use output.notesMarkdown as the primary durable record. Do NOT mirror execution state into CONTEXT.md or any markdown checkpoint file.",
|
|
140
|
-
"ARTIFACT STRATEGY: `implementation_plan.md` drives execution. `spec.md`, when created, is canonical for observable behavior and serves as the verification anchor. Do not create extra artifacts unless they materially improve handoff.",
|
|
141
|
-
"OWNERSHIP & DELEGATION: the main agent owns strategy, decisions, synthesis, and implementation. Delegate only bounded cognitive routines via WorkRail Executor. Never hand off full task ownership or rely on named Builder/Researcher identities.",
|
|
142
|
-
"SUBAGENT SYNTHESIS: treat subagent output as evidence, not conclusions. State your hypothesis before delegating, then interrogate what came back: what was missed, wrong, or new? Say what changed your mind or what you still reject, and why.",
|
|
143
|
-
"PARALLELISM: when reads, audits, or delegations are independent, run them in parallel inside the phase. Parallelize cognition; serialize synthesis and canonical writes.",
|
|
144
|
-
"PHILOSOPHY LENS: apply the user's coding philosophy (from active session rules) as the evaluation lens. Flag violations by principle name, not as generic feedback. If principles conflict, surface the tension explicitly instead of silently choosing.",
|
|
145
|
-
"VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness — in that order of reliability.",
|
|
146
|
-
"DRIFT HANDLING: when reality diverges from the plan, update the plan artifact and re-audit deliberately rather than accumulating undocumented drift.",
|
|
147
|
-
"NEVER COMMIT MARKDOWN FILES UNLESS THE USER EXPLICITLY ASKS.",
|
|
148
|
-
"SLICE DISCIPLINE: Phase 6 is a loop -- implement ONE slice per iteration. Do not implement multiple slices at once. The verification loop exists to catch drift per slice, not retroactively."
|
|
149
|
-
],
|
|
150
|
-
"references": [
|
|
151
|
-
{
|
|
152
|
-
"id": "authoring-spec",
|
|
153
|
-
"title": "Authoring Specification",
|
|
154
|
-
"source": "./spec/authoring-spec.json",
|
|
155
|
-
"purpose": "Canonical rules and constraints for workflow authoring. Consult when making structural decisions about workflow design.",
|
|
156
|
-
"authoritative": true,
|
|
157
|
-
"resolveFrom": "package"
|
|
158
|
-
},
|
|
159
|
-
{
|
|
160
|
-
"id": "workflow-schema",
|
|
161
|
-
"title": "Workflow JSON Schema",
|
|
162
|
-
"source": "./spec/workflow.schema.json",
|
|
163
|
-
"purpose": "The JSON schema that all workflow definitions must conform to. Use as the structural contract reference.",
|
|
164
|
-
"authoritative": true,
|
|
165
|
-
"resolveFrom": "package"
|
|
166
|
-
},
|
|
167
|
-
{
|
|
168
|
-
"id": "authoring-provenance",
|
|
169
|
-
"title": "Workflow Authoring Provenance",
|
|
170
|
-
"source": "./spec/authoring-spec.provenance.json",
|
|
171
|
-
"purpose": "Source-of-truth map showing what is canonical, derived, and non-canonical in workflow authoring guidance.",
|
|
172
|
-
"authoritative": false,
|
|
173
|
-
"resolveFrom": "package"
|
|
174
|
-
}
|
|
175
|
-
],
|
|
176
|
-
"steps": [
|
|
177
|
-
{
|
|
178
|
-
"id": "phase-0-understand-and-classify",
|
|
179
|
-
"title": "Phase 0: Understand & Classify",
|
|
180
|
-
"prompt": "Understand this before you touch anything.\n\nMake sure the expected behavior is clear enough to proceed. If it really isn't, ask me only what you can't answer yourself. Don't ask me things you can find with tools.\n\nThen dig through the code. Figure out:\n- where this starts and what the call chain looks like\n- which files, modules, and functions matter\n- what patterns this should follow\n- how this repo verifies similar work\n- what the real risks, invariants, and non-goals are\n\nFigure out what philosophy to use while doing the work. Prefer, in order: Memory MCP (`mcp_memory_conventions`, `mcp_memory_prefer`, `mcp_memory_recall`), active session/Firebender rules, repo patterns, then me only if those still conflict or aren't enough.\n\nRecord where that philosophy lives, not a summary. If the stated rules and repo patterns disagree, capture the conflict.\n\nOnce you actually understand the task, classify it:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n\nUse this guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nThen force a context-clarity check. Score each from 0-2 and give one sentence of evidence for each score:\n- `entryPointClarity`: 0 = clear entry point and call chain, 1 = partial chain with gaps, 2 = still unclear where behavior starts or flows\n- `boundaryClarity`: 0 = clear boundary, 1 = likely boundary but some uncertainty, 2 = patch-vs-boundary decision still unclear\n- `invariantClarity`: 0 = important invariants are explicit, 1 = some are inferred or uncertain, 2 = important invariants are still unclear\n- `verificationClarity`: 0 = clear deterministic verification path, 1 = partial verification path, 2 = verification is still weak or unclear\n\nUse the rubric, not vibes:\n- QUICK: do not run the deeper context batch; if the rubric says you're missing too much context, your classification is probably wrong and you should reclassify upward before moving on\n- STANDARD: run the deeper context batch if the total score is 3 or more, or if `boundaryClarity`, `invariantClarity`, or `verificationClarity` is 2\n- THOROUGH: always run the deeper context batch\n\nThe deeper context batch is:\n- `routine-context-gathering` with `focus=COMPLETENESS`\n- `routine-context-gathering` with `focus=DEPTH`\n\nAfter the batch, synthesize what changed, what stayed the same, and what is still unknown. If the extra context changes the classification, update it before you leave this step.\n\nCapture:\n- `taskComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `prStrategy`\n- `contextSummary`\n- `candidateFiles`\n- `invariants`\n- `nonGoals`\n- `openQuestions` (only real human-decision questions)\n- `philosophySources`\n- `philosophyConflicts`",
|
|
181
|
-
"requireConfirmation": {
|
|
182
|
-
"or": [
|
|
183
|
-
{
|
|
184
|
-
"var": "taskComplexity",
|
|
185
|
-
"equals": "Large"
|
|
186
|
-
},
|
|
187
|
-
{
|
|
188
|
-
"var": "riskLevel",
|
|
189
|
-
"equals": "High"
|
|
190
|
-
}
|
|
191
|
-
]
|
|
192
|
-
}
|
|
193
|
-
},
|
|
194
|
-
{
|
|
195
|
-
"id": "phase-1a-hypothesis",
|
|
196
|
-
"title": "Phase 1a: State Hypothesis",
|
|
197
|
-
"runCondition": {
|
|
198
|
-
"and": [
|
|
199
|
-
{
|
|
200
|
-
"var": "taskComplexity",
|
|
201
|
-
"not_equals": "Small"
|
|
202
|
-
},
|
|
203
|
-
{
|
|
204
|
-
"var": "rigorMode",
|
|
205
|
-
"not_equals": "QUICK"
|
|
206
|
-
}
|
|
207
|
-
]
|
|
208
|
-
},
|
|
209
|
-
"prompt": "Before you do design work, tell me your current best guess.\n\nKeep it short:\n1. what you think the right approach is\n2. what worries you about it\n3. what would most likely make it wrong\n\nCapture:\n- `initialHypothesis`",
|
|
210
|
-
"requireConfirmation": false
|
|
211
|
-
},
|
|
212
|
-
{
|
|
213
|
-
"id": "phase-1b-design-quick",
|
|
214
|
-
"title": "Phase 1b: Lightweight Design (QUICK)",
|
|
215
|
-
"runCondition": {
|
|
216
|
-
"and": [
|
|
217
|
-
{
|
|
218
|
-
"var": "taskComplexity",
|
|
219
|
-
"not_equals": "Small"
|
|
220
|
-
},
|
|
221
|
-
{
|
|
222
|
-
"var": "rigorMode",
|
|
223
|
-
"equals": "QUICK"
|
|
224
|
-
}
|
|
225
|
-
]
|
|
226
|
-
},
|
|
227
|
-
"prompt": "Generate a lightweight design inline. QUICK rigor means the path is clear and risk is low.\n\nProduce two mandatory candidates:\n1. The simplest possible change that satisfies acceptance criteria\n2. Follow the existing repo pattern for this kind of change\n\nFor each candidate:\n- One-sentence summary\n- Which tensions it resolves and which it accepts\n- How it relates to existing repo patterns (follows / adapts / departs)\n- Failure mode to watch\n- Philosophy fit (name specific principles)\n\nCompare and recommend. If both converge on the same approach, say so honestly.\n\nWrite the output to `design-candidates.md` with this structure:\n- Problem Understanding (core tensions, what makes it hard)\n- Philosophy Constraints (which principles matter for this problem)\n- Candidates (each with: summary, tensions resolved/accepted, failure mode, philosophy fit)\n- Comparison and Recommendation\n- Open Questions (if any remain)",
|
|
228
|
-
"requireConfirmation": false
|
|
229
|
-
},
|
|
230
|
-
{
|
|
231
|
-
"id": "phase-1b-design-deep",
|
|
232
|
-
"title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
|
|
233
|
-
"runCondition": {
|
|
234
|
-
"and": [
|
|
235
|
-
{
|
|
236
|
-
"var": "taskComplexity",
|
|
237
|
-
"not_equals": "Small"
|
|
238
|
-
},
|
|
239
|
-
{
|
|
240
|
-
"var": "rigorMode",
|
|
241
|
-
"not_equals": "QUICK"
|
|
242
|
-
}
|
|
243
|
-
]
|
|
244
|
-
},
|
|
245
|
-
"templateCall": {
|
|
246
|
-
"templateId": "wr.templates.routine.tension-driven-design",
|
|
247
|
-
"args": {
|
|
248
|
-
"deliverableName": "design-candidates.md"
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
},
|
|
252
|
-
{
|
|
253
|
-
"id": "phase-1c-challenge-and-select",
|
|
254
|
-
"title": "Phase 1c: Challenge and Select",
|
|
255
|
-
"runCondition": {
|
|
256
|
-
"var": "taskComplexity",
|
|
257
|
-
"not_equals": "Small"
|
|
258
|
-
},
|
|
259
|
-
"prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` — chosen design with rationale tied to tensions\n- `runnerUpApproach` — next-best option and why it lost\n- `architectureRationale` — tensions resolved vs accepted\n- `pivotTriggers` — conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
|
|
260
|
-
"promptFragments": [
|
|
261
|
-
{
|
|
262
|
-
"id": "phase-1c-challenge-standard",
|
|
263
|
-
"when": {
|
|
264
|
-
"var": "rigorMode",
|
|
265
|
-
"in": [
|
|
266
|
-
"STANDARD",
|
|
267
|
-
"THOROUGH"
|
|
268
|
-
]
|
|
269
|
-
},
|
|
270
|
-
"text": "Run `routine-hypothesis-challenge` on the leading option's failure modes before you decide."
|
|
271
|
-
},
|
|
272
|
-
{
|
|
273
|
-
"id": "phase-1c-challenge-thorough",
|
|
274
|
-
"when": {
|
|
275
|
-
"var": "rigorMode",
|
|
276
|
-
"equals": "THOROUGH"
|
|
277
|
-
},
|
|
278
|
-
"text": "Also run `routine-execution-simulation` on the three most likely failure paths before you decide."
|
|
279
|
-
}
|
|
280
|
-
],
|
|
281
|
-
"assessmentRefs": [
|
|
282
|
-
"design-soundness-gate",
|
|
283
|
-
"design-gaps-gate"
|
|
284
|
-
],
|
|
285
|
-
"assessmentConsequences": [
|
|
286
|
-
{
|
|
287
|
-
"when": {
|
|
288
|
-
"anyEqualsLevel": "low"
|
|
289
|
-
},
|
|
290
|
-
"effect": {
|
|
291
|
-
"kind": "require_followup",
|
|
292
|
-
"guidance": "Address whichever gate scored low: design_soundness low -- the design decision is still ambiguous; commit to an approach and record the rationale before proceeding. design_gaps low -- the gap scan was not completed or found unaddressed gaps; either resolve them or explicitly file them before proceeding."
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
],
|
|
296
|
-
"requireConfirmation": {
|
|
297
|
-
"or": [
|
|
298
|
-
{
|
|
299
|
-
"var": "automationLevel",
|
|
300
|
-
"equals": "Low"
|
|
301
|
-
},
|
|
302
|
-
{
|
|
303
|
-
"var": "taskComplexity",
|
|
304
|
-
"equals": "Large"
|
|
305
|
-
},
|
|
306
|
-
{
|
|
307
|
-
"var": "riskLevel",
|
|
308
|
-
"equals": "High"
|
|
309
|
-
}
|
|
310
|
-
]
|
|
311
|
-
}
|
|
312
|
-
},
|
|
313
|
-
{
|
|
314
|
-
"id": "phase-2-design-review",
|
|
315
|
-
"type": "loop",
|
|
316
|
-
"title": "Phase 2: Design Review",
|
|
317
|
-
"runCondition": {
|
|
318
|
-
"var": "taskComplexity",
|
|
319
|
-
"not_equals": "Small"
|
|
320
|
-
},
|
|
321
|
-
"loop": {
|
|
322
|
-
"type": "while",
|
|
323
|
-
"conditionSource": {
|
|
324
|
-
"kind": "artifact_contract",
|
|
325
|
-
"contractRef": "wr.contracts.loop_control",
|
|
326
|
-
"loopId": "design_review_loop"
|
|
327
|
-
},
|
|
328
|
-
"maxIterations": 2
|
|
329
|
-
},
|
|
330
|
-
"body": [
|
|
331
|
-
{
|
|
332
|
-
"id": "phase-2a-pre-assess-design-review",
|
|
333
|
-
"title": "Pre-Assess Design Review",
|
|
334
|
-
"prompt": "Before the detailed design review, state your current assessment in 2-4 sentences.\n\nSay:\n- what you think the strongest part of the selected design is right now\n- what you think the weakest part is right now\n- which tradeoff or failure mode worries you most\n\nThis is your reference point for interpreting the review findings.\n\nSet this key in the next `continue_workflow` call's `context` object:\n- `designReviewAssessment`",
|
|
335
|
-
"requireConfirmation": false
|
|
336
|
-
},
|
|
337
|
-
{
|
|
338
|
-
"id": "phase-2b-design-review-core",
|
|
339
|
-
"title": "Design Review Core",
|
|
340
|
-
"templateCall": {
|
|
341
|
-
"templateId": "wr.templates.routine.design-review",
|
|
342
|
-
"args": {
|
|
343
|
-
"deliverableName": "design-review-findings.md"
|
|
344
|
-
}
|
|
345
|
-
},
|
|
346
|
-
"requireConfirmation": false
|
|
347
|
-
},
|
|
348
|
-
{
|
|
349
|
-
"id": "phase-2c-synthesize-design-review",
|
|
350
|
-
"title": "Synthesize Design Review Findings",
|
|
351
|
-
"prompt": "Read `design-review-findings.md` and turn the review into workflow-owned decisions.\n\nCompare it against `designReviewAssessment`:\n- what did the review confirm?\n- what did it surface that you missed?\n- what changed your mind and what held firm?\n\nIf the findings are real, fix the design before you continue (`selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`).\n\nAfter any extra challenge, synthesize explicitly:\n- which findings actually matter\n- what changed in the design\n- what you reject and why\n\nFor any finding that changes the decision, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, artifacts, spec, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to drive the decision yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nCapture:\n- `designFindings`\n- `designRevised`",
|
|
352
|
-
"promptFragments": [
|
|
353
|
-
{
|
|
354
|
-
"id": "phase-2c-challenge-thorough",
|
|
355
|
-
"when": {
|
|
356
|
-
"var": "rigorMode",
|
|
357
|
-
"equals": "THOROUGH"
|
|
358
|
-
},
|
|
359
|
-
"text": "If the review surfaced materially non-empty or surprising findings, run `routine-hypothesis-challenge` on the most serious finding and `routine-execution-simulation` on the most dangerous failure mode before you finalize the revised design."
|
|
360
|
-
}
|
|
361
|
-
],
|
|
362
|
-
"requireConfirmation": false
|
|
363
|
-
},
|
|
364
|
-
{
|
|
365
|
-
"id": "phase-2d-loop-decision",
|
|
366
|
-
"title": "Design Review Loop Decision",
|
|
367
|
-
"prompt": "Decide whether the design needs another pass.\n\nIf `designFindings` is non-empty and the design was revised, keep going so the revision gets checked.\nIf `designFindings` is empty, stop.\nIf you've hit the limit, stop and record the remaining concerns.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
|
|
368
|
-
"requireConfirmation": false,
|
|
369
|
-
"outputContract": {
|
|
370
|
-
"contractRef": "wr.contracts.loop_control"
|
|
371
|
-
}
|
|
372
|
-
}
|
|
373
|
-
]
|
|
374
|
-
},
|
|
375
|
-
{
|
|
376
|
-
"id": "phase-3-plan-and-test-design",
|
|
377
|
-
"title": "Phase 3: Slice, Plan, and Test Design",
|
|
378
|
-
"runCondition": {
|
|
379
|
-
"var": "taskComplexity",
|
|
380
|
-
"not_equals": "Small"
|
|
381
|
-
},
|
|
382
|
-
"prompt": "Turn the decision into a plan someone else could execute without guessing.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` — count of open issues that would materially affect implementation quality\n- `planConfidenceBand` — Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
|
|
383
|
-
"assessmentRefs": [
|
|
384
|
-
"plan-completeness-gate",
|
|
385
|
-
"invariant-clarity-gate",
|
|
386
|
-
"plan-gaps-gate"
|
|
387
|
-
],
|
|
388
|
-
"assessmentConsequences": [
|
|
389
|
-
{
|
|
390
|
-
"when": {
|
|
391
|
-
"anyEqualsLevel": "low"
|
|
392
|
-
},
|
|
393
|
-
"effect": {
|
|
394
|
-
"kind": "require_followup",
|
|
395
|
-
"guidance": "Address whichever gate scored low: plan_completeness low -- one or more slices lack clear boundaries or verifiable acceptance criteria; sharpen them before implementation begins. invariant_clarity low -- invariants or non-goals are too vague to verify against; make them concrete. plan_gaps low -- the gap scan was not completed or found unaddressed gaps; resolve or file them before proceeding."
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
],
|
|
399
|
-
"requireConfirmation": false
|
|
400
|
-
},
|
|
401
|
-
{
|
|
402
|
-
"id": "phase-3b-spec",
|
|
403
|
-
"title": "Phase 3b: Spec (Observable Behavior)",
|
|
404
|
-
"runCondition": {
|
|
405
|
-
"and": [
|
|
406
|
-
{
|
|
407
|
-
"var": "taskComplexity",
|
|
408
|
-
"not_equals": "Small"
|
|
409
|
-
},
|
|
410
|
-
{
|
|
411
|
-
"or": [
|
|
412
|
-
{
|
|
413
|
-
"var": "taskComplexity",
|
|
414
|
-
"equals": "Large"
|
|
415
|
-
},
|
|
416
|
-
{
|
|
417
|
-
"var": "riskLevel",
|
|
418
|
-
"equals": "High"
|
|
419
|
-
}
|
|
420
|
-
]
|
|
421
|
-
}
|
|
422
|
-
]
|
|
423
|
-
},
|
|
424
|
-
"prompt": "Write `spec.md`.\n\nKeep it about what the feature does from the outside, not how you plan to build it.\n\nInclude:\n1. Feature summary\n2. Acceptance criteria\n3. Non-goals\n4. External API / interface contract if it matters\n5. Edge cases and failure modes\n6. How each acceptance criterion will be verified\n\nKeep it tight. If something can't be verified, it doesn't belong as an acceptance criterion.\n\n`spec.md` is canonical for observable behavior.",
|
|
425
|
-
"requireConfirmation": false
|
|
426
|
-
},
|
|
427
|
-
{
|
|
428
|
-
"id": "phase-4-plan-audit",
|
|
429
|
-
"type": "loop",
|
|
430
|
-
"title": "Phase 4: Plan Audit (Review, Fix, Decide)",
|
|
431
|
-
"runCondition": {
|
|
432
|
-
"and": [
|
|
433
|
-
{
|
|
434
|
-
"var": "taskComplexity",
|
|
435
|
-
"not_equals": "Small"
|
|
436
|
-
},
|
|
437
|
-
{
|
|
438
|
-
"var": "rigorMode",
|
|
439
|
-
"not_equals": "QUICK"
|
|
440
|
-
}
|
|
441
|
-
]
|
|
442
|
-
},
|
|
443
|
-
"loop": {
|
|
444
|
-
"type": "while",
|
|
445
|
-
"conditionSource": {
|
|
446
|
-
"kind": "artifact_contract",
|
|
447
|
-
"contractRef": "wr.contracts.loop_control",
|
|
448
|
-
"loopId": "plan_audit_loop"
|
|
449
|
-
},
|
|
450
|
-
"maxIterations": 2
|
|
451
|
-
},
|
|
452
|
-
"body": [
|
|
453
|
-
{
|
|
454
|
-
"id": "phase-4a-audit-and-refocus",
|
|
455
|
-
"title": "Audit Plan and Apply Fixes",
|
|
456
|
-
"prompt": "Audit the plan and fix it in the same pass.\n\nLook for:\n- missing work\n- weak assumptions and risks\n- invariant gaps\n- bad slice boundaries\n- philosophy violations or tensions\n- regressions from things you already fixed\n- mismatches between `implementation_plan.md` and `spec.md` if there is a spec\n\nBefore you delegate, say what looks weakest right now and what you trust least.\n\nAfter the audit batch, synthesize explicitly:\n- what multiple auditors agreed on\n- what only one auditor raised\n- what you reject and why\n- what changed in the plan because of the audit\n\nFor any finding that changes the plan, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, plan/spec artifacts, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to change the plan yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nThen fix the plan immediately:\n- update `implementation_plan.md`\n- update `spec.md` if acceptance criteria or other observable behavior changed\n- update `slices` if the shape changed\n- move out-of-scope work into `followUpTickets`\n- track resolved findings (cap at 10, drop oldest)\n\nCapture:\n- `planFindings`\n- `planConfidence`\n- `resolvedFindings`\n- `followUpTickets`\n\nIf the plan drifted, fix the plan. Don't just keep going.",
|
|
457
|
-
"promptFragments": [
|
|
458
|
-
{
|
|
459
|
-
"id": "phase-4a-delegation-quick",
|
|
460
|
-
"when": {
|
|
461
|
-
"var": "rigorMode",
|
|
462
|
-
"equals": "QUICK"
|
|
463
|
-
},
|
|
464
|
-
"text": "Do this yourself."
|
|
465
|
-
},
|
|
466
|
-
{
|
|
467
|
-
"id": "phase-4a-delegation-standard",
|
|
468
|
-
"when": {
|
|
469
|
-
"var": "rigorMode",
|
|
470
|
-
"equals": "STANDARD"
|
|
471
|
-
},
|
|
472
|
-
"text": "Run `routine-plan-analysis`, `routine-hypothesis-challenge`, and `routine-philosophy-alignment` in parallel before you decide whether the plan is good enough."
|
|
473
|
-
},
|
|
474
|
-
{
|
|
475
|
-
"id": "phase-4a-delegation-thorough",
|
|
476
|
-
"when": {
|
|
477
|
-
"var": "rigorMode",
|
|
478
|
-
"equals": "THOROUGH"
|
|
479
|
-
},
|
|
480
|
-
"text": "Run `routine-plan-analysis`, `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment` in parallel before you decide whether the plan is good enough."
|
|
481
|
-
}
|
|
482
|
-
],
|
|
483
|
-
"requireConfirmation": false
|
|
484
|
-
},
|
|
485
|
-
{
|
|
486
|
-
"id": "phase-4b-loop-decision",
|
|
487
|
-
"title": "Loop Exit Decision",
|
|
488
|
-
"prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop — but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
|
|
489
|
-
"requireConfirmation": true,
|
|
490
|
-
"outputContract": {
|
|
491
|
-
"contractRef": "wr.contracts.loop_control"
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
]
|
|
495
|
-
},
|
|
496
|
-
{
|
|
497
|
-
"id": "phase-5-small-task-fast-path",
|
|
498
|
-
"title": "Phase 5: Small Task Fast Path",
|
|
499
|
-
"runCondition": {
|
|
500
|
-
"var": "taskComplexity",
|
|
501
|
-
"equals": "Small"
|
|
502
|
-
},
|
|
503
|
-
"prompt": "For Small tasks, fast does not mean shallow. Every item below is required.\n\n**1. Confirm all wiring points with tools.**\nDon't assume a file you create is reachable. Check every public entry point:\n- Does the new symbol need to be exported from an index file?\n- Does it need to be imported and registered somewhere (CLI command map, router, DI container, plugin registry)?\n- Is there a test file that needs to reference it?\nTrace the full call path from the public interface down to your new code before writing anything.\n\n**2. Implement the smallest correct change.**\nChange exactly what needs changing. No drive-by refactors, no extra abstractions.\n\n**3. Verify end-to-end.**\n- Run build and tests. Both must pass.\n- Manually trace the new behavior through the public entry point (e.g. run the CLI command, check the export resolves, hit the endpoint). If you can't do this deterministically with tools, say why.\n- Apply the user's coding philosophy as the review lens. Flag any violation by principle name.\n\n**4. Produce a handoff note.**\nOutput a notes artifact containing a JSON fenced block with the following fields.\nThe daemon reads this block to run `git commit` and `gh pr create` -- write it exactly as shown:\n\n```json\n{\n \"commitType\": \"feat\",\n \"commitScope\": \"mcp\",\n \"commitSubject\": \"imperative mood, max 72 chars total with type(scope): prefix, no period\",\n \"prTitle\": \"same as full commit first line\",\n \"prBody\": \"markdown with ## Summary (bullets) and ## Test plan (checklist)\",\n \"followUpTickets\": [],\n \"filesChanged\": [\"src/path/to/file.ts\", \"tests/unit/file.test.ts\"]\n}\n```\n\nFields:\n- `commitType`: feat / fix / chore / refactor / docs / test / perf (pick one)\n- `commitScope`: product area only (console / mcp / workflows / engine / schema / docs)\n- `commitSubject`: imperative mood, max 72 chars total with type(scope): prefix, no period\n- `prTitle`: same as full commit first line\n- `prBody`: markdown with ## Summary (bullets) and ## Test plan (checklist)\n- `followUpTickets`: list of deferred items, or empty array\n- `filesChanged`: list of every file you created or modified (required -- do not omit)\n\nThe daemon will use this artifact to run git commit and open the PR. Do not commit or push yourself.\n\nDo not create heavyweight planning artifacts unless risk unexpectedly grows.",
|
|
504
|
-
"requireConfirmation": false
|
|
505
|
-
},
|
|
506
|
-
{
|
|
507
|
-
"id": "phase-6-implement-slices",
|
|
508
|
-
"type": "loop",
|
|
509
|
-
"title": "Phase 6: Implement Slice-by-Slice",
|
|
510
|
-
"runCondition": {
|
|
511
|
-
"var": "taskComplexity",
|
|
512
|
-
"not_equals": "Small"
|
|
513
|
-
},
|
|
514
|
-
"loop": {
|
|
515
|
-
"type": "forEach",
|
|
516
|
-
"items": "slices",
|
|
517
|
-
"itemVar": "currentSlice",
|
|
518
|
-
"indexVar": "sliceIndex",
|
|
519
|
-
"maxIterations": 20
|
|
520
|
-
},
|
|
521
|
-
"body": [
|
|
522
|
-
{
|
|
523
|
-
"id": "phase-6a-implement-slice",
|
|
524
|
-
"title": "Implement Slice",
|
|
525
|
-
"prompt": "Implement the current slice: `{{currentSlice.name}}`.\n\nBefore writing a single line of code, declare your scope:\n- List the exact files and symbols this slice touches\n- Confirm none of them belong to a later slice\n- If you have already edited files from this or any other slice in a previous step, stop and report it\n\nHard scope rule: you may only modify what is described in `{{currentSlice.name}}`. Anything outside that boundary is out of scope for this iteration -- not \"do it early\", not \"while I'm here\". If you discover you need to touch something outside this slice to make it compile or integrate, set `unexpectedScopeChange = true` and do the minimum necessary to stay green, then stop.\n\nImplement incrementally. Run tests and build to prove the slice works before advancing.\n\nTrack:\n- `specialCaseIntroduced` -- did this slice require a new special-case?\n- `unplannedAbstractionIntroduced` -- did this slice introduce an abstraction not in the plan?\n- `unexpectedScopeChange` -- did this slice touch files outside its planned scope?\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `prStrategy = MultiPR`\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nCapture: `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, `unexpectedScopeChange`, `verifyNeeded`",
|
|
526
|
-
"requireConfirmation": false
|
|
527
|
-
},
|
|
528
|
-
{
|
|
529
|
-
"id": "phase-6b-verify-slice",
|
|
530
|
-
"title": "Verify Slice",
|
|
531
|
-
"runCondition": {
|
|
532
|
-
"var": "verifyNeeded",
|
|
533
|
-
"equals": true
|
|
534
|
-
},
|
|
535
|
-
"prompt": "Take a fresh look at what you just changed.\n\nCheck whether:\n- it matches the plan's intent, not just the letter\n- it hides assumptions or skips edge cases\n- invariants still hold\n- it regressed against the user's philosophy\n- multiple unverified slices now need to be reviewed together\n- `unexpectedScopeChange` was just harmless integration work or real plan drift\n\nIf any of `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, or `unexpectedScopeChange` is true, or if tests/build were shaky, run the verification batch before you decide this slice is done.\n\nAfter the verification batch, synthesize explicitly:\n- what multiple reviewers agreed on\n- what only one reviewer raised\n- what you reject and why\n- whether the drift was harmless integration work or real plan drift\n\nFor any finding that changes whether this slice is accepted, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, plan/spec artifacts, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to accept or block the slice yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nSay where you're least confident.\n\nIf the slice drifted materially, update `implementation_plan.md` and `spec.md` if observable behavior changed. If the drift changed boundaries or makes the current plan unreliable, stop and go back to planning.\n\nIf the concerns are serious, stop and go back to planning or ask me. Don't wave this through just because the code exists.\n\nCapture:\n- `verificationFindings`\n- `verificationFailed`",
|
|
536
|
-
"promptFragments": [
|
|
537
|
-
{
|
|
538
|
-
"id": "phase-6b-delegation-quick",
|
|
539
|
-
"when": {
|
|
540
|
-
"var": "rigorMode",
|
|
541
|
-
"equals": "QUICK"
|
|
542
|
-
},
|
|
543
|
-
"text": "Do the verification yourself."
|
|
544
|
-
},
|
|
545
|
-
{
|
|
546
|
-
"id": "phase-6b-delegation-standard",
|
|
547
|
-
"when": {
|
|
548
|
-
"var": "rigorMode",
|
|
549
|
-
"equals": "STANDARD"
|
|
550
|
-
},
|
|
551
|
-
"text": "If any slice-risk trigger fired, run `routine-hypothesis-challenge` and `routine-philosophy-alignment` before you decide this slice is done."
|
|
552
|
-
},
|
|
553
|
-
{
|
|
554
|
-
"id": "phase-6b-delegation-thorough",
|
|
555
|
-
"when": {
|
|
556
|
-
"var": "rigorMode",
|
|
557
|
-
"equals": "THOROUGH"
|
|
558
|
-
},
|
|
559
|
-
"text": "If any slice-risk trigger fired, run `routine-hypothesis-challenge`, `routine-philosophy-alignment`, and `routine-execution-simulation` before you decide this slice is done."
|
|
560
|
-
},
|
|
561
|
-
{
|
|
562
|
-
"id": "phase-6b-multi-pr",
|
|
563
|
-
"when": {
|
|
564
|
-
"var": "prStrategy",
|
|
565
|
-
"equals": "MultiPR"
|
|
566
|
-
},
|
|
567
|
-
"text": "If this slice is verified and ready, stop here and package it for review before you move to the next slice."
|
|
568
|
-
}
|
|
569
|
-
],
|
|
570
|
-
"requireConfirmation": {
|
|
571
|
-
"or": [
|
|
572
|
-
{
|
|
573
|
-
"var": "verificationFailed",
|
|
574
|
-
"equals": true
|
|
575
|
-
},
|
|
576
|
-
{
|
|
577
|
-
"var": "prStrategy",
|
|
578
|
-
"equals": "MultiPR"
|
|
579
|
-
}
|
|
580
|
-
]
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
]
|
|
584
|
-
},
|
|
585
|
-
{
|
|
586
|
-
"id": "phase-7-final-verification",
|
|
587
|
-
"type": "loop",
|
|
588
|
-
"title": "Phase 7: Final Verification Barrier (Verify, Fix, Re-Verify)",
|
|
589
|
-
"runCondition": {
|
|
590
|
-
"var": "taskComplexity",
|
|
591
|
-
"not_equals": "Small"
|
|
592
|
-
},
|
|
593
|
-
"loop": {
|
|
594
|
-
"type": "while",
|
|
595
|
-
"conditionSource": {
|
|
596
|
-
"kind": "artifact_contract",
|
|
597
|
-
"contractRef": "wr.contracts.loop_control",
|
|
598
|
-
"loopId": "final_verification_loop"
|
|
599
|
-
},
|
|
600
|
-
"maxIterations": 2
|
|
601
|
-
},
|
|
602
|
-
"body": [
|
|
603
|
-
{
|
|
604
|
-
"id": "phase-7a-final-verification-core",
|
|
605
|
-
"title": "Run Final Verification Batch",
|
|
606
|
-
"templateCall": {
|
|
607
|
-
"templateId": "wr.templates.routine.final-verification",
|
|
608
|
-
"args": {
|
|
609
|
-
"deliverableName": "final-verification-findings.md"
|
|
610
|
-
}
|
|
611
|
-
},
|
|
612
|
-
"requireConfirmation": false
|
|
613
|
-
},
|
|
614
|
-
{
|
|
615
|
-
"id": "phase-7b-fix-and-summarize",
|
|
616
|
-
"title": "Synthesize Findings, Fix, and Re-Verify",
|
|
617
|
-
"prompt": "Read `final-verification-findings.md` and decide what actually needs fixing.\n\nDon't rubber-stamp it. The verifier is evidence, not the decision.\n\nIf `spec.md` exists, use it as the verification anchor and make sure every acceptance criterion is actually met.\n\nThis loop is verify, fix, then re-verify. If you fix anything here, the next pass exists to prove the fixes worked.\n\nSynthesize the verification output explicitly:\n- what the verifier found\n- what you agree with\n- what you reject and why\n- what changed because of the fixes\n\nFor any finding that changes final acceptance, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, spec, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to accept or block final signoff yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nFix what has to be fixed now, rerun the affected verification, and update:\n- `implementation_plan.md` if the execution shape changed\n- `spec.md` if acceptance criteria, observable behavior, or external contracts changed\n\nCapture:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
|
|
618
|
-
"assessmentRefs": [
|
|
619
|
-
"build-correctness-gate",
|
|
620
|
-
"invariant-preservation-gate",
|
|
621
|
-
"implementation-gaps-gate"
|
|
622
|
-
],
|
|
623
|
-
"assessmentConsequences": [
|
|
624
|
-
{
|
|
625
|
-
"when": {
|
|
626
|
-
"anyEqualsLevel": "low"
|
|
627
|
-
},
|
|
628
|
-
"effect": {
|
|
629
|
-
"kind": "require_followup",
|
|
630
|
-
"guidance": "Address whichever gate scored low: build_correctness low -- the build or tests are still failing; fix them before this step can complete. invariant_preservation low -- one or more invariants from the plan are violated; fix the implementation. implementation_gaps low -- the gap scan was not completed or found unaddressed gaps; fix them inline, file as follow-up tickets, or explicitly defer with rationale."
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
],
|
|
634
|
-
"requireConfirmation": false
|
|
635
|
-
},
|
|
636
|
-
{
|
|
637
|
-
"id": "phase-7c-loop-decision",
|
|
638
|
-
"title": "Final Verification Loop Decision",
|
|
639
|
-
"prompt": "Decide whether final verification needs another pass or whether we're done.\n\nThis loop gets up to two verify/fix passes.\n- If verification found real issues and you fixed them, keep going so the fixes get re-verified.\n- If verification is clean or the issues are resolved, stop.\n- If you've hit the limit, stop and record what remains.\n\nWhen you stop, include:\n- acceptance criteria status\n- invariant status\n- test/build summary\n- follow-up tickets\n- any philosophy tensions you accepted on purpose\n\n**Handoff block (required for daemon auto-commit):**\nInclude a JSON fenced block in your notes. The daemon reads this to run `git commit` and `gh pr create`:\n\n```json\n{\n \"commitType\": \"feat\",\n \"commitScope\": \"mcp\",\n \"commitSubject\": \"imperative mood, max 72 chars total with type(scope): prefix, no period\",\n \"prTitle\": \"same as full commit first line\",\n \"prBody\": \"markdown with ## Summary (bullets) and ## Test plan (checklist)\",\n \"followUpTickets\": [],\n \"filesChanged\": [\"src/path/to/file.ts\", \"tests/unit/file.test.ts\"]\n}\n```\n\nFields: `commitType` (feat/fix/chore/refactor/docs/test/perf), `commitScope` (product area only: console/mcp/workflows/engine/schema/docs), `commitSubject` (imperative, <=72 chars including prefix, no period), `prTitle` (same as commit first line), `prBody` (markdown), `followUpTickets` (array), `filesChanged` (required -- every file created or modified).\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
|
|
640
|
-
"requireConfirmation": true,
|
|
641
|
-
"outputContract": {
|
|
642
|
-
"contractRef": "wr.contracts.loop_control"
|
|
643
|
-
}
|
|
644
|
-
}
|
|
645
|
-
]
|
|
646
|
-
}
|
|
647
|
-
]
|
|
648
|
-
}
|