@exaudeus/workrail 3.39.0 → 3.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/dist/cli/commands/init.js +0 -3
  2. package/dist/cli-worktrain.js +58 -26
  3. package/dist/cli.js +0 -18
  4. package/dist/config/app-config.d.ts +0 -16
  5. package/dist/config/app-config.js +0 -14
  6. package/dist/config/config-file.js +0 -3
  7. package/dist/console-ui/assets/index-CQt4UhPB.js +28 -0
  8. package/dist/console-ui/assets/index-DGj8EsFR.css +1 -0
  9. package/dist/console-ui/index.html +2 -2
  10. package/dist/coordinators/pr-review.d.ts +23 -1
  11. package/dist/coordinators/pr-review.js +224 -5
  12. package/dist/daemon/daemon-events.d.ts +9 -1
  13. package/dist/daemon/soul-template.d.ts +2 -2
  14. package/dist/daemon/soul-template.js +11 -1
  15. package/dist/daemon/workflow-runner.d.ts +17 -3
  16. package/dist/daemon/workflow-runner.js +401 -28
  17. package/dist/di/container.js +1 -25
  18. package/dist/di/tokens.d.ts +0 -3
  19. package/dist/di/tokens.js +0 -3
  20. package/dist/engine/engine-factory.js +0 -1
  21. package/dist/infrastructure/console-defaults.d.ts +1 -0
  22. package/dist/infrastructure/console-defaults.js +4 -0
  23. package/dist/infrastructure/session/index.d.ts +0 -1
  24. package/dist/infrastructure/session/index.js +1 -3
  25. package/dist/manifest.json +124 -124
  26. package/dist/mcp/handlers/session.d.ts +1 -0
  27. package/dist/mcp/handlers/session.js +61 -13
  28. package/dist/mcp/output-schemas.d.ts +10 -10
  29. package/dist/mcp/server.js +1 -18
  30. package/dist/mcp/tools.d.ts +12 -12
  31. package/dist/mcp/transports/http-entry.js +0 -2
  32. package/dist/mcp/transports/stdio-entry.js +1 -2
  33. package/dist/mcp/types.d.ts +0 -2
  34. package/dist/trigger/daemon-console.d.ts +2 -0
  35. package/dist/trigger/daemon-console.js +1 -1
  36. package/dist/trigger/trigger-listener.d.ts +2 -0
  37. package/dist/trigger/trigger-listener.js +3 -1
  38. package/dist/trigger/trigger-router.d.ts +4 -3
  39. package/dist/trigger/trigger-router.js +13 -5
  40. package/dist/trigger/trigger-store.js +17 -4
  41. package/dist/types/workflow-source.d.ts +0 -1
  42. package/dist/types/workflow-source.js +3 -6
  43. package/dist/types/workflow.d.ts +1 -1
  44. package/dist/types/workflow.js +1 -2
  45. package/dist/v2/durable-core/domain/artifact-contract-validator.js +66 -0
  46. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +25 -0
  47. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.js +31 -0
  48. package/dist/v2/durable-core/schemas/artifacts/index.d.ts +3 -1
  49. package/dist/v2/durable-core/schemas/artifacts/index.js +14 -1
  50. package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +41 -0
  51. package/dist/v2/durable-core/schemas/artifacts/review-verdict.js +30 -0
  52. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +236 -236
  53. package/dist/v2/durable-core/schemas/session/events.d.ts +50 -50
  54. package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
  55. package/dist/v2/durable-core/schemas/session/manifest.d.ts +4 -4
  56. package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
  57. package/dist/v2/usecases/console-routes.d.ts +2 -1
  58. package/dist/v2/usecases/console-routes.js +207 -5
  59. package/dist/v2/usecases/console-service.js +14 -0
  60. package/dist/v2/usecases/console-types.d.ts +1 -0
  61. package/docs/authoring.md +16 -16
  62. package/docs/design/coordinator-artifact-protocol-design-candidates.md +155 -0
  63. package/docs/design/coordinator-artifact-protocol-design-review.md +103 -0
  64. package/docs/design/coordinator-artifact-protocol-implementation-plan.md +259 -0
  65. package/docs/design/coordinator-message-queue-drain-plan.md +241 -0
  66. package/docs/design/coordinator-message-queue-drain-review.md +120 -0
  67. package/docs/design/coordinator-message-queue-drain.md +289 -0
  68. package/docs/design/shaping-workflow-external-research.md +119 -0
  69. package/docs/discovery/late-bound-goals-impl-plan.md +147 -0
  70. package/docs/discovery/late-bound-goals-review.md +82 -0
  71. package/docs/discovery/late-bound-goals.md +118 -0
  72. package/docs/discovery/steer-endpoint-design-candidates.md +288 -0
  73. package/docs/discovery/steer-endpoint-design-review-findings.md +104 -0
  74. package/docs/discovery/steer-endpoint-implementation-plan.md +284 -0
  75. package/docs/ideas/backlog.md +447 -97
  76. package/docs/ideas/design-candidates-console-session-tree-impl.md +64 -0
  77. package/docs/ideas/design-candidates-session-tree-view.md +196 -0
  78. package/docs/ideas/design-review-findings-console-session-tree-impl.md +75 -0
  79. package/docs/ideas/design-review-findings-session-tree-view.md +88 -0
  80. package/docs/ideas/implementation_plan_session_tree_view.md +238 -0
  81. package/package.json +2 -1
  82. package/spec/authoring-spec.json +16 -16
  83. package/spec/shape.schema.json +178 -0
  84. package/spec/workflow-tags.json +232 -47
  85. package/workflows/coding-task-workflow-agentic.json +491 -480
  86. package/workflows/mr-review-workflow.agentic.v2.json +5 -1
  87. package/workflows/wr.shaping.json +182 -0
  88. package/dist/console-ui/assets/index-3oXZ_A9m.js +0 -28
  89. package/dist/console-ui/assets/index-8dh0Psu-.css +0 -1
  90. package/dist/infrastructure/session/DashboardHeartbeat.d.ts +0 -8
  91. package/dist/infrastructure/session/DashboardHeartbeat.js +0 -39
  92. package/dist/infrastructure/session/DashboardLockRelease.d.ts +0 -2
  93. package/dist/infrastructure/session/DashboardLockRelease.js +0 -29
  94. package/dist/infrastructure/session/HttpServer.d.ts +0 -60
  95. package/dist/infrastructure/session/HttpServer.js +0 -912
  96. package/workflows/coding-task-workflow-agentic.lean.v2.json +0 -648
  97. package/workflows/coding-task-workflow-agentic.v2.json +0 -324
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "id": "coding-task-workflow-agentic",
3
- "name": "Agentic Task Dev Workflow (Legacy)",
4
- "version": "1.5.0",
3
+ "name": "Agentic Task Dev Workflow",
4
+ "version": "1.2.0",
5
5
  "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
6
- "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
6
+ "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### Upstream context (Phase 0.5)\n\nPhase 0.5 looks for any upstream document that has already defined what to build -- a Shape Up pitch, PRD, BRD, RFC, design doc, user story with acceptance criteria, Jira epic, or equivalent. The agent uses whatever tools are available (repo search, WebFetch, Confluence/Notion/Glean MCPs, Memory MCP) to find it. If found, two flags are set: `upstreamSpecDetected` (something exists) and `solutionFixed` (whether the document commits to a specific technical direction). When `solutionFixed = true`, design ideation phases (1a-1c) are skipped and Phase 1d translates the upstream constraints directly into an engineering approach. When `solutionFixed = false`, design ideation runs normally but is constrained by whatever the upstream document does specify. The plan audit (Phase 4) checks for drift against `upstreamBoundaries` whenever an upstream document was found.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
7
7
  "examples": [
8
8
  "Implement JWT refresh token rotation in the auth service",
9
9
  "Fix the race condition in the cache invalidation path when concurrent writes occur",
@@ -14,192 +14,285 @@
14
14
  "recommendedAutonomy": "guided",
15
15
  "recommendedRiskPolicy": "conservative"
16
16
  },
17
- "preconditions": [
18
- "User provides a task description (ticket text or equivalent) and success criteria (even if partial).",
19
- "Agent has read access to the codebase and can use tools (search/read/edit/terminal).",
20
- "A validation path exists (tests, build, or a deterministic verification strategy).",
21
- "If the task touches production or critical paths, rollback/flag strategy can be defined."
17
+ "assessments": [
18
+ {
19
+ "id": "design-soundness-gate",
20
+ "purpose": "The selected design approach is committed with rationale. No unresolved ambiguity remains about what to build.",
21
+ "dimensions": [
22
+ {
23
+ "id": "design_soundness",
24
+ "purpose": "Design decision is made, tradeoffs are recorded, and there is no remaining ambiguity about the chosen approach.",
25
+ "levels": [
26
+ "low",
27
+ "high"
28
+ ]
29
+ }
30
+ ]
31
+ },
32
+ {
33
+ "id": "design-gaps-gate",
34
+ "purpose": "A deliberate scan for unconsidered alternatives, unhandled edge cases, or untracked risks has been completed.",
35
+ "dimensions": [
36
+ {
37
+ "id": "design_gaps",
38
+ "purpose": "Active scan completed: either no material gaps were found, or any found were addressed or explicitly filed.",
39
+ "levels": [
40
+ "low",
41
+ "high"
42
+ ]
43
+ }
44
+ ]
45
+ },
46
+ {
47
+ "id": "plan-completeness-gate",
48
+ "purpose": "Every slice has a defined scope and verifiable acceptance criterion. No slice is vague or open-ended.",
49
+ "dimensions": [
50
+ {
51
+ "id": "plan_completeness",
52
+ "purpose": "Slices have clear boundaries and acceptance criteria. The agent knows what done looks like for each.",
53
+ "levels": [
54
+ "low",
55
+ "high"
56
+ ]
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "id": "invariant-clarity-gate",
62
+ "purpose": "Invariants and non-goals are explicit enough to verify against during and after implementation.",
63
+ "dimensions": [
64
+ {
65
+ "id": "invariant_clarity",
66
+ "purpose": "Named invariants are checkable in the implementation. Non-goals are stated and will prevent scope creep.",
67
+ "levels": [
68
+ "low",
69
+ "high"
70
+ ]
71
+ }
72
+ ]
73
+ },
74
+ {
75
+ "id": "plan-gaps-gate",
76
+ "purpose": "A deliberate scan for missing slices, untracked risks, or acceptance criteria mismatches has been completed.",
77
+ "dimensions": [
78
+ {
79
+ "id": "plan_gaps",
80
+ "purpose": "Active scan completed: either no material gaps were found, or any found were addressed or explicitly filed.",
81
+ "levels": [
82
+ "low",
83
+ "high"
84
+ ]
85
+ }
86
+ ]
87
+ },
88
+ {
89
+ "id": "build-correctness-gate",
90
+ "purpose": "The implementation compiles and passes all relevant tests.",
91
+ "dimensions": [
92
+ {
93
+ "id": "build_correctness",
94
+ "purpose": "Build succeeds and tests pass. No compilation errors or failing assertions.",
95
+ "levels": [
96
+ "low",
97
+ "high"
98
+ ]
99
+ }
100
+ ]
101
+ },
102
+ {
103
+ "id": "invariant-preservation-gate",
104
+ "purpose": "Invariants identified during planning still hold in the implemented code.",
105
+ "dimensions": [
106
+ {
107
+ "id": "invariant_preservation",
108
+ "purpose": "Each named invariant from the plan has been verified in the implementation.",
109
+ "levels": [
110
+ "low",
111
+ "high"
112
+ ]
113
+ }
114
+ ]
115
+ },
116
+ {
117
+ "id": "implementation-gaps-gate",
118
+ "purpose": "A deliberate scan for gaps, issues, or improvements surfaced during implementation has been completed.",
119
+ "dimensions": [
120
+ {
121
+ "id": "implementation_gaps",
122
+ "purpose": "Active scan completed: gaps found are either fixed inline, filed as follow-up tickets, or explicitly deferred with rationale.",
123
+ "levels": [
124
+ "low",
125
+ "high"
126
+ ]
127
+ }
128
+ ]
129
+ }
22
130
  ],
23
- "clarificationPrompts": [
24
- "What are the acceptance criteria and explicit non-goals?",
25
- "What are the key invariants? (backwards compatibility, API contracts, data correctness, performance budgets, security constraints)",
26
- "Any rollout constraints? (feature flags, staged rollout, migrations, telemetry requirements)",
27
- "Any constraints on tooling? (can/can't run tests locally, CI only, limited environment access)",
28
- "Any preferred code patterns/examples in this repo that should be followed?"
131
+ "preconditions": [
132
+ "User provides a task description or equivalent objective.",
133
+ "Agent has codebase read access and can run the tools needed for analysis and validation.",
134
+ "A deterministic validation path exists (tests, build, or an explicit verification strategy).",
135
+ "If the task touches critical paths, rollback or containment strategy can be defined."
29
136
  ],
30
137
  "metaGuidance": [
31
- "DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for (a) business decisions, (b) missing external artifacts, or (c) permissions/constraints you cannot resolve.",
32
- "ARCHITECTURE OVER PATCHES: prefer structural fixes that eliminate whole classes of issues; avoid local patches unless they are explicitly intended.",
33
- "INVARIANTS FIRST: define constraints before designing or coding; use invariants to evaluate plan/slices/PR size.",
34
- "CONTEXT DOC: Maintain CONTEXT.md with decision log, user pushback, surprises, and deterministic resume payloads.",
35
- "CONTEXT DOC: Never commit .md files unless user explicitly asks.",
36
- "USER RULES: Capture user rules/preferences early as `userRules` (in CONTEXT.md). When ideating and planning, explicitly apply `userRules` and call out deviations with rationale + approval request.",
37
- "ARTIFACTS: For non-small tasks, maintain CONTEXT.md + implementation_plan.md. Create spec.md/design.md per docDepth.",
38
- "ARTIFACTS: Never auto-commit .md files. Write-or-paste: attempt write; if fails, output in chat.",
39
- "WRITE-OR-PASTE: When a step requires a file artifact, attempt to write it. If file writing fails or is unavailable, output the full content in chat (pasteable). Treat the output as canonical.",
40
- "VERTICAL SLICES: plan and implement as independently testable, reviewable slices; each slice should be mergeable and ideally shippable behind a flag.",
41
- "PR/MR SIZING: avoid monster PRs; if scope is large, split into multiple PRs by slices with explicit dependencies.",
42
- "REFLECTION LOOPS: iterate on context and plan with bounded loops; audit yourself before coding and before finalizing.",
43
- "SUBAGENTS: delegate ONLY when a step explicitly instructs delegation; prefer auditor-style delegation (challenge/audit/validate) over executor-style delegation.",
44
- "SUBAGENT CONTEXT: When delegating, provide filtered userRules, invariants, and feature brief via file refs.",
45
- "USER RULES FILTERING: When delegating, use keyword-based filtering to extract relevant rules: Architecture, Testing, Performance, Error handling. Bias toward over-inclusion if unsure.",
46
- "BUILDER DELEGATION: When delegationMode=delegate AND rigorMode=THOROUGH AND work is non-trivial, you MAY delegate to Builder (routine-feature-implementation). Main agent reviews output against allowlist/denylist/budgets.",
47
- "VALIDATION: prefer compile-time safety and deterministic tests; verify each slice before moving on; fail fast with meaningful errors.",
48
- "DECISION LOG: Entry includes Decision, Why (1-3 bullets), Impacted files (≤5), User feedback, Surprises. Cap 8 bullets.",
49
- "VARIABLE TYPES: Strings: taskComplexity, rigorMode, prStrategy, selectedApproach, runnerUpApproach, leadingCandidate, architectureRationale, keyRiskToMonitor, selectedSliceStrategy.",
50
- "VARIABLE TYPES: Strings (cont): pivotSeverity (none/MINOR/MODERATE/MAJOR), pivotReturnPhase, cleanSlateDivergence (None/Minor/Major).",
51
- "VARIABLE TYPES: Arrays: approaches, pivotTriggers, preMortemFindings, sliceStrategies, planningGaps, integrationGaps, integrationVerificationFindings, invariantViolations, resolvedFindings. Numbers: planConfidence, sliceIndex.",
52
- "VARIABLE TYPES: Booleans: continuePlanning, pivotTriggered, planningComplete, validationFailed, slicePlanStale, majorConcernsRaised.",
53
- "VARIABLE TYPES: Booleans (cont): spikeFailure, assumptionsValidated, planningGapsFound, integrationGapsFound, integrationVerificationPassed, integrationVerificationFailed, regressionDetected."
54
- ],
55
- "functionDefinitions": [
56
- {
57
- "name": "captureCheckpoint",
58
- "definition": "Update CONTEXT.md Machine State Checkpoint: paste response.state and response.next.stepInstanceId (raw JSON objects, not strings) from workflow_next. Keep last 3 checkpoints. Replace instruction comments with actual JSON."
59
- }
138
+ "DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for business decisions, missing external artifacts, or permissions you cannot resolve.",
139
+ "V2 DURABILITY: use output.notesMarkdown as the primary durable record. Do NOT mirror execution state into CONTEXT.md or any markdown checkpoint file.",
140
+ "ARTIFACT STRATEGY: `implementation_plan.md` drives execution. `spec.md`, when created, is canonical for observable behavior and serves as the verification anchor. Do not create extra artifacts unless they materially improve handoff.",
141
+ "OWNERSHIP & DELEGATION: the main agent owns strategy, decisions, synthesis, and implementation. Delegate only bounded cognitive routines via WorkRail Executor. Never hand off full task ownership or rely on named Builder/Researcher identities.",
142
+ "SUBAGENT SYNTHESIS: treat subagent output as evidence, not conclusions. State your hypothesis before delegating, then interrogate what came back: what was missed, wrong, or new? Say what changed your mind or what you still reject, and why.",
143
+ "PARALLELISM: when reads, audits, or delegations are independent, run them in parallel inside the phase. Parallelize cognition; serialize synthesis and canonical writes.",
144
+ "PHILOSOPHY LENS: apply the user's coding philosophy (from active session rules) as the evaluation lens. Flag violations by principle name, not as generic feedback. If principles conflict, surface the tension explicitly instead of silently choosing.",
145
+ "VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness — in that order of reliability.",
146
+ "DRIFT HANDLING: when reality diverges from the plan, update the plan artifact and re-audit deliberately rather than accumulating undocumented drift.",
147
+ "NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS.",
148
+ "SLICE DISCIPLINE: Phase 6 is a loop -- implement ONE slice per iteration. Do not implement multiple slices at once. The verification loop exists to catch drift per slice, not retroactively."
60
149
  ],
61
150
  "steps": [
62
151
  {
63
- "id": "phase-0-triage-and-mode",
64
- "title": "Phase 0: Triage (Complexity • Risk • Automation • Doc Depth • PR Strategy)",
65
- "prompt": "**ANALYZE** the task and classify with deterministic criteria.\n\n## 0) Rigor mode (deterministic)\nSelect **rigorMode**: QUICK / STANDARD / THOROUGH.\n\nScore each criterion 0–2 and sum. Use the table:\n- **Scope breadth** (files/areas touched): 0=1–2 files, 1=multi-file but single area, 2=multi-area\n- **Risk level**: 0=low, 1=moderate, 2=high (security/auth/data loss/release pipeline/perf critical)\n- **Uncertainty**: 0=clear requirements + known code path, 1=some ambiguity, 2=unknowns/missing acceptance criteria\n- **Repro difficulty**: 0=deterministic + local, 1=some async/edge cases, 2=flaky/CI-only/racy\n- **Externalities**: 0=internal-only, 1=some external deps, 2=publishing/infra/3rd-party integration\n\nDecision:\n- 0–2 **QUICK**\n- 3–5 **STANDARD**\n- 6–10 **THOROUGH**\n\nAlso set:\n- QUICK: `auditDepth=light`, `maxQuestions=1`, `maxParallelism=0`\n- STANDARD: `auditDepth=normal`, `maxQuestions=3`, `maxParallelism=1`\n- THOROUGH: `auditDepth=deep`, `maxQuestions=5`, `maxParallelism=3`\n\n## 1) taskComplexity\nSmall / Medium / Large\n- Small: 1–2 files, low risk, clear change, minimal ambiguity\n- Medium: multi-file, moderate risk, some ambiguity, needs planning\n- Large: architectural impact, multiple systems, high risk/unknowns\n\n## 2) riskLevel\nLow / Medium / High\n- High if: auth/payments/security/data integrity/perf-sensitive/production incident/release pipeline\n\n## 3) automationLevel\nHigh / Medium / Low\n- High: proceed autonomously; ask only for real decisions\n- Medium: normal confirmations at gates\n- Low: extra confirmations and explicit checklists\n\n## 4) docDepth (durable artifacts; no auto-commit)\nNone / Light / Full\n- For non-small tasks: always maintain `CONTEXT.md` and `implementation_plan.md`.\n- None: plan + context only (no additional spec/design)\n- Light: add `spec.md` (short)\n- Full: add `spec.md` + `design.md` (architecture + risks)\n\n## 5) prStrategy\nSinglePR / MultiPR\n- MultiPR if Large or diff is broad 
(many files/domains)\n\nSet these keys in the next `continue_workflow` call's `context` object: `rigorMode`, `auditDepth`, `maxQuestions`, `maxParallelism`, `taskComplexity`, `riskLevel`, `automationLevel`, `docDepth`, `prStrategy`.\n\n**VERIFY (minimal questions)**: ask the user to confirm or override `rigorMode` and `prStrategy` only if it impacts delivery expectations.\n\n**CONTEXT LOGGING**: Update CONTEXT.md Decision Log (follow format from metaGuidance) - record this triage decision and any user overrides.",
66
- "requireConfirmation": true
67
- },
68
- {
69
- "id": "phase-0b-minimum-context-request",
70
- "title": "Phase 0b: Minimum Inputs Gate (Only Ask What You Truly Need)",
71
- "prompt": "If any critical information is missing, request ONLY the minimum needed to proceed.\n\n**Ask for:**\n- Ticket text / requirements (if not provided)\n- Success criteria / expected behavior\n- Constraints (permissions, environment, deadlines)\n- Pointers to relevant code areas (if user has them)\n\n**Do NOT ask** questions you can answer via tools.\n\n**Output:** a short list of missing inputs (if any) and proceed once answered.",
152
+ "id": "phase-0-understand-and-classify",
153
+ "title": "Phase 0: Understand & Classify",
154
+ "prompt": "Understand this before you touch anything.\n\nMake sure the expected behavior is clear enough to proceed. If it really isn't, ask me only what you can't answer yourself. Don't ask me things you can find with tools.\n\nThen dig through the code. Figure out:\n- where this starts and what the call chain looks like\n- which files, modules, and functions matter\n- what patterns this should follow\n- how this repo verifies similar work\n- what the real risks, invariants, and non-goals are\n\nFigure out what philosophy to use while doing the work. Prefer, in order: Memory MCP (`mcp_memory_conventions`, `mcp_memory_prefer`, `mcp_memory_recall`), active session/Firebender rules, repo patterns, then me only if those still conflict or aren't enough.\n\nRecord where that philosophy lives, not a summary. If the stated rules and repo patterns disagree, capture the conflict.\n\nOnce you actually understand the task, classify it:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n\nUse this guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nThen force a context-clarity check. 
Score each from 0-2 and give one sentence of evidence for each score:\n- `entryPointClarity`: 0 = clear entry point and call chain, 1 = partial chain with gaps, 2 = still unclear where behavior starts or flows\n- `boundaryClarity`: 0 = clear boundary, 1 = likely boundary but some uncertainty, 2 = patch-vs-boundary decision still unclear\n- `invariantClarity`: 0 = important invariants are explicit, 1 = some are inferred or uncertain, 2 = important invariants are still unclear\n- `verificationClarity`: 0 = clear deterministic verification path, 1 = partial verification path, 2 = verification is still weak or unclear\n\nUse the rubric, not vibes:\n- QUICK: do not run the deeper context batch; if the rubric says you're missing too much context, your classification is probably wrong and you should reclassify upward before moving on\n- STANDARD: run the deeper context batch if the total score is 3 or more, or if `boundaryClarity`, `invariantClarity`, or `verificationClarity` is 2\n- THOROUGH: always run the deeper context batch\n\nThe deeper context batch is:\n- `routine-context-gathering` with `focus=COMPLETENESS`\n- `routine-context-gathering` with `focus=DEPTH`\n\nAfter the batch, synthesize what changed, what stayed the same, and what is still unknown. If the extra context changes the classification, update it before you leave this step.\n\nCapture:\n- `taskComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `prStrategy`\n- `contextSummary`\n- `candidateFiles`\n- `invariants`\n- `nonGoals`\n- `openQuestions` (only real human-decision questions)\n- `philosophySources`\n- `philosophyConflicts`",
72
155
  "requireConfirmation": {
73
156
  "or": [
74
157
  {
75
- "var": "automationLevel",
76
- "equals": "Low"
158
+ "var": "taskComplexity",
159
+ "equals": "Large"
77
160
  },
78
161
  {
79
- "var": "automationLevel",
80
- "equals": "Medium"
162
+ "var": "riskLevel",
163
+ "equals": "High"
81
164
  }
82
165
  ]
83
166
  }
84
167
  },
85
168
  {
86
- "id": "phase-0c-base-context-doc",
87
- "title": "Phase 0c: Base Context Doc (Non-Small)",
88
- "runCondition": {
89
- "var": "taskComplexity",
90
- "not_equals": "Small"
91
- },
92
- "prompt": "Create and initialize `CONTEXT.md` as the durable artifact for this workflow run.\n\n**Rules (write-or-paste, deterministic):**\n- If file-writing is possible in your environment: write/update `CONTEXT.md` now.\n- Otherwise: output the full pasteable content in chat.\n- Treat `CONTEXT.md` as canonical; do not paraphrase.\n- Do NOT commit documentation files unless the user explicitly asks.\n\n**Subagent capability detection (no repo file dependency):**\n- Determine if delegation is available from your runtime/tooling.\n- If you can delegate to the WorkRail Executor: set `delegationMode=delegate`.\n- Otherwise: set `delegationMode=solo`.\n- Add \"Environment Capabilities\" section to CONTEXT.md recording delegationMode.\n\n**CONTEXT.md is a living log**: it must be updated at each gate (triage, invariants, architecture choice, slice planning, plan refocus, each slice checkpoint, each PR packaging gate).\n\n**Size caps (keep resumable but bounded):**\n- Relevant files: max 10 (beyond that, link to plan artifacts)\n- Decision log entries: max 8 bullets each; use plan/spec/design for details\n- Keep last 3 Machine State Checkpoints only (delete older ones)\n\n**CONTEXT.md structure (must include):**\n\n1) **Task Summary** (1 paragraph)\n\n2) **Conversation Preferences**\n- Tone/verbosity preferences\n- Constraints like \"don't run X\" or \"ask before doing Y\"\n\n3) **Triage**\n- rigorMode, auditDepth, maxQuestions, maxParallelism\n- taskComplexity, riskLevel, automationLevel, docDepth, prStrategy\n\n3b) **Environment Capabilities**\n- delegationMode (solo/proxy/delegate)\n- Note: This value is cached for this workflow run\n\n4) **Inputs & Sources**\n- Ticket links/text pointers\n- User-provided file paths and external references\n\n5) **User Rules & Philosophies (`userRules`)**\n- Extract from: user instructions, README.md, docs/, ADRs, workflows/ patterns, 1–2 exemplar files near target module.\n- Keep this focused and actionable.\n- Set `userRules` in 
the next `continue_workflow` call's `context` object as a bulleted list.\n\n6) **Decision Log (append-only, capped at 8 bullets/entry)**\nFor each decision include:\n- Decision\n- Why\n- Alternatives considered\n- Impacted files\n- User feedback/pushback\n- Unexpected discoveries\n\n7) **Unexpected Discoveries / Deviations**\n- Anything surprising encountered (deps, scope expansion, missing invariants)\n- Any plan drift and how you addressed it\n\n8) **Relevant Files (max 10)**\n- Key files + why they matter\n- Beyond 10: reference plan artifacts\n\n9) **Artifacts Index**\n- `implementation_plan.md` (always for non-small)\n- `spec.md` / `design.md` if created\n\n10) **Progress**\n- Current slice name/index, what's done, what's next\n\n7) **Resumption Instructions**: Use captureCheckpoint() after each workflow_next call to maintain Machine State Checkpoint section.\n\n**Output:** the full content for `CONTEXT.md` (or confirm file written).",
169
+ "id": "phase-0-5-upstream-context",
170
+ "title": "Phase 0.5: Locate Upstream Context",
171
+ "prompt": "Before you start designing, find out what has already been decided upstream. Use whatever tools you have -- repo search, WebFetch, MCP integrations (Confluence, Notion, Google Docs, Glean, Linear, Jira), Memory MCP, whatever is available.\n\nLook for any document that describes this work: a pitch, PRD, BRD, RFC, design doc, user story with acceptance criteria, epic, Jira ticket, or equivalent. Also scan for URLs in the task description -- if someone linked a document, read it.\n\nIf nothing relevant exists, say so and move on. Do not fabricate context.\n\nIf you find something, read it and commit to three questions:\n\n1. **Is the problem fixed?** Does this document commit to a specific problem or need -- or is the framing still open?\n2. **Is the solution fixed?** Does this document commit to a specific technical direction -- or is that still yours to decide?\n3. **What are the hard boundaries?** Extract any explicit effort bounds, out-of-scope exclusions, required constraints, acceptance criteria, or invariants that must hold. Free-form summary -- do not force a schema onto it.\n\nFor the two booleans: if you are genuinely uncertain, default `solutionFixed = false`. Running design ideation when it wasn't strictly needed is a smaller cost than skipping it when it was.\n\nCapture:\n- `upstreamSpecDetected` (boolean)\n- `upstreamSource` (string: where you found it, or 'none')\n- `upstreamContext` (string: short summary of what you found and why it's relevant)\n- `problemFixed` (boolean)\n- `solutionFixed` (boolean)\n- `upstreamBoundaries` (string: the hard constraints extracted -- effort bound, out-of-scope items, required invariants, acceptance criteria)",
93
172
  "requireConfirmation": false
94
173
  },
95
174
  {
96
- "id": "phase-1-context-gathering",
97
- "title": "Phase 1: Context Gathering (Main Agent, Tool-Driven)",
98
- "runCondition": {
99
- "var": "taskComplexity",
100
- "not_equals": "Small"
101
- },
102
- "prompt": "Gather enough context to design and plan correctly.\n\n**Rules:**\n- Do this yourself (no delegation in this step).\n- Use tools to verify everything.\n- Prefer matching existing patterns over inventing new ones.\n- Prefer answering your own questions with tools; only keep true human-decision questions.\n\n**Deliverable (in chat, concise):**\n- Entry points and call chain sketch (file references)\n- Key modules/classes/functions involved\n- Existing patterns that apply (with 2–3 concrete examples)\n- Testing approach found in repo (where tests live; key helpers)\n- Risks/unknowns list\n\n**Question resolution pass (required):**\n- For uncertainties you encounter, attempt resolution via tools/code first.\n- Only add to `openQuestions` if it is a true business/product decision.\n- Enforce: `openQuestions.length <= maxQuestions`.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `contextSummary` (short)\n- `candidateFiles` (list of key file paths)\n- `openQuestions` (true human decisions only)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record relevant files, decisions made during context gathering, and any unexpected discoveries. If you discover a conflict between repo patterns and `userRules`, note it explicitly for planning."
103
- },
104
- {
105
- "id": "phase-1b-context-audit-mode-adaptive",
106
- "title": "Phase 1b: Context Audit (Mode-Adaptive, Subagent-Friendly)",
175
+ "id": "phase-1a-hypothesis",
176
+ "title": "Phase 1a: State Hypothesis",
107
177
  "runCondition": {
108
- "or": [
178
+ "and": [
179
+ {
180
+ "var": "taskComplexity",
181
+ "not_equals": "Small"
182
+ },
109
183
  {
110
184
  "var": "rigorMode",
111
- "equals": "THOROUGH"
185
+ "not_equals": "QUICK"
112
186
  },
113
187
  {
114
- "and": [
115
- {
116
- "var": "rigorMode",
117
- "equals": "STANDARD"
118
- },
119
- {
120
- "var": "riskLevel",
121
- "equals": "High"
122
- }
123
- ]
188
+ "var": "solutionFixed",
189
+ "not_equals": true
124
190
  }
125
191
  ]
126
192
  },
127
- "prompt": "Audit your context understanding before designing.\n\nMode behavior:\n- **QUICK**: skip this step (should not run)\n- **STANDARD**: do a self-audit; delegate at most once if you have subagent support\n- **THOROUGH**: delegate in parallel if you have subagent support\n\n**If subagent support is available and `rigorMode=THOROUGH`:**\n\nSpawn 2 WorkRail Executors SIMULTANEOUSLY using `routine-context-gathering`:\n\n**Delegation 1 — Completeness Audit:**\n- routine: routine-context-gathering\n- depth: 2 (Explore level)\n- mission: \"Audit main agent's context for missed areas and blind spots\"\n- target: [Areas main agent investigated]\n- focus: COMPLETENESS\n- deliverable: context-audit-completeness.md\n\n**Delegation 2 Depth Audit:**\n- routine: routine-context-gathering\n- depth: 3 (Analyze level)\n- mission: \"Audit main agent's context for shallow understanding\"\n- target: [Areas main agent investigated]\n- focus: DEPTH\n- deliverable: context-audit-depth.md\n\n**If `rigorMode=STANDARD`:**\n- Prefer self-audit. Optionally delegate ONCE using `routine-context-gathering` (depth: 2) focusing on COMPLETENESS.\n\n**If no subagents:** do a self-audit using the two lenses.\n\n**SYNTHESIZE** audit findings:\n- Update `contextSummary` with gaps filled\n- Resolve uncertainties with tools when possible\n- Update `openQuestions` but keep it <= `maxQuestions` and only for true human decisions\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record audit deltas, gaps addressed, and any new decisions.\n\n**Quality gate:** proceed only if you can explain the relevant flow end-to-end.",
193
+ "prompt": "Before you do design work, tell me your current best guess.\n\nKeep it short:\n1. what you think the right approach is\n2. what worries you about it\n3. what would most likely make it wrong\n\nCapture:\n- `initialHypothesis`",
128
194
  "requireConfirmation": false
129
195
  },
130
196
  {
131
- "id": "phase-2-invariants-and-nongoals",
132
- "title": "Phase 2: Invariants (Contracts, Constraints, Non-Goals)",
133
- "prompt": "Create explicit invariants and non-goals.\n\n**Include (as applicable):**\n- API/behavior contracts that must not change\n- Data invariants (schema constraints, idempotency, ordering)\n- Performance budgets (latency, allocations, query counts)\n- Security/privacy constraints\n- Rollout invariants (flagging, migration safety, rollback)\n- Non-goals (explicitly out of scope)\n\n**Output:** a numbered list of invariants + non-goals.\n\nSet these keys in the next `continue_workflow` call's `context` object: `invariants`, `nonGoals`.\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record invariants + non-goals, why they were chosen, any user pushback/clarifications, and impacted files/areas.\n\n**VERIFY:** ask the user to confirm only if any invariant is a product decision.",
134
- "requireConfirmation": {
135
- "or": [
197
+ "id": "phase-1b-design-quick",
198
+ "title": "Phase 1b: Lightweight Design (QUICK)",
199
+ "runCondition": {
200
+ "and": [
136
201
  {
137
- "var": "automationLevel",
138
- "equals": "Low"
202
+ "var": "taskComplexity",
203
+ "not_equals": "Small"
139
204
  },
140
205
  {
141
- "var": "riskLevel",
142
- "equals": "High"
206
+ "var": "rigorMode",
207
+ "equals": "QUICK"
208
+ },
209
+ {
210
+ "var": "solutionFixed",
211
+ "not_equals": true
143
212
  }
144
213
  ]
145
- }
146
- },
147
- {
148
- "id": "phase-ideation",
149
- "title": "Ideation (Divergent Thinking via Lenses)",
150
- "runCondition": {
151
- "var": "taskComplexity",
152
- "not_equals": "Small"
153
214
  },
154
- "prompt": "Generate approaches by answering DIFFERENT questions—not variations of one idea.\n\nThis is DIVERGENT thinking. Do not evaluate or compare yet.\n\n**Answer each lens (minimum 3, add more for THOROUGH):**\n\n1. **Simplicity lens:** What's the simplest approach that could work?\n - Minimal moving parts, easiest to understand\n - What would you do if you had 1 hour?\n\n2. **Maintainability lens:** What approach optimizes for future changes?\n - Easiest to modify in 6 months by someone unfamiliar\n - What would make a new team member's life easiest?\n\n3. **Clean-slate lens:** If this area didn't exist, how would you design it?\n - Ignore existing structure—what's the \"right\" architecture?\n - What would you build if starting fresh today?\n\n4. **(STANDARD+) Constraint-flip lens:** What if a key constraint didn't exist?\n - Often reveals assumptions worth questioning\n - What if [performance/compatibility/scope] wasn't a concern?\n\n**For each approach:**\n- **Name**: Short memorable label\n- **Core idea**: 2-3 sentences describing the fundamental approach\n- **Key trade-off**: What does this optimize for? What does it sacrifice?\n- **Shape**: High-level structure (what changes, where)\n\n**Anti-anchoring check:**\nIf your approaches feel like variations of one idea, you haven't diverged enough. 
The lenses should produce genuinely different shapes.\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nSpawn 3 WorkRail Executors SIMULTANEOUSLY using `routine-ideation`:\n\n**Delegation 1 — Simplicity Lens:**\n- routine: routine-ideation\n- perspective: simplicity\n- quantity: 3-5 ideas\n- problem: [Task problem statement from Phase 0]\n- constraints: [From invariants]\n- deliverable: ideas-simplicity.md\n\n**Delegation 2 Maintainability Lens:**\n- routine: routine-ideation\n- perspective: maintainability\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants]\n- deliverable: ideas-maintainability.md\n\n**Delegation 3 Clean-Slate Lens:**\n- routine: routine-ideation\n- perspective: innovation\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants, relaxed]\n- deliverable: ideas-clean-slate.md\n\n**Main agent synthesis:**\n- Combine ideas from all 3 deliverables\n- Deduplicate similar ideas (keep best version)\n- Select best from each perspective for `approaches` array\n\n**Output:** `approaches` array with one entry per lens answered.\n\nSet these keys in the next `continue_workflow` call's `context` object: `approaches`\n\n**CONTEXT LOGGING:** Add Approaches section to CONTEXT.md. Preserve ALL approaches—they may become Plan B/C later.",
215
+ "prompt": "Generate a lightweight design inline. QUICK rigor means the path is clear and risk is low.\n\nProduce two mandatory candidates:\n1. The simplest possible change that satisfies acceptance criteria\n2. Follow the existing repo pattern for this kind of change\n\nFor each candidate:\n- One-sentence summary\n- Which tensions it resolves and which it accepts\n- How it relates to existing repo patterns (follows / adapts / departs)\n- Failure mode to watch\n- Philosophy fit (name specific principles)\n\nCompare and recommend. If both converge on the same approach, say so honestly.\n\nWrite the output to `design-candidates.md` with this structure:\n- Problem Understanding (core tensions, what makes it hard)\n- Philosophy Constraints (which principles matter for this problem)\n- Candidates (each with: summary, tensions resolved/accepted, failure mode, philosophy fit)\n- Comparison and Recommendation\n- Open Questions (if any remain)",
155
216
  "requireConfirmation": false
156
217
  },
157
218
  {
158
- "id": "phase-assess-approaches",
159
- "title": "Assess Approaches (Analytical)",
219
+ "id": "phase-1b-design-deep",
220
+ "title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
160
221
  "runCondition": {
161
- "var": "taskComplexity",
162
- "not_equals": "Small"
163
- },
164
- "prompt": "Assess each approach individually. This is ANALYTICAL thinking—evaluate, don't compare yet.\n\n**For EACH approach in `approaches`:**\n\n1. **Invariant fit**:\n - Which invariants does it naturally satisfy?\n - Which require extra effort or workarounds?\n\n2. **Risk profile**:\n - What could go wrong?\n - What's the worst-case scenario?\n - What dependencies does it introduce?\n\n3. **Implementation shape**:\n - What files/areas change?\n - What new abstractions are needed?\n - Complexity estimate (Low/Medium/High)\n\n4. **Pattern alignment**:\n - Does it match existing `userRules` and repo patterns?\n - Any deviations needed?\n\n**Output:** Assessment for each approach (can be brief—2-3 bullets each).\n\n**Set:** `approachAssessments` (object mapping approach name to assessment)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Approaches section with assessments.",
165
- "requireConfirmation": false
166
- },
167
- {
168
- "id": "phase-compare-approaches",
169
- "title": "Compare Approaches (Evaluative)",
170
- "runCondition": {
171
- "var": "taskComplexity",
172
- "not_equals": "Small"
173
- },
174
- "prompt": "Compare approaches side-by-side to identify the leading candidate.\n\nThis is EVALUATIVE thinking—compare and rank, don't stress-test yet.\n\n**Using `approachAssessments`, compare approaches on:**\n- Which best fits the `invariants`?\n- Which has the best risk profile?\n- Which aligns with `userRules` and existing patterns?\n- If trade-offs conflict, which trade-off is acceptable for THIS task?\n\n**Build comparison summary:**\nFor each dimension, note which approach wins and why.\n\n**Identify:**\n- **Leading candidate**: Which approach scores best overall?\n- **Runner-up**: Which is second-best (this becomes Plan B)?\n\n**Output:**\n- Comparison summary (which approach wins on which dimension)\n- Leading candidate name\n- Runner-up name\n\n**Set:** `leadingCandidate`, `runnerUpApproach`\n\n**CONTEXT LOGGING:** Update CONTEXT.md with comparison summary.",
175
- "requireConfirmation": false
176
- },
177
- {
178
- "id": "phase-premortem",
179
- "title": "Pre-Mortem (Adversarial)",
180
- "runCondition": {
181
- "or": [
222
+ "and": [
182
223
  {
183
- "var": "rigorMode",
184
- "equals": "STANDARD"
224
+ "var": "taskComplexity",
225
+ "not_equals": "Small"
185
226
  },
186
227
  {
187
228
  "var": "rigorMode",
188
- "equals": "THOROUGH"
229
+ "not_equals": "QUICK"
230
+ },
231
+ {
232
+ "var": "solutionFixed",
233
+ "not_equals": true
189
234
  }
190
235
  ]
191
236
  },
192
- "prompt": "Stress-test the leading candidate before committing.\n\nThis is ADVERSARIAL thinking—try to break it.\n\n**For `leadingCandidate` only:**\n\n> \"It's 2 weeks from now. This approach failed catastrophically. What happened?\"\n\n**Identify:**\n- **Most likely failure mode**: What probably goes wrong?\n- **Hidden assumption**: What are we assuming that could be wrong?\n- **Dependency risk**: What external factor could break this?\n\n---\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nDelegate to WorkRail Executor using `routine-hypothesis-challenge`:\n\n**Pre-Mortem Delegation:**\n- routine: routine-hypothesis-challenge\n- rigor: 3 (use 5 for High-risk tasks)\n- hypotheses:\n - \"The leading candidate approach will succeed\"\n - [Key assumptions from the approach: architecture, dependencies, patterns]\n- evidence: `approachAssessments` for leading candidate\n- context:\n - Read: CONTEXT.md (invariants section)\n - Filtered userRules: architecture, risk, edge cases\n - Feature brief: problem + constraints + approach shape\n- deliverable: premortem-challenges.md\n\n**Synthesis:**\n- Review challenges from deliverable\n- Update `preMortemFindings` with subagent insights\n- If major concerns raised: flag for reconsideration in next phase\n\n---\n\n**Output:**\n- Pre-mortem findings for leading candidate\n- Flag if major concerns require reconsidering `leadingCandidate`\n\n**Set:** `preMortemFindings`, `majorConcernsRaised` (boolean)",
193
- "requireConfirmation": false
237
+ "templateCall": {
238
+ "templateId": "wr.templates.routine.tension-driven-design",
239
+ "args": {
240
+ "deliverableName": "design-candidates.md"
241
+ }
242
+ }
194
243
  },
195
244
  {
196
- "id": "phase-select-architecture",
197
- "title": "Select Architecture (Decisive)",
245
+ "id": "phase-1c-challenge-and-select",
246
+ "title": "Phase 1c: Challenge and Select",
198
247
  "runCondition": {
199
- "var": "taskComplexity",
200
- "not_equals": "Small"
248
+ "and": [
249
+ {
250
+ "var": "taskComplexity",
251
+ "not_equals": "Small"
252
+ },
253
+ {
254
+ "var": "solutionFixed",
255
+ "not_equals": true
256
+ }
257
+ ]
201
258
  },
202
- "prompt": "Make the architecture decision and define early-warning triggers.\n\n**If `majorConcernsRaised = true`:**\nThe pre-mortem raised serious concerns about `leadingCandidate`. Before proceeding:\n1. Review `preMortemFindings` carefully\n2. Consider if `runnerUpApproach` addresses the concerns better\n3. Decide: proceed with `leadingCandidate` (accepting risk) OR switch to `runnerUpApproach`\n\n**PART 1: SELECTION**\n\nBased on comparison and pre-mortem:\n\n- **selectedApproach**: Confirm or change from `leadingCandidate`\n- **architectureRationale**: Why this wins (2-3 sentences referencing comparison)\n- **runnerUpApproach**: Confirm Plan B\n- **keyRiskToMonitor**: The pre-mortem concern to watch during implementation\n\n**PART 2: PIVOT TRIGGERS (STANDARD+)**\n\nDefine conditions that should trigger reconsideration:\n\n```\nPIVOT TRIGGERS (if any occur during implementation, stop and reassess):\n- Trigger 1: [specific, observable condition]\n- Trigger 2: [specific, observable condition]\n```\n\nGood triggers are CONCRETE and OBSERVABLE:\n- \"If we need to touch >2 files outside target module\"\n- \"If the API doesn't support X capability\"\n- \"If tests require mocking >3 dependencies\"\n\nBad triggers (too vague):\n- \"If it gets hard\"\n- \"If there are problems\"\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedApproach`\n- `architectureRationale`\n- `runnerUpApproach`\n- `architectureRisks`\n- `pivotTriggers` (STANDARD+)\n- `keyRiskToMonitor`\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selection + rationale, rejected alternatives and why, pivot triggers.\n\n**VERIFY (Large or High-risk):** User confirms approach selection.",
259
+ "prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` — chosen design with rationale tied to tensions\n- `runnerUpApproach` — next-best option and why it lost\n- `architectureRationale` — tensions resolved vs accepted\n- `pivotTriggers` — conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
260
+ "promptFragments": [
261
+ {
262
+ "id": "phase-1c-challenge-standard",
263
+ "when": {
264
+ "var": "rigorMode",
265
+ "in": [
266
+ "STANDARD",
267
+ "THOROUGH"
268
+ ]
269
+ },
270
+ "text": "Run `routine-hypothesis-challenge` on the leading option's failure modes before you decide."
271
+ },
272
+ {
273
+ "id": "phase-1c-challenge-thorough",
274
+ "when": {
275
+ "var": "rigorMode",
276
+ "equals": "THOROUGH"
277
+ },
278
+ "text": "Also run `routine-execution-simulation` on the three most likely failure paths before you decide."
279
+ }
280
+ ],
281
+ "assessmentRefs": [
282
+ "design-soundness-gate",
283
+ "design-gaps-gate"
284
+ ],
285
+ "assessmentConsequences": [
286
+ {
287
+ "when": {
288
+ "anyEqualsLevel": "low"
289
+ },
290
+ "effect": {
291
+ "kind": "require_followup",
292
+ "guidance": "Address whichever gate scored low: design_soundness low -- the design decision is still ambiguous; commit to an approach and record the rationale before proceeding. design_gaps low -- the gap scan was not completed or found unaddressed gaps; either resolve them or explicitly file them before proceeding."
293
+ }
294
+ }
295
+ ],
203
296
  "requireConfirmation": {
204
297
  "or": [
205
298
  {
@@ -213,58 +306,13 @@
213
306
  {
214
307
  "var": "riskLevel",
215
308
  "equals": "High"
216
- },
217
- {
218
- "var": "majorConcernsRaised",
219
- "equals": true
220
309
  }
221
310
  ]
222
311
  }
223
312
  },
224
313
  {
225
- "id": "phase-spike-validation",
226
- "title": "Spike Validation (Validate Before Planning)",
227
- "runCondition": {
228
- "or": [
229
- {
230
- "var": "rigorMode",
231
- "equals": "STANDARD"
232
- },
233
- {
234
- "var": "rigorMode",
235
- "equals": "THOROUGH"
236
- }
237
- ]
238
- },
239
- "prompt": "Validate key assumptions about `selectedApproach` with quick, time-boxed probes before investing in detailed planning.\n\n**Purpose:** Catch \"this won't work\" early with real code, not just analysis.\n\n---\n\n**STEP 1: Identify key uncertainties**\n\nReview `preMortemFindings` and `keyRiskToMonitor`. For each, ask:\n- Can this be validated with a quick probe?\n- What's the smallest code/test that would prove or disprove this?\n\nPrioritize uncertainties that would INVALIDATE the approach if wrong.\n\n---\n\n**STEP 2: Design 1-3 spikes**\n\nEach spike should be:\n- **Time-boxed**: 5-15 minutes max\n- **Minimal**: Smallest code that validates the assumption\n- **Disposable**: Don't need to keep the code (but can)\n- **Binary outcome**: Works or doesn't\n\n**Example spikes:**\n- API probe: \"Can the API handle batch requests?\" → Try it\n- Pattern probe: \"Can we extend this class?\" → Try it\n- Perf probe: \"Is this fast enough?\" → Quick benchmark\n- Integration probe: \"Does DI work here?\" → Try injecting\n\n**Document each spike:**\n- Assumption being tested\n- Probe approach (what code/test)\n- Expected outcome if assumption holds\n\n---\n\n**STEP 3: Execute spikes**\n\nFor each spike:\n1. Write minimal probe code\n2. Run it\n3. 
Document result: VALIDATED / INVALIDATED / INCONCLUSIVE\n\n**If INCONCLUSIVE:** Note what additional information would resolve it.\n\n---\n\n**STEP 4: Decide**\n\n**If any spike INVALIDATED a critical assumption:**\n- Set `spikeFailure = true`\n- Document what was learned\n- Return to `phase-select-architecture` with new information\n- Consider `runnerUpApproach` or generate new approaches\n\n**If all spikes VALIDATED (or no critical spikes needed):**\n- Set `assumptionsValidated = true`\n- Proceed to slice planning with higher confidence\n\n---\n\n**Output:**\n- Spikes attempted: [{assumption, probe, result}]\n- Key learnings\n- Decision: proceed / return to selection\n\n**Set:** `spikeResults`, `spikeFailure`, `assumptionsValidated`\n\n**CONTEXT LOGGING:** Add Spike Results section to CONTEXT.md.",
240
- "requireConfirmation": {
241
- "var": "spikeFailure",
242
- "equals": true
243
- }
244
- },
245
- {
246
- "id": "phase-generate-slice-strategies",
247
- "title": "Generate Slice Strategies (Divergent)",
248
- "runCondition": {
249
- "var": "taskComplexity",
250
- "not_equals": "Small"
251
- },
252
- "prompt": "Generate 2-3 different ways to slice this work.\n\nThis is DIVERGENT thinking—explore different orderings, not just one.\n\n**Slicing lenses:**\n\n1. **Risk-first**: Order by risk (highest first → fail fast)\n - What's the riskiest change? Do it first.\n - Surfaces problems before investment grows\n - Trade-off: May require more scaffolding upfront\n\n2. **Foundation-first**: Order by dependencies (base → features)\n - Build the infrastructure/contracts first\n - Each slice builds on stable ground\n - Trade-off: May delay visible progress\n\n3. **Value-first**: Order by deliverable value (most valuable first)\n - Ship something useful early\n - Get user feedback faster\n - Trade-off: May need to revisit foundations later\n\n**For each strategy, define:**\n- **Name**: Risk-first / Foundation-first / Value-first (or custom)\n- **Slice order**: List slices in that order\n- **Per slice**: Name, scope, key files, verification plan\n- **PR boundaries**: Where would you split PRs?\n- **Trade-offs**: What's prioritized? What's sacrificed?\n\n**Output:** `sliceStrategies` array (2-3 entries)\n\n**Set:** `sliceStrategies`\n\n**CONTEXT LOGGING:** Add Slice Strategies section to CONTEXT.md.",
253
- "requireConfirmation": false
254
- },
255
- {
256
- "id": "phase-compare-select-slices",
257
- "title": "Compare & Select Slice Strategy (Evaluative)",
258
- "runCondition": {
259
- "var": "taskComplexity",
260
- "not_equals": "Small"
261
- },
262
- "prompt": "Compare slice strategies and select the best fit.\n\nThis is EVALUATIVE thinking—compare and decide.\n\n**Compare strategies on:**\n- **Risk management**: Which best surfaces problems early given our invariants?\n- **PR reviewability**: Which produces the cleanest PR boundaries?\n- **Feedback speed**: Which gets us useful feedback fastest?\n- **Rollout constraints**: Which aligns with any flagging/migration requirements?\n- **Implementation flow**: Which has the smoothest dependencies between slices?\n\n**Select:**\n- **selectedSliceStrategy**: [name]\n- **rationale**: Why this wins (2-3 sentences referencing comparison)\n- **slices**: The ordered list from selected strategy\n\n**PR sizing gate:**\n- If `prStrategy = MultiPR`, map slices to PRs.\n- If `prStrategy = SinglePR` but slices suggest broad changes, recommend switching to MultiPR.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedSliceStrategy`\n- `slices` (array from selected strategy)\n- `estimatedPRCount` (number)\n- `prStrategyRationale` (short)\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selected strategy + rationale, rejected strategies and why, slice boundaries and PR strategy.\n\n**VERIFY:** user confirms slice strategy and PR approach.",
263
- "requireConfirmation": true
264
- },
265
- {
266
- "id": "phase-locks-compliance-audit",
267
- "title": "Locks Compliance Audit (Canonical Docs → Slices Matrix)",
314
+ "id": "phase-1d-spec-to-approach",
315
+ "title": "Phase 1d: Spec to Approach (Solution Fixed)",
268
316
  "runCondition": {
269
317
  "and": [
270
318
  {
@@ -272,47 +320,33 @@
272
320
  "not_equals": "Small"
273
321
  },
274
322
  {
275
- "var": "riskLevel",
276
- "equals": "High"
277
- },
278
- {
279
- "or": [
280
- {
281
- "var": "rigorMode",
282
- "equals": "THOROUGH"
283
- },
284
- {
285
- "var": "docDepth",
286
- "equals": "Full"
287
- }
288
- ]
323
+ "var": "solutionFixed",
324
+ "equals": true
289
325
  }
290
326
  ]
291
327
  },
292
- "prompt": "Verify your slice plan covers all locked requirements from canonical docs.\n\n**Quick check:**\n- If canonical lock docs exist (ADRs with 'MUST', *-locks.md, contract specs): list locked items.\n- Confirm each slice or follow-up ticket covers each locked item.\n- If gaps exist: add to a slice, create a slice, or explicitly defer with user approval.\n\n**Output:** Gap list (if any) + resolution.\n\n**Set:** `locksGaps` (list or empty)\n\n**CONTEXT LOGGING:** If gaps exist, update CONTEXT.md Decision Log (follow format from metaGuidance).\n\n**VERIFY if gaps:** user confirms updated slices or explicit deferral.",
328
+ "prompt": "An upstream document has fixed the solution direction. Your job is to translate it into a concrete engineering approach, then pressure-test the translation on technical grounds.\n\nFrom `upstreamContext` and `upstreamBoundaries`, derive:\n1. **selectedApproach** -- the technical strategy. Name key interfaces, types, and modules. Do not re-open decisions already fixed by the upstream document.\n2. **architectureRationale** -- which upstream constraints drove each technical decision.\n3. **pivotTriggers** -- conditions under which the approach needs revisiting (engineering discoveries only, not product pivots).\n4. **acceptedTradeoffs** -- engineering tradeoffs accepted to honor upstream constraints.\n5. **identifiedFailureModes** -- technical risks in this approach.\n\nThen challenge the translation on purely technical grounds: what is the strongest engineering case against this approach? Which module boundary assumption is most likely wrong? Which integration point is least understood? Record your honest answer before continuing.\n\nIf `upstreamBoundaries` includes an effort bound (appetite, deadline, sprint allocation), treat it as a ceiling -- scope down to fit rather than expand. Record any engineering concerns that exceed it as `followUpTickets`.\n\nCapture:\n- `selectedApproach`\n- `architectureRationale`\n- `pivotTriggers`\n- `acceptedTradeoffs`\n- `identifiedFailureModes`\n- `followUpTickets` (initialize if needed)",
293
329
  "requireConfirmation": {
294
330
  "or": [
295
331
  {
296
332
  "var": "automationLevel",
297
333
  "equals": "Low"
334
+ },
335
+ {
336
+ "var": "taskComplexity",
337
+ "equals": "Large"
338
+ },
339
+ {
340
+ "var": "riskLevel",
341
+ "equals": "High"
298
342
  }
299
343
  ]
300
344
  }
301
345
  },
302
346
  {
303
- "id": "phase-5-plan-iteration-init",
304
- "title": "Phase 5: Plan Iteration Init (Bounded Loop Setup)",
305
- "runCondition": {
306
- "var": "taskComplexity",
307
- "not_equals": "Small"
308
- },
309
- "prompt": "Initialize a bounded plan-iteration loop.\n\nRule: max 5 iterations. Continue while issues are found; stop when a pass is clean (no findings).\n\nThe loop will enter automatically. The exit decision step inside the loop will use a typed artifact to control continuation.",
310
- "requireConfirmation": false
311
- },
312
- {
313
- "id": "phase-5-plan-iterations",
347
+ "id": "phase-2-design-review",
314
348
  "type": "loop",
315
- "title": "Phase 5: Plan Iteration Loop (Draft → Audit → Refocus)",
349
+ "title": "Phase 2: Design Review",
316
350
  "runCondition": {
317
351
  "var": "taskComplexity",
318
352
  "not_equals": "Small"
@@ -322,45 +356,57 @@
322
356
  "conditionSource": {
323
357
  "kind": "artifact_contract",
324
358
  "contractRef": "wr.contracts.loop_control",
325
- "loopId": "plan-iteration"
359
+ "loopId": "design_review_loop"
326
360
  },
327
- "maxIterations": 5
361
+ "maxIterations": 2
328
362
  },
329
363
  "body": [
330
364
  {
331
- "id": "phase-5a-draft-implementation-plan",
332
- "title": "Plan Artifact Draft/Update",
333
- "prompt": "Create or update the **Plan Artifact** (deterministic schema).\n\n**Write-or-paste rule:** attempt to write/update `implementation_plan.md`. If file writing fails, output full content in chat (canonical).\n\n**Plan Artifact headings (concise, complete):**\n\n1) Problem statement\n2) Acceptance criteria (bullets)\n3) Non-goals (bullets)\n4) **User rules/preferences applied:**\n - Relevant `userRules` + how plan respects them.\n - Deviations: rationale + mitigation + user decision (counts toward `maxQuestions`).\n5) Invariants (reference `invariants`)\n6) Proposed approach (1–2 paragraphs)\n7) Architecture decision (reference Phase 3/3b outputs):\n - Selected approach: reference `selectedApproach`\n - Rationale: reference `architectureRationale`\n - Runner-up (Plan B): reference `runnerUpApproach`\n - Key risk: reference `keyRiskToMonitor`\n - Full alternatives: see CONTEXT.md Approaches section\n8) **Vertical slices** (match `slices`: scope, done-definition, files, verification)\n\n **Work Packages inside each slice (mode-dependent):**\n - QUICK: skip work packages\n - STANDARD: optional; recommended when slice is high-risk or multi-layer\n - THOROUGH: required for non-trivial slices\n\n Each work package (WP):\n - ID: `S<sliceIndex>-WP<k>` (e.g., S1-WP1)\n - Goal: one coherent outcome\n - Targets (allowlist): dirs/files (+ allowed new files)\n - Forbidden (denylist): files/dirs not to touch\n - Budget: maxModified (5 STANDARD/8 THOROUGH), maxNew (2/3)\n - Done-definition: 2–5 bullets\n - Verification: 1–3 commands/tests\n - Dependencies: contracts/types from other WPs (if parallel)\n\n **Parallelism rule:** parallelize only if Targets don't overlap. 
Final WP must be \"Hook-up/Integration\" when parallel was used.\n\n9) Test plan (unit/integration/e2e; cite repo patterns)\n10) Risk register (risks + mitigation + rollback/flag)\n11) PR packaging (Single/Multi + rule)\n12) **Philosophy alignment per slice** (for each slice, include):\n - For each design principle touched by this slice: [principle] → [satisfied / tension / violated + 1-line why]\n - The audit step will independently verify these self-assessments. Be honest — violations caught early are cheaper than violations caught in review.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `planArtifact`\n- `implementationPlan`\n\n**VERIFY:** concrete enough for another engineer to implement without guessing.",
365
+ "id": "phase-2a-pre-assess-design-review",
366
+ "title": "Pre-Assess Design Review",
367
+ "prompt": "Before the detailed design review, state your current assessment in 2-4 sentences.\n\nSay:\n- what you think the strongest part of the selected design is right now\n- what you think the weakest part is right now\n- which tradeoff or failure mode worries you most\n\nThis is your reference point for interpreting the review findings.\n\nSet this key in the next `continue_workflow` call's `context` object:\n- `designReviewAssessment`",
334
368
  "requireConfirmation": false
335
369
  },
336
370
  {
337
- "id": "phase-5b-plan-audit-mode-adaptive",
338
- "title": "Plan Audit (Subagent-Friendly)",
339
- "prompt": "**Mission: Find gaps, issues, and inconsistencies in this plan.**\n\nActively look for:\n- **Gaps**: What's missing? What's not covered?\n- **Weak assumptions**: What could be wrong? What are we taking for granted?\n- **Inconsistencies**: Do parts contradict each other? Does the plan match the invariants?\n- **Risks**: What could go wrong? What hasn't been stress-tested?\n\n---\n\n**Mode behavior:**\n- QUICK: self-audit only\n- STANDARD: self-audit; delegate once if subagents exist\n- THOROUGH: parallel delegation if subagents exist\n\n**If subagents + `rigorMode=THOROUGH`:**\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel plan validation.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Delegation 1 — Plan Analysis:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md\n- requirements: [From Phase 2 invariants + acceptance criteria]\n- constraints: [Filtered userRules: architecture, testing, patterns]\n- context (file-reference-first, max 500 words if pasting):\n - Read: CONTEXT.md (userRules section), implementation_plan.md\n - Read: spec.md, design.md (if exist)\n - Invariants + locks (if locksMatrix exists)\n - Feature brief: problem statement + architecture decision + key constraints\n- deliverable: plan-analysis.md\n\n**Delegation 2 — Hypothesis Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 3\n- hypotheses: [Plan's key assumptions about architecture, dependencies, invariant satisfaction]\n- evidence: implementation_plan.md\n- context:\n - Read: implementation_plan.md\n - Filtered userRules: error handling, edge cases, validation rules\n - Invariants (especially high-risk ones)\n - Feature brief: problem + acceptance criteria + non-goals\n- deliverable: plan-challenges.md\n\n**Delegation 3 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Riskiest slice entry function]\n- inputs: [Expected inputs and state]\n- trace_depth: 3 (follow 
calls to understand failure modes)\n- context:\n - Read: implementation_plan.md (riskiest slice section)\n - Filtered userRules: performance, data flow, state management\n - Invariants touched by risky slice\n - Feature brief: architecture decision + risk register\n- deliverable: simulation-results.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (not full list)\n✅ Each includes invariants + locks (if applicable)\n✅ Each includes feature brief (file refs or <500 word excerpt)\n✅ Each has specific focus/lens\n\n**If subagents + `rigorMode=STANDARD`:**\nDelegate ONCE using Plan Analysis with full context (not filtered).\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**Else:** self-audit (same three lenses).\n\n**Output:**\n- Findings: Critical / Major / Minor\n- Plan amendments\n\n---\n\n**CLEAN-SLATE CHECK (STANDARD+, if findings exist):**\n\nBefore applying amendments, briefly answer:\n\n> \"If I started fresh right now, knowing everything I've learned, would I choose the same approach?\"\n\n1. Without looking at current plan, sketch in 1 sentence what approach you'd take\n2. Compare to `selectedApproach`:\n - **Same**: Proceed with amendments\n - **Minor variation**: Note the insight; consider incorporating\n - **Fundamentally different**: STOP. 
Set `cleanSlateDivergence = Major`\n\n**If fundamentally different:**\n- Document why fresh thinking differs\n- Return to Phase 3b with fresh approach as new candidate, OR\n- Document why current approach is still better despite fresh thinking\n\n---\n\n**REGRESSION CHECK (iteration 2+, if `resolvedFindings` is non-empty):**\n\nBefore running the forward-looking audit, verify each item in `resolvedFindings`:\n- Is the resolution still valid in the current plan?\n- Has the amendment been reverted or contradicted by subsequent changes?\n\nIf ANY regression found: add to `planFindings` with severity Critical and prefix \"REGRESSION: previously resolved finding reverted.\"\n\n---\n\n**PHILOSOPHY ALIGNMENT CHECK (mandatory, all modes):**\n\nReview the plan against the user's coding philosophy and design principles from `userRules`.\n\nThis evaluates DESIGN QUALITY — not plan consistency. Stale acceptance criteria, missing requirements, and coverage gaps are covered by the completeness audit above.\n\nIf no philosophy or design principles are found in `userRules`, skip this section and note \"No philosophy principles configured.\"\n\n**Required output format** (structured table):\nFor each violation or tension found:\n\n| Principle | Violation | Severity | Action |\n|-----------|-----------|----------|--------|\n| [Principle name from userRules] | [What violates it and why] | Red / Orange / Yellow | [Specific fix or justification needed] |\n\nSeverity guide:\n- **Red** (blocking) = must fix before implementation. Add to `planFindings`.\n- **Orange** (design quality) = should fix; document if intentionally accepted. Add to `planFindings`.\n- **Yellow** (tension) = tension between principles; document the tradeoff. 
Do NOT add to `planFindings` — these are informational only.\n\nChecklist — actively check: immutability, error handling model (Result/sealed vs exceptions), test doubles strategy (fakes vs mocks), dead code, naming clarity, abstraction level, type safety, exhaustiveness.\n\nIf NO violations found: explicitly state \"Philosophy check: no violations found\" with brief evidence (e.g., \"error handling uses Result<T> per philosophy; test doubles are fakes not mocks\"). Do NOT rubber-stamp. If you find zero violations on a non-trivial plan, double-check naming, dead code, and abstraction choices.\n\n**Set:** `planFindings`, `planAmendments`, `planConfidence` (1–10), `cleanSlateDivergence` (None/Minor/Major)",
371
+ "id": "phase-2b-design-review-core",
372
+ "title": "Design Review Core",
373
+ "templateCall": {
374
+ "templateId": "wr.templates.routine.design-review",
375
+ "args": {
376
+ "deliverableName": "design-review-findings.md"
377
+ }
378
+ },
340
379
  "requireConfirmation": false
341
380
  },
342
381
  {
343
- "id": "phase-5c-refocus-and-ticket-extraction",
344
- "title": "Refocus: Amendments + Tickets + Drift Detection",
345
- "prompt": "Apply amendments and refocus.\n\n**Do:**\n- Update `planArtifact` + `implementationPlan` to incorporate `planAmendments`.\n- Extract out-of-scope work into `followUpTickets`.\n- Ensure plan follows `invariants` and stays slice-oriented.\n\n**RESOLVED FINDINGS LEDGER (required):**\n\nWhen applying amendments, maintain the `resolvedFindings` entry in the next `continue_workflow` call's `context` object:\n- For each finding resolved in this iteration, add an entry: { finding: \"...\", resolution: \"...\", iteration: N }\n- Cap at 10 entries (if exceeded, drop oldest entries first)\n- This ledger carries forward to the next audit pass for regression checking\n\n**Set:** `resolvedFindings` (array, append new resolutions)\n\n**Drift detection:**\n- If user introduced new constraints/preferences, update `userRules` and log in `CONTEXT.md`.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record amendments accepted/rejected and why, user pushback, and scope/rules/verification drift\n\n**Set:** `followUpTickets`\n\n**VERIFY:** plan is coherent and PR-sized by slice.",
346
- "requireConfirmation": {
347
- "or": [
348
- {
349
- "var": "automationLevel",
350
- "equals": "Low"
382
+ "id": "phase-2c-synthesize-design-review",
383
+ "title": "Synthesize Design Review Findings",
384
+ "prompt": "Read `design-review-findings.md` and turn the review into workflow-owned decisions.\n\nCompare it against `designReviewAssessment`:\n- what did the review confirm?\n- what did it surface that you missed?\n- what changed your mind and what held firm?\n\nIf the findings are real, fix the design before you continue (`selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`).\n\nAfter any extra challenge, synthesize explicitly:\n- which findings actually matter\n- what changed in the design\n- what you reject and why\n\nFor any finding that changes the decision, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, artifacts, spec, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to drive the decision yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nCapture:\n- `designFindings`\n- `designRevised`",
385
+ "promptFragments": [
386
+ {
387
+ "id": "phase-2c-upstream-focus",
388
+ "when": {
389
+ "var": "solutionFixed",
390
+ "equals": true
351
391
  },
352
- {
353
- "var": "planConfidence",
354
- "lt": 8
355
- }
356
- ]
357
- }
392
+ "text": "The solution direction was fixed by an upstream document. Do NOT reopen decisions it already resolved. Critique the technical translation only: does the approach faithfully implement what the upstream document specifies? Does it stay within any stated effort bound? Reject any review finding that is actually a product or requirements question -- those belong in the upstream document, not here."
393
+ },
394
+ {
395
+ "id": "phase-2c-challenge-thorough",
396
+ "when": {
397
+ "var": "rigorMode",
398
+ "equals": "THOROUGH"
399
+ },
400
+ "text": "If the review surfaced materially non-empty or surprising findings, run `routine-hypothesis-challenge` on the most serious finding and `routine-execution-simulation` on the most dangerous failure mode before you finalize the revised design."
401
+ }
402
+ ],
403
+ "requireConfirmation": false
358
404
  },
359
405
  {
360
- "id": "phase-5d-loop-exit-decision",
361
- "title": "Loop Exit Decision (Fail-Safe)",
362
- "prompt": "**Non-optional:** Provide a loop control decision artifact.\n\n**Required output format:**\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```\n`loopId` is optional the engine infers the active loop automatically.\n\n**Decision rules (no exceptions):**\n\n- If `planFindings` is **NON-EMPTY** (any finding this pass, regardless of severity):\n → `decision: \"continue\"`\n → Rationale: Amendments need a verification pass; changes may have introduced new issues.\n\n- If `planFindings` is **EMPTY** (zero findings this pass):\n → `decision: \"stop\"` — but you **must demonstrate** the empty pass:\n → List each area you audited and explicitly confirm nothing was found in each.\n → Example: \"Checked invariant coverage ✓, data-flow correctness ✓, slice boundary alignment ✓ — zero findings.\"\n → Claiming `planFindings: []` without enumerated evidence is not sufficient.\n\n**Max iterations (5) still applies** — if you've hit 5 iterations and still finding issues, exit with `decision: \"stop\"` and document remaining concerns.\n\nIf continuing, name what was found + what changes next iteration.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) and update Machine State Checkpoint (keep last 3).",
363
- "requireConfirmation": true,
406
+ "id": "phase-2d-loop-decision",
407
+ "title": "Design Review Loop Decision",
408
+ "prompt": "Decide whether the design needs another pass.\n\nIf `designFindings` is non-empty and the design was revised, keep going so the revision gets checked.\nIf `designFindings` is empty, stop.\nIf you've hit the limit, stop and record the remaining concerns.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
409
+ "requireConfirmation": false,
364
410
  "outputContract": {
365
411
  "contractRef": "wr.contracts.loop_control"
366
412
  }
@@ -368,36 +414,34 @@
368
414
  ]
369
415
  },
370
416
  {
371
- "id": "phase-5e-doc-artifacts",
372
- "title": "Phase 5e: Spec/Design Artifacts (Mode-Dependent)",
373
- "runCondition": {
374
- "or": [
375
- {
376
- "var": "docDepth",
377
- "equals": "Light"
378
- },
379
- {
380
- "var": "docDepth",
381
- "equals": "Full"
382
- }
383
- ]
384
- },
385
- "prompt": "Produce mode-appropriate durable artifacts.\n\n**Write-or-paste rule:** attempt to write/update files. If unavailable, output full content in chat (canonical).\n\n**Always (non-small):** ensure `CONTEXT.md` and `implementation_plan.md` are current.\n\n**If `docDepth=Light`:**\nCreate/update `spec.md`:\n- Problem / Goals\n- Acceptance criteria\n- Non-goals\n- Invariants\n- PR strategy + rationale\n- Rollout / verification summary\n\n**If `docDepth=Full`:**\nCreate/update `spec.md` + `design.md`:\n- design.md: Architecture delta, integration points, risks + mitigations, verification strategy.\n\n**Chat output:** summarize what was written + short Checkpoint/Resume. If Full, also include Risk register + Verification matrix.\n\n**Resumption:** update `CONTEXT.md` Machine State Checkpoint with exact `workflow_next` payload.",
386
- "requireConfirmation": false
387
- },
388
- {
389
- "id": "phase-6a-test-design",
390
- "title": "Phase 6a: Test Design (Non-Small, Pre-Implementation)",
417
+ "id": "phase-3-plan-and-test-design",
418
+ "title": "Phase 3: Slice, Plan, and Test Design",
391
419
  "runCondition": {
392
420
  "var": "taskComplexity",
393
421
  "not_equals": "Small"
394
422
  },
395
- "prompt": "Design test strategy before implementation begins.\n\n**Required outputs:**\n- List acceptance criteria with corresponding test coverage\n- Identify edge cases and failure modes that need tests\n- Map invariants to test verification (which tests prove which invariants)\n- Document test execution plan (unit/integration/e2e)\n\n**Rigor-adaptive depth:**\n- QUICK: Brief test checklist (≤5 items)\n- STANDARD: Test coverage matrix (criteria tests)\n- THOROUGH: Comprehensive test plan with edge cases, failure injection, invariant proofs\n\n**Validation gate:** For high-risk invariants, require explicit test coverage. If gap exists, add to slice plan or acknowledge as risk.\n\nSet these keys in the next `continue_workflow` call's `context` object: `testDesign`, `testCoverageGaps`\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - test strategy, coverage gaps, and how gaps are addressed.\n\n**Output:** Test design artifact (in chat or file if write-or-paste).",
423
+ "prompt": "Turn the decision into a plan someone else could execute without guessing.\n\n**Open questions gate:** check `openQuestions` from Phase 0. If any remain unanswered and would materially affect implementation quality, either resolve them now with tools or record them in the risk register with an explicit decision about how to proceed without them. Do not silently carry unanswered questions into implementation.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` count of open questions that would materially affect implementation quality\n- `planConfidenceBand` — Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
424
+ "assessmentRefs": [
425
+ "plan-completeness-gate",
426
+ "invariant-clarity-gate",
427
+ "plan-gaps-gate"
428
+ ],
429
+ "assessmentConsequences": [
430
+ {
431
+ "when": {
432
+ "anyEqualsLevel": "low"
433
+ },
434
+ "effect": {
435
+ "kind": "require_followup",
436
+ "guidance": "Address whichever gate scored low: plan_completeness low -- one or more slices lack clear boundaries or verifiable acceptance criteria; sharpen them before implementation begins. invariant_clarity low -- invariants or non-goals are too vague to verify against; make them concrete. plan_gaps low -- the gap scan was not completed or found unaddressed gaps; resolve or file them before proceeding."
437
+ }
438
+ }
439
+ ],
396
440
  "requireConfirmation": false
397
441
  },
398
442
  {
399
- "id": "phase-6b-test-first-implementation",
400
- "title": "Phase 6b: Test-First Implementation (High Risk Only)",
443
+ "id": "phase-3b-spec",
444
+ "title": "Phase 3b: Spec (Observable Behavior)",
401
445
  "runCondition": {
402
446
  "and": [
403
447
  {
@@ -405,71 +449,116 @@
405
449
  "not_equals": "Small"
406
450
  },
407
451
  {
408
- "var": "riskLevel",
409
- "equals": "High"
452
+ "or": [
453
+ {
454
+ "var": "taskComplexity",
455
+ "equals": "Large"
456
+ },
457
+ {
458
+ "var": "riskLevel",
459
+ "equals": "High"
460
+ }
461
+ ]
410
462
  }
411
463
  ]
412
464
  },
413
- "prompt": "Implement tests BEFORE features for high-risk slices.\n\n**Do:**\n- Identify riskiest slice from plan (highest invariant risk)\n- Implement test scaffolding for that slice's verification plan\n- Ensure tests FAIL for the right reason (RED state)\n- Do NOT implement the feature yet\n\n**Why:** Proves your understanding of invariants before writing production code.\n\n**VERIFY:** Tests are runnable and fail meaningfully.",
414
- "requireConfirmation": true
465
+ "prompt": "Write `spec.md`.\n\nKeep it about what the feature does from the outside, not how you plan to build it.\n\nInclude:\n1. Feature summary\n2. Acceptance criteria\n3. Non-goals\n4. External API / interface contract if it matters\n5. Edge cases and failure modes\n6. How each acceptance criterion will be verified\n\nKeep it tight. If something can't be verified, it doesn't belong as an acceptance criterion.\n\n`spec.md` is canonical for observable behavior.",
466
+ "requireConfirmation": false
415
467
  },
416
468
  {
417
- "id": "phase-planning-gap-check",
418
- "title": "Planning Gap Check (Discovery)",
469
+ "id": "phase-4-plan-audit",
470
+ "type": "loop",
471
+ "title": "Phase 4: Plan Audit (Review, Fix, Decide)",
419
472
  "runCondition": {
420
- "var": "taskComplexity",
421
- "not_equals": "Small"
422
- },
423
- "prompt": "**Mission: Find what's MISSING from planning. Do not check boxes—find gaps.**\n\nThis is DISCOVERY mode. Your job is to find problems, not approve.\n\n**Important:** `planningGaps` should represent what is STILL unresolved after you make a best-effort attempt to fix it immediately (update artifacts, clarify decisions). If you can fix it now, do so and do not carry it forward as a gap.\n\n---\n\n**STEP 1: Artifact Check**\n\nWhat SHOULD exist? Look for each and note if missing:\n\n- `CONTEXT.md` — Does it exist? Is it current?\n- `implementation_plan.md` — Does it exist? Is it complete?\n- `approaches` in CONTEXT.md — Are there ≥2 genuinely different approaches?\n- `slices` — Are they defined with scope/files/verification?\n\n**For each, state:** \"EXISTS at [location]\" or \"MISSING\" or \"INCOMPLETE: [what's missing]\"\n\n---\n\n**STEP 2: Decision Check**\n\nWhat decisions should have been made but weren't?\n\n- Is `selectedApproach` decided with rationale?\n- Is `runnerUpApproach` (Plan B) defined?\n- Are `pivotTriggers` concrete and observable?\n- Are there any \"TBD\" or \"TODO\" items in the plan?\n- Are there unresolved questions that block implementation?\n\n**For each fuzzy decision:** State what's unclear and what would resolve it.\n\n---\n\n**STEP 3: Skeptical Review**\n\nPretend a skeptical senior engineer is reviewing your planning:\n\n- \"What would they challenge?\"\n- \"What looks underspecified?\"\n- \"What assumption haven't you validated?\"\n- \"Are you rushing because you want to start coding?\"\n\n---\n\n**STEP 4: Immediate gap-fixing attempt (required)**\n\nIf you found any gaps you can resolve without a product/business decision:\n- Fix them immediately (update `CONTEXT.md` / `implementation_plan.md` / plan variables)\n- Then re-check the items above once\n\nOnly keep gaps that are STILL unresolved after this best-effort attempt.\n\n---\n\n**Output:**\n- Gaps found (unresolved) (list, may be empty)\n- Fuzzy decisions (still 
unresolved) (list, may be empty)\n- Skeptic's concerns (list, may be empty)\n\n**Output (required exact lines):**\n- planningGaps = [...] \n- planningGapsFound = true|false\n\n**Set (required):**\n- `planningGaps` (unresolved gaps array)\n- `planningGapsFound` (true iff planningGaps is non-empty)\n\n**If ANY unresolved gaps remain (`planningGapsFound = true`):** STOP and ask the user what to do next before proceeding to the planning complete gate.",
424
- "requireConfirmation": {
425
- "var": "planningGapsFound",
426
- "equals": true
427
- },
428
- "validationCriteria": {
429
473
  "and": [
430
474
  {
431
- "type": "contains",
432
- "value": "planningGaps =",
433
- "message": "Must set planningGaps = [...] (even if empty)"
475
+ "var": "taskComplexity",
476
+ "not_equals": "Small"
434
477
  },
435
478
  {
436
- "type": "contains",
437
- "value": "planningGapsFound =",
438
- "message": "Must set planningGapsFound = true|false"
479
+ "var": "rigorMode",
480
+ "not_equals": "QUICK"
439
481
  }
440
482
  ]
441
- }
483
+ },
484
+ "loop": {
485
+ "type": "while",
486
+ "conditionSource": {
487
+ "kind": "artifact_contract",
488
+ "contractRef": "wr.contracts.loop_control",
489
+ "loopId": "plan_audit_loop"
490
+ },
491
+ "maxIterations": 2
492
+ },
493
+ "body": [
494
+ {
495
+ "id": "phase-4a-audit-and-refocus",
496
+ "title": "Audit Plan and Apply Fixes",
497
+ "prompt": "Audit the plan and fix it in the same pass. Fix the plan -- do NOT write or edit source code here. If you find something that looks like a trivial code fix, note it in the plan for Phase 6. Do not implement anything.\n\nLook for:\n- missing work\n- weak assumptions and risks\n- invariant gaps\n- bad slice boundaries\n- philosophy violations or tensions\n- regressions from things you already fixed\n- mismatches between `implementation_plan.md` and `spec.md` if there is a spec\n\nBefore you delegate, say what looks weakest right now and what you trust least.\n\nAfter the audit batch, synthesize explicitly:\n- what multiple auditors agreed on\n- what only one auditor raised\n- what you reject and why\n- what changed in the plan because of the audit\n\nFor any finding that changes the plan, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, plan/spec artifacts, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to change the plan yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nThen fix the plan immediately:\n- update `implementation_plan.md`\n- update `spec.md` if acceptance criteria or other observable behavior changed\n- update `slices` if the shape changed\n- move out-of-scope work into `followUpTickets`\n- track resolved findings (cap at 10, drop oldest)\n\nCapture:\n- `planFindings`\n- `planConfidence`\n- `resolvedFindings`\n- `followUpTickets`\n\nIf the plan drifted, fix the plan. Don't just keep going.",
498
+ "promptFragments": [
499
+ {
500
+ "id": "phase-4a-upstream-anchor",
501
+ "when": {
502
+ "var": "upstreamSpecDetected",
503
+ "equals": true
504
+ },
505
+ "text": "Authorship stripping: rewrite the plan's Problem and Selected Approach sections in neutral third-person voice before auditing. 'The plan proposes...', 'The approach implements...'. Evaluate the rewritten version. Also explicitly check against `upstreamBoundaries`: does any slice exceed the stated effort bound? Does any slice violate an out-of-scope exclusion? Does any slice implement something not justified by the upstream document? Does any acceptance criterion from the upstream document lack test coverage in the plan? Flag any such drift as a blocking finding."
506
+ },
507
+ {
508
+ "id": "phase-4a-delegation-quick",
509
+ "when": {
510
+ "var": "rigorMode",
511
+ "equals": "QUICK"
512
+ },
513
+ "text": "Do this yourself."
514
+ },
515
+ {
516
+ "id": "phase-4a-delegation-standard",
517
+ "when": {
518
+ "var": "rigorMode",
519
+ "equals": "STANDARD"
520
+ },
521
+ "text": "Run `routine-plan-analysis`, `routine-hypothesis-challenge`, and `routine-philosophy-alignment` in parallel before you decide whether the plan is good enough."
522
+ },
523
+ {
524
+ "id": "phase-4a-delegation-thorough",
525
+ "when": {
526
+ "var": "rigorMode",
527
+ "equals": "THOROUGH"
528
+ },
529
+ "text": "Run `routine-plan-analysis`, `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment` in parallel before you decide whether the plan is good enough."
530
+ }
531
+ ],
532
+ "requireConfirmation": false
533
+ },
534
+ {
535
+ "id": "phase-4b-loop-decision",
536
+ "title": "Loop Exit Decision",
537
+ "prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop — but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
538
+ "requireConfirmation": true,
539
+ "outputContract": {
540
+ "contractRef": "wr.contracts.loop_control"
541
+ }
542
+ }
543
+ ]
442
544
  },
443
545
  {
444
- "id": "phase-planning-complete-gate",
445
- "title": "Planning Complete Gate (Verification)",
546
+ "id": "phase-5-small-task-fast-path",
547
+ "title": "Phase 5: Small Task Fast Path",
446
548
  "runCondition": {
447
549
  "var": "taskComplexity",
448
- "not_equals": "Small"
550
+ "equals": "Small"
449
551
  },
450
- "prompt": "**BOUNDARY: Planning Execution**\n\nYou've done gap discovery. Now confirm with EVIDENCE that planning is complete.\n\n---\n\n**ENUMERATION (required):**\n\nDon't just check boxes. For each item, cite the specific artifact:\n\n**Architecture:**\n- [ ] approaches: \"[List approach names] in CONTEXT.md\"\n- [ ] selectedApproach: \"[Name], rationale: [1 sentence summary]\"\n- [ ] runnerUpApproach: \"[Name] is Plan B\"\n- [ ] pivotTriggers: \"[List the actual triggers]\"\n\n**Slices:**\n- [ ] slices defined: \"[N] slices in implementation_plan.md\"\n- [ ] each slice has: \"name, scope, files, verification — verified\"\n\n**Artifacts:**\n- [ ] CONTEXT.md: \"exists, current\"\n- [ ] implementation_plan.md: \"exists, [N] slices defined\"\n\n---\n\n**ANTI-CHECKBOX WARNING:**\n\nIf you're checking boxes quickly without pausing, STOP.\n- Did you actually verify each item exists?\n- Can you point to the specific location?\n- Are you rushing to start coding?\n\n---\n\n**Decision:**\n\n- If ALL items verified with evidence `planningComplete = true`\n- If ANY item cannot be verified STOP, return to address gap\n\n**After this gate:** Trust the plan and execute.\n\n**Set:** `planningComplete = true`",
451
- "requireConfirmation": true,
452
- "validationCriteria": {
453
- "and": [
454
- {
455
- "type": "contains",
456
- "value": "planningComplete = true",
457
- "message": "Must confirm planning is complete before implementation"
458
- }
459
- ]
460
- }
552
+ "prompt": "For Small tasks, fast does not mean shallow. Every item below is required.\n\n**Upstream spec check:** if `upstreamSpecDetected = true` and `taskComplexity = Small`, verify this classification is correct. If the upstream document describes non-trivial scope (multiple requirements, explicit out-of-scope items, acceptance criteria beyond a single behavior), the Small classification is likely wrong. Reclassify upward to Medium before proceeding -- do not silently discard upstream constraints by staying on the fast path.\n\n**1. Confirm all wiring points with tools.**\nDon't assume a file you create is reachable. Check every public entry point:\n- Does the new symbol need to be exported from an index file?\n- Does it need to be imported and registered somewhere (CLI command map, router, DI container, plugin registry)?\n- Is there a test file that needs to reference it?\nTrace the full call path from the public interface down to your new code before writing anything.\n\n**2. Implement the smallest correct change.**\nChange exactly what needs changing. No drive-by refactors, no extra abstractions.\n\n**3. Verify end-to-end.**\n- Run build and tests. Both must pass.\n- Manually trace the new behavior through the public entry point (e.g. run the CLI command, check the export resolves, hit the endpoint). If you can't do this deterministically with tools, say why.\n- Apply the user's coding philosophy as the review lens. Flag any violation by principle name.\n\n**4. 
Produce a handoff note.**\nOutput a notes artifact containing a JSON fenced block with the following fields.\nThe daemon reads this block to run `git commit` and `gh pr create` -- write it exactly as shown:\n\n```json\n{\n \"commitType\": \"feat\",\n \"commitScope\": \"mcp\",\n \"commitSubject\": \"imperative mood, max 72 chars total with type(scope): prefix, no period\",\n \"prTitle\": \"same as full commit first line\",\n \"prBody\": \"markdown with ## Summary (bullets) and ## Test plan (checklist)\",\n \"followUpTickets\": [],\n \"filesChanged\": [\"src/path/to/file.ts\", \"tests/unit/file.test.ts\"]\n}\n```\n\nFields:\n- `commitType`: feat / fix / chore / refactor / docs / test / perf (pick one)\n- `commitScope`: product area only (console / mcp / workflows / engine / schema / docs)\n- `commitSubject`: imperative mood, max 72 chars total with type(scope): prefix, no period\n- `prTitle`: same as full commit first line\n- `prBody`: markdown with ## Summary (bullets) and ## Test plan (checklist)\n- `followUpTickets`: list of deferred items, or empty array\n- `filesChanged`: list of every file you created or modified (required -- do not omit)\n\nThe daemon will use this artifact to run git commit and open the PR. Do not commit or push yourself.\n\nDo not create heavyweight planning artifacts unless risk unexpectedly grows.",
553
+ "requireConfirmation": false
461
554
  },
462
555
  {
463
- "id": "phase-7-implement-slices",
556
+ "id": "phase-6-implement-slices",
464
557
  "type": "loop",
465
- "title": "Phase 7: Implement Slice-by-Slice (PREP → IMPLEMENT → VERIFY → CHECKPOINT)",
558
+ "title": "Phase 6: Implement Slice-by-Slice",
466
559
  "runCondition": {
467
- "and": [
468
- {
469
- "var": "taskComplexity",
470
- "not_equals": "Small"
471
- }
472
- ]
560
+ "var": "taskComplexity",
561
+ "not_equals": "Small"
473
562
  },
474
563
  "loop": {
475
564
  "type": "forEach",
@@ -480,207 +569,129 @@
480
569
  },
481
570
  "body": [
482
571
  {
483
- "id": "phase-pre-impl-validation",
484
- "title": "Pre-Implementation Validation",
485
- "prompt": "Validate before implementing slice `{{currentSlice.name}}`.\n\n**FLAG RESET (required):**\nSet these keys in the next `continue_workflow` call's `context` object to their initial state:\n- planDrift = false\n- rulesDrift = false\n- verificationFailed = false\n- verificationApprovalRequired = false\n- verificationRetried = false\n- sliceVerified = false\n- softReplanCompleted = false\n- replanFailed = false\n- pivotTriggered = false\n- pivotSeverity = none\n- validationFailed = false\n\n---\n\n**PART 1: PIVOT TRIGGER CHECK**\n\nReview `pivotTriggers`:\n```\nPIVOT TRIGGERS:\n- [ ] Trigger 1: [condition] Status: [Not triggered / Triggered]\n- [ ] Trigger 2: [condition] Status: [Not triggered / Triggered]\n```\n\n**If ANY trigger fired:**\n1. Set `pivotTriggered = true`, `validationFailed = true`\n2. Assess severity:\n - **MINOR**: Return to `phase-select-architecture` (try runnerUp)\n - **MODERATE**: Return to `phase-ideation` (new constraint)\n - **MAJOR**: Return to `phase-invariants` (problem misunderstood)\n3. Set `pivotSeverity`, `pivotReturnPhase`\n4. 
STOP do not continue to Part 2\n\n---\n\n**PART 2: PLAN STALENESS CHECK (STANDARD+)**\n\nQuick audit of slice plan vs current codebase:\n- Are target files still in expected state?\n- Have dependencies/contracts changed since planning?\n- Any new constraints from prior slices?\n\nStaleness: [Fresh / Minor drift / Major drift]\n\n**If Major drift:** Set `slicePlanStale = true`, `validationFailed = true`\n\n---\n\n**PART 3: SANITY CHECK**\n\nVerify implementation prerequisites:\n- **Existence**: Target files/symbols exist\n- **Signatures**: Key function/type signatures match assumptions\n- **Scope**: No hidden touchpoints beyond plan\n- **Verification**: Slice verification commands are runnable\n\n**If any check fails:** Set `validationFailed = true`\n\n---\n\n**OUTPUT:**\n- Pivot triggers: [All clear / Triggered: X]\n- Staleness: [Fresh / Minor / Major]\n- Sanity: [Pass / Fail: reason]\n- `validationFailed`: true/false\n\n**Set:** `pivotTriggered`, `pivotSeverity`, `pivotReturnPhase`, `slicePlanStale`, `validationFailed`",
486
- "requireConfirmation": {
487
- "or": [
488
- {
489
- "var": "pivotTriggered",
490
- "equals": true
491
- },
492
- {
493
- "var": "slicePlanStale",
494
- "equals": true
495
- },
496
- {
497
- "var": "validationFailed",
498
- "equals": true
499
- }
500
- ]
501
- }
572
+ "id": "phase-6a-implement-slice",
573
+ "title": "Implement Slice",
574
+ "prompt": "Implement the current slice: `{{currentSlice.name}}`.\n\nBefore writing a single line of code, declare your scope:\n- List the exact files and symbols this slice touches\n- Confirm none of them belong to a later slice\n- If you have already edited files from this or any other slice in a previous step, stop and report it\n\nHard scope rule: you may only modify what is described in `{{currentSlice.name}}`. Anything outside that boundary is out of scope for this iteration -- not \"do it early\", not \"while I'm here\". If you discover you need to touch something outside this slice to make it compile or integrate, set `unexpectedScopeChange = true` and do the minimum necessary to stay green, then stop.\n\nImplement incrementally. Run tests and build to prove the slice works before advancing.\n\nTrack:\n- `specialCaseIntroduced` -- did this slice require a new special-case?\n- `unplannedAbstractionIntroduced` -- did this slice introduce an abstraction not in the plan?\n- `unexpectedScopeChange` -- did this slice touch files outside its planned scope?\n\nSet `verifyNeeded` to true if ANY of:\n- `prStrategy = MultiPR`\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nCapture: `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, `unexpectedScopeChange`, `verifyNeeded`",
575
+ "requireConfirmation": false
502
576
  },
503
577
  {
504
- "id": "phase-slice-prep",
505
- "title": "Slice Preparation",
578
+ "id": "phase-6b-verify-slice",
579
+ "title": "Verify Slice",
506
580
  "runCondition": {
507
- "var": "validationFailed",
508
- "not_equals": true
581
+ "var": "verifyNeeded",
582
+ "equals": true
509
583
  },
510
- "prompt": "Prepare to implement slice `{{currentSlice.name}}`.\n\n**Do:**\n- Re-state slice goal + verification\n- Identify exact files/components to change\n- Re-check invariants impacted\n- Match existing patterns (1–3 exemplars)\n- Apply `userRules` (call out if any rule affects this slice)\n\n**Work Package handling:**\n- If `currentSlice.workPackages` exist: use as implementation guidance\n- If no WPs: proceed with full slice scope as one unit\n\n**Git setup (first slice only):**\nIf sliceIndex = 0:\n- Check git availability: `git status`\n- Create feature branch: `feature/etienneb/acei-XXXX_<task-name>`\n- Set `featureBranch` in the next `continue_workflow` call's `context` object\n- Update CONTEXT.md with branch name\n\n**Output:**\n- Slice goal + verification (restated)\n- Files to change\n- Patterns to follow\n- userRules that apply",
511
- "requireConfirmation": false
512
- },
513
- {
514
- "id": "phase-7b-implement",
515
- "title": "IMPLEMENT: Slice {{sliceIndex}}",
516
- "prompt": "Implement the current slice.\n\n**Implementation strategy:**\n- If the slice has work packages: use them as implementation order and boundary guidance (do WP1, then WP2, etc. within this step).\n- Otherwise: implement full slice as one unit.\n\n---\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**OPTION A: DELEGATE TO BUILDER**\n\nWhen:\n- `delegationMode=delegate` AND\n- Slice is non-trivial (>3 files or new abstractions or multi-layer changes)\n\nDelegate to WorkRail Executor using **Feature Implementation Routine**.\n\nWork Package for Builder:\n```\nMISSION: Implement the current slice according to plan\n\nSLICE SPEC: [Extract from implementation_plan.md]\n- Goal\n- Scope (files/components)\n- Verification plan\n- Work packages (if defined): use as implementation order/guidance\n\nCONTEXT (filtered, file-reference-first):\n- Read: CONTEXT.md (userRules section)\n- Read: implementation_plan.md (this slice)\n- userRules (filtered): include rules matching this slice's domain (architecture, patterns, testing, error-handling)\n- invariants (filtered): those touched by this slice only\n- Patterns: [from PREP - 1-3 exemplars with file refs]\n\nCONSTRAINTS:\n- Follow filtered userRules\n- Preserve filtered invariants\n- Match patterns\n- No drive-by refactors\n- If slice has WPs: respect Targets/Forbidden/Budgets as guidance\n\nACCEPTANCE:\n- Slice done-definition met\n- Verification plan executable\n\nDELIVERABLE: implementation-complete.md\n- Summary (5-8 bullets)\n- File changes (file:line)\n- Tests written/updated\n- Deviations (with rationale)\n```\n\n**Self-check before delegating (required):**\n✅ userRules filtered (not full list)\n✅ invariants filtered (slice-relevant only)\n✅ Patterns included with file refs\n✅ Feature brief included\n\n**Main agent review (mandatory):**\n- Read Builder's deliverable.\n- Confirm: scope adhered to, done-definition met, no drive-bys.\n- Set `builderDeliverable`.\n\n**Builder 
fallback (if delegation fails):**\n\nBuilder output is considered incomplete/invalid if ANY:\n- Missing required deliverable file (implementation-complete.md)\n- Touched files in Forbidden list (if WP boundaries exist)\n- Exceeded budget (maxModified/maxNew violations if WP budgets exist)\n- Done-definition not met\n- Verification plan not executable\n\nIf any criterion is triggered: fall back to OPTION B (self-implement).\n- Log the fallback reason in CONTEXT.md.\n\n---\n\n**OPTION B: SELF-IMPLEMENT**\n\nWhen: `delegationMode=solo` OR trivial slice OR Builder fallback\n\nConstraints:\n- If slice has WPs: use them as guidance for implementation order and scope boundaries\n- Prefer architectural moves\n- No drive-by refactors\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record implementation approach (Builder/self/fallback) and if Builder: deliverable summary + any fallback reason.",
517
- "requireConfirmation": false
518
- },
519
- {
520
- "id": "phase-7c-verify",
521
- "title": "VERIFY: Slice {{sliceIndex}}",
522
- "prompt": "Verify the slice implementation.\n\n**PRIMARY VERIFICATION (always):**\n- Run verification commands from slice (or WP if applicable).\n- Add/adjust tests if needed.\n- Ensure invariants hold.\n- If blocked: request user to run and share output.\n\n---\n\n**PARALLEL VERIFICATION (THOROUGH + high-risk only):**\n\nRun when `rigorMode=THOROUGH` AND slice touches high-risk invariants (auth/payments/security/data integrity/perf-critical).\n\nIf `delegationMode=delegate`:\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel verification.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Verification 1 — Adversarial Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 5 (maximum for implementation verification)\n- hypotheses: [\"This implementation is correct\", key assumptions about the changes]\n- evidence: files changed in this slice\n- context (file-reference-first):\n - Read: files changed in this slice\n - Read: CONTEXT.md (invariants section)\n - Filtered userRules: edge cases, error handling, validation rules\n - Feature brief: slice goal + invariants touched + verification plan\n- deliverable: implementation-challenges.md\n\n**Verification 2 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Changed functions in this slice]\n- inputs: [Test scenarios: normal + edge cases]\n- trace_depth: 3\n- context:\n - Read: files changed in this slice\n - Read: implementation_plan.md (this slice's verification scenarios)\n - Filtered userRules: performance, state management, data flow rules\n - Invariants touched by this slice\n - Feature brief: architecture decision + risk register for this slice\n- deliverable: execution-simulation.md\n\n**Verification 3 — Plan Adherence:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md (this slice section)\n- requirements: [Slice done-definition + targets/forbidden]\n- constraints: [Filtered userRules: patterns, 
conventions, testing]\n- context:\n - Read: files changed + implementation_plan.md (this slice)\n - Feature brief: slice scope + done-definition + targets/forbidden\n- deliverable: plan-adherence.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (relevant to their lens)\n✅ Each includes invariants touched by this slice\n✅ Each includes feature brief (file refs or excerpt)\n✅ Each has specific verification lens\n\n**Synthesize (deterministic, bounded retry):**\n\n- **ALL THREE validate** → set `sliceVerified=true`, proceed to checkpoint\n\n- **ONE concern raised:**\n 1. Investigate the concern and attempt to fix within this slice iteration\n 2. Re-run ONLY the failing validator (max 1 retry per slice)\n 3. If passes after retry: set `sliceVerified=true`, proceed\n 4. If still fails after retry:\n - Add concern to `verificationFindings`\n - Require user approval to proceed OR rewind to planning\n - Set `verificationApprovalRequired=true`\n\n- **TWO+ concerns raised:**\n 1. Do NOT attempt automatic fix\n 2. Set `verificationFailed=true`\n 3. Stop slice loop immediately\n 4. User must choose:\n - Rewind to planning (Phase 5) via new workflow run with drift context\n - Manual fix + re-verify\n - Defer this slice to follow-up ticket\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `sliceVerified` (true/false)\n- `verificationFindings` (list of concerns)\n- `verificationFailed` (true/false)\n- `verificationApprovalRequired` (true/false)\n- `verificationRetried` (true/false)\n- `parallelVerificationRan` (true/false)\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record verification approach (primary only / parallel), concerns raised + retry outcome, and user decision (if approval required).",
584
+ "prompt": "Take a fresh look at what you just changed.\n\nCheck whether:\n- it matches the plan's intent, not just the letter\n- it hides assumptions or skips edge cases\n- invariants still hold\n- it regressed against the user's philosophy\n- multiple unverified slices now need to be reviewed together\n- `unexpectedScopeChange` was just harmless integration work or real plan drift\n\nIf any of `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, or `unexpectedScopeChange` is true, or if tests/build were shaky, run the verification batch before you decide this slice is done.\n\nAfter the verification batch, synthesize explicitly:\n- what multiple reviewers agreed on\n- what only one reviewer raised\n- what you reject and why\n- whether the drift was harmless integration work or real plan drift\n\nFor any finding that changes whether this slice is accepted, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, plan/spec artifacts, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to accept or block the slice yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nSay where you're least confident.\n\nIf the slice drifted materially, update `implementation_plan.md` and `spec.md` if observable behavior changed. If the drift changed boundaries or makes the current plan unreliable, stop and go back to planning.\n\nIf the concerns are serious, stop and go back to planning or ask me. Don't wave this through just because the code exists.\n\nCapture:\n- `verificationFindings`\n- `verificationFailed`",
585
+ "promptFragments": [
586
+ {
587
+ "id": "phase-6b-delegation-quick",
588
+ "when": {
589
+ "var": "rigorMode",
590
+ "equals": "QUICK"
591
+ },
592
+ "text": "Do the verification yourself."
593
+ },
594
+ {
595
+ "id": "phase-6b-delegation-standard",
596
+ "when": {
597
+ "var": "rigorMode",
598
+ "equals": "STANDARD"
599
+ },
600
+ "text": "If any slice-risk trigger fired, run `routine-hypothesis-challenge` and `routine-philosophy-alignment` before you decide this slice is done."
601
+ },
602
+ {
603
+ "id": "phase-6b-delegation-thorough",
604
+ "when": {
605
+ "var": "rigorMode",
606
+ "equals": "THOROUGH"
607
+ },
608
+ "text": "If any slice-risk trigger fired, also run `routine-execution-simulation` before you decide this slice is done."
609
+ },
610
+ {
611
+ "id": "phase-6b-multi-pr",
612
+ "when": {
613
+ "var": "prStrategy",
614
+ "equals": "MultiPR"
615
+ },
616
+ "text": "If this slice is verified and ready, stop here and package it for review before you move to the next slice."
617
+ }
618
+ ],
523
619
  "requireConfirmation": {
524
620
  "or": [
525
- {
526
- "var": "verificationApprovalRequired",
527
- "equals": true
528
- },
529
621
  {
530
622
  "var": "verificationFailed",
531
623
  "equals": true
532
- }
533
- ]
534
- }
535
- },
536
- {
537
- "id": "phase-7d1-record-work",
538
- "title": "CHECKPOINT Part 1: Record Work & Detect Drift",
539
- "prompt": "Checkpoint after slice completion.\n\n**Record:**\n- What changed (high level)\n- Verification summary\n- Invariants proven\n- What remains (next slice)\n- Follow-up tickets\n- PR notes: if `prStrategy=MultiPR`, propose slice(s) for next PR\n\n**Drift detection (git-based, deterministic):**\n- Run `git status` (or `git diff --name-only`) to list files actually modified in this slice.\n- Compare against slice scope (or WP Targets if WPs were used as guidance).\n- Set `planDrift=true` if:\n - Modified files outside planned scope\n - Invariants/slices/verification changed beyond plan\n - New deps/rollout requirements emerged\n- Set `rulesDrift=true` if user introduced new constraints during implementation.\n\n**Set:** `planDrift`, `rulesDrift`\n\n**Artifact maintenance:**\n- Update `implementation_plan.md` if drift occurred or slices evolved.\n- Update `CONTEXT.md` with:\n - Decision Log entry (≤8 bullets; for complex decisions reference plan artifacts)\n - Unexpected Discoveries\n - Relevant Files (top 10 in CONTEXT.md; full list in implementation_plan.md)\n\n**Write-or-paste.**",
540
- "requireConfirmation": false,
541
- "runCondition": {
542
- "var": "verificationFailed",
543
- "not_equals": true
544
- }
545
- },
546
- {
547
- "id": "phase-7d2-machine-state",
548
- "title": "CHECKPOINT Part 2: Capture Machine State (Resume/Rewind)",
549
- "runCondition": {
550
- "var": "verificationFailed",
551
- "not_equals": true
552
- },
553
- "prompt": "Capture machine state for deterministic resume/rewind.\n\nExecute captureCheckpoint() to update CONTEXT.md Machine State Checkpoint section.\n\nEnsure:\n- Captured after latest workflow_next call\n- response.state and response.next.stepInstanceId pasted as raw JSON objects\n- Last 3 checkpoints retained, oldest deleted\n- Timestamp recorded\n\nWrite-or-paste.",
554
- "requireConfirmation": false
555
- },
556
- {
557
- "id": "phase-7d-drift-gate",
558
- "title": "Drift Gate: Re-Plan if Boundaries Changed",
559
- "runCondition": {
560
- "and": [
561
- {
562
- "var": "verificationFailed",
563
- "not_equals": true
564
- },
565
- {
566
- "or": [
567
- {
568
- "var": "planDrift",
569
- "equals": true
570
- },
571
- {
572
- "var": "rulesDrift",
573
- "equals": true
574
- }
575
- ]
576
- }
577
- ]
578
- },
579
- "prompt": "Drift detected. Plan or implementation boundaries have changed since planning.\n\n**Detected drift:**\n- Plan drift: slice scope/files/verification changed beyond original plan\n- Rules drift: user introduced new constraints affecting implementation\n\n**Required decision (deterministic, single-attempt re-plan limit):**\n\n**Option 1: IN-PLACE RE-PLAN (soft, single attempt)**\n\nWhen to use: drift is containable (1-3 extra files, minor scope shift, clarified requirement).\n\nSteps:\n1. Update `implementation_plan.md` immediately to reflect actual scope/changes\n2. Update affected slices in `slices` array\n3. Run single-pass plan audit (self-audit if QUICK/STANDARD; delegate once if THOROUGH and subagents available)\n4. If audit passes (no new Major/Critical findings):\n - Set `softReplanCompleted=true`\n - Reset drift flags: `planDrift=false`, `rulesDrift=false`\n - Document drift resolution in CONTEXT.md Decision Log\n - Continue slice loop with updated plan\n5. If audit finds NEW drift or Major issues:\n - Set `replanFailed=true`\n - Escalate to Option 2 (user decision)\n\n**Single-attempt limit:** if drift recurs in a later slice after soft re-plan, you MUST escalate to Option 2.\n\n---\n\n**Option 2: HARD STOP + USER DECISION**\n\nWhen to use: High risk OR Major drift (scope doubled, new invariants, architectural change) OR soft re-plan failed/recurred.\n\nSteps:\n1. Stop slice loop immediately\n2. Document drift in CONTEXT.md with evidence (git diff, scope comparison)\n3. Update CONTEXT.md Machine State Checkpoint for resume\n4. 
User chooses:\n - **Rewind to planning**: exit this workflow run; start new run with updated context; use last Planning checkpoint state to resume at Phase 5\n - **Manual fix**: user fixes the issue outside workflow; resume at current slice\n - **Defer slice**: skip this slice, add to follow-up tickets, continue with next slice\n\n---\n\n**Option 3: CONTINUE WITH DEVIATION (document + approve)**\n\nWhen to use: Low/Medium risk AND drift is expected/acceptable.\n\nSteps:\n1. Document why drift is safe/expected\n2. Confirm all invariants still hold\n3. Update CONTEXT.md Decision Log with drift resolution + user approval\n4. Reset drift flags: `planDrift=false`, `rulesDrift=false`\n5. Continue slice loop\n\n---\n\n**Default recommendation:**\n- High risk → Option 2 (hard stop)\n- Medium risk + containable drift → Option 1 (soft re-plan)\n- Low risk + expected drift → Option 3 (continue with approval)\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `softReplanCompleted` (if Option 1 succeeded)\n- `replanFailed` (if Option 1 audit failed)\n- `driftResolution` (which option was chosen)\n\nUser must approve which option to take.",
580
- "requireConfirmation": true
581
- },
582
- {
583
- "id": "phase-7e-multi-pr-packaging-gate",
584
- "title": "PR Packaging Gate (Hard Stop when MultiPR)",
585
- "runCondition": {
586
- "and": [
587
- {
588
- "var": "verificationFailed",
589
- "not_equals": true
590
624
  },
591
625
  {
592
626
  "var": "prStrategy",
593
627
  "equals": "MultiPR"
594
628
  }
595
629
  ]
596
- },
597
- "prompt": "**Hard gate:** prevent PR size drift.\n\nIf `prStrategy=MultiPR`, stop here and package a PR before next slice.\n\n**PR-ready output:**\n- Proposed PR title\n- 3–6 bullet summary (why, not what)\n- Test plan (what ran)\n- Rollout/risks\n- What remains (next slice)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record why this boundary is the right PR boundary, any user pushback, and discoveries affecting PR sizing.\n\n**Wait for user confirmation** to proceed.\n\n(Do not merge; do not push/create PR unless user requests.)",
598
- "requireConfirmation": true
630
+ }
599
631
  }
600
632
  ]
601
633
  },
602
634
  {
603
- "id": "phase-integration-gap-check",
604
- "title": "Integration Gap Check (Discovery)",
605
- "runCondition": {
606
- "var": "taskComplexity",
607
- "not_equals": "Small"
608
- },
609
- "prompt": "**Mission: Find missing proofs before integration sign-off. Do not approve.**\n\nThis is DISCOVERY mode. Your job is to find gaps in integration verification (missing commands run, missing invariant proofs, missing perf/compat checks, etc.).\n\n**Important:** `integrationGaps` should represent what is STILL unresolved after you make a best-effort attempt to fix it immediately (run commands, document evidence, add missing proofs).\n\n---\n\n**STEP 1: Enumerate what SHOULD be proven**\n\nGiven the plan + invariants, list what evidence should exist for sign-off:\n- Full test suite run (unit + integration + e2e if applicable)\n- Invariant validation coverage (which tests/proofs map to which invariants)\n- Perf budgets validated (if any invariants mention perf)\n- Backward compatibility validated (if any invariants mention compat)\n- Build/compile/lint checks\n\n---\n\n**STEP 2: Find missing evidence**\n\nFor each item, mark:\n- PROVEN: with evidence (command + result summary)\n- MISSING: not run / not documented\n- INCONCLUSIVE: unclear\n\nIf any item is MISSING or INCONCLUSIVE, it is a GAP.\n\n---\n\n**STEP 3: Immediate gap-fixing attempt (required)**\n\nIf any gaps can be resolved without a product/business decision:\n- Fix them immediately (run the missing command(s), add documentation, add missing proof mapping)\n- Then re-check once\n\nOnly keep gaps that are STILL unresolved.\n\n---\n\n**Output:**\n- `integrationGaps`: list of missing or inconclusive proofs (may be empty)\n- Short note for each gap: what to run or document\n\n**Output (required exact lines):**\n- integrationGaps = [...]\n- integrationGapsFound = true|false\n\n**Set (required):**\n- `integrationGaps` (unresolved gaps array)\n- `integrationGapsFound` (true iff integrationGaps is non-empty)\n\n**If ANY unresolved gaps remain (`integrationGapsFound = true`):** STOP and ask the user what to do next before proceeding to the verification gate.",
610
- "requireConfirmation": {
611
- "var": "integrationGapsFound",
612
- "equals": true
613
- },
614
- "validationCriteria": {
615
- "and": [
616
- {
617
- "type": "contains",
618
- "value": "integrationGaps =",
619
- "message": "Must set integrationGaps = [...] (even if empty)"
620
- },
621
- {
622
- "type": "contains",
623
- "value": "integrationGapsFound =",
624
- "message": "Must set integrationGapsFound = true|false"
625
- }
626
- ]
627
- }
628
- },
629
- {
630
- "id": "phase-integration-verification-gate",
631
- "title": "Integration Verification Gate (Verification)",
635
+ "id": "phase-7-final-verification",
636
+ "type": "loop",
637
+ "title": "Phase 7: Final Verification Barrier (Verify, Fix, Re-Verify)",
632
638
  "runCondition": {
633
639
  "var": "taskComplexity",
634
640
  "not_equals": "Small"
635
641
  },
636
- "prompt": "**BOUNDARY: Execution → Handoff**\n\nYou've done gap discovery. Now verify integration with concrete evidence and set explicit pass/fail flags.\n\n---\n\n**REQUIRED VERIFICATIONS (enumerate commands actually run):**\n\n1) Full test suite\n- Command(s) run:\n- Result summary:\n\n2) Invariant validation\n- For each invariant: how is it proven? (test name or manual proof)\n- Any invariant without proof must be listed in `invariantViolations`\n\n3) Performance budgets (if applicable)\n- Command(s)/benchmark run:\n- Result vs budget:\n\n4) Backward compatibility (if applicable)\n- Command(s) run / checks performed:\n- Result summary:\n\n5) Build/compile check\n- Command(s) run:\n- Result summary:\n\n---\n\n**Output (required exact lines):**\n- integrationVerificationPassed = true|false\n- integrationVerificationFailed = true|false\n- regressionDetected = true|false\n\n---\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `integrationVerificationPassed`\n- `integrationVerificationFailed`\n- `integrationVerificationFindings` (list of issues)\n- `regressionDetected`\n- `invariantViolations` (list)\n\n**Rule:**\n- If `integrationVerificationFailed = true`, then `integrationVerificationPassed` must be false.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log - commands run + findings + any user decisions.",
637
- "requireConfirmation": {
638
- "or": [
639
- {
640
- "var": "integrationVerificationFailed",
641
- "equals": true
642
- },
643
- {
644
- "var": "regressionDetected",
645
- "equals": true
646
- }
647
- ]
642
+ "loop": {
643
+ "type": "while",
644
+ "conditionSource": {
645
+ "kind": "artifact_contract",
646
+ "contractRef": "wr.contracts.loop_control",
647
+ "loopId": "final_verification_loop"
648
+ },
649
+ "maxIterations": 2
648
650
  },
649
- "validationCriteria": {
650
- "and": [
651
- {
652
- "type": "contains",
653
- "value": "integrationVerificationPassed =",
654
- "message": "Must set integrationVerificationPassed = true|false"
655
- },
656
- {
657
- "type": "contains",
658
- "value": "integrationVerificationFailed =",
659
- "message": "Must set integrationVerificationFailed = true|false"
651
+ "body": [
652
+ {
653
+ "id": "phase-7a-final-verification-core",
654
+ "title": "Run Final Verification Batch",
655
+ "templateCall": {
656
+ "templateId": "wr.templates.routine.final-verification",
657
+ "args": {
658
+ "deliverableName": "final-verification-findings.md"
659
+ }
660
660
  },
661
- {
662
- "type": "contains",
663
- "value": "regressionDetected =",
664
- "message": "Must set regressionDetected = true|false"
661
+ "requireConfirmation": false
662
+ },
663
+ {
664
+ "id": "phase-7b-fix-and-summarize",
665
+ "title": "Synthesize Findings, Fix, and Re-Verify",
666
+ "prompt": "Read `final-verification-findings.md` and decide what actually needs fixing.\n\nDon't rubber-stamp it. The verifier is evidence, not the decision.\n\nIf `spec.md` exists, use it as the verification anchor and make sure every acceptance criterion is actually met.\n\nThis loop is verify, fix, then re-verify. If you fix anything here, the next pass exists to prove the fixes worked.\n\nSynthesize the verification output explicitly:\n- what the verifier found\n- what you agree with\n- what you reject and why\n- what changed because of the fixes\n\nFor any finding that changes final acceptance, classify it as:\n- `Confirmed`: you checked it against primary evidence (code, spec, tests/build, or direct workflow context)\n- `Plausible`: interesting, but not verified enough to accept or block final signoff yet\n- `Rejected`: contradicted by fuller context or direct evidence\n\nSubagent agreement alone is not enough for `Confirmed`.\n\nFix what has to be fixed now, rerun the affected verification, and update:\n- `implementation_plan.md` if the execution shape changed\n- `spec.md` if acceptance criteria, observable behavior, or external contracts changed\n\nCapture:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
667
+ "assessmentRefs": [
668
+ "build-correctness-gate",
669
+ "invariant-preservation-gate",
670
+ "implementation-gaps-gate"
671
+ ],
672
+ "assessmentConsequences": [
673
+ {
674
+ "when": {
675
+ "anyEqualsLevel": "low"
676
+ },
677
+ "effect": {
678
+ "kind": "require_followup",
679
+ "guidance": "Address whichever gate scored low: build_correctness low -- the build or tests are still failing; fix them before this step can complete. invariant_preservation low -- one or more invariants from the plan are violated; fix the implementation. implementation_gaps low -- the gap scan was not completed or found unaddressed gaps; fix them inline, file as follow-up tickets, or explicitly defer with rationale."
680
+ }
681
+ }
682
+ ],
683
+ "requireConfirmation": false
684
+ },
685
+ {
686
+ "id": "phase-7c-loop-decision",
687
+ "title": "Final Verification Loop Decision",
688
+ "prompt": "Decide whether final verification needs another pass or whether we're done.\n\nThis loop gets up to two verify/fix passes.\n- If verification found real issues and you fixed them, keep going so the fixes get re-verified.\n- If the issues are clean or resolved, stop.\n- If you've hit the limit, stop and record what remains.\n\nWhen you stop, include:\n- acceptance criteria status\n- invariant status\n- test/build summary\n- follow-up tickets\n- any philosophy tensions you accepted on purpose\n\n**Handoff block (required for daemon auto-commit):**\nInclude a JSON fenced block in your notes. The daemon reads this to run `git commit` and `gh pr create`:\n\n```json\n{\n \"commitType\": \"feat\",\n \"commitScope\": \"mcp\",\n \"commitSubject\": \"imperative mood, max 72 chars total with type(scope): prefix, no period\",\n \"prTitle\": \"same as full commit first line\",\n \"prBody\": \"markdown with ## Summary (bullets) and ## Test plan (checklist)\",\n \"followUpTickets\": [],\n \"filesChanged\": [\"src/path/to/file.ts\", \"tests/unit/file.test.ts\"]\n}\n```\n\nFields: `commitType` (feat/fix/chore/refactor/docs/test/perf), `commitScope` (product area only: console/mcp/workflows/engine/schema/docs), `commitSubject` (imperative, <=72 chars including prefix, no period), `prTitle` (same as commit first line), `prBody` (markdown), `followUpTickets` (array), `filesChanged` (required -- every file created or modified).\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
689
+ "requireConfirmation": true,
690
+ "outputContract": {
691
+ "contractRef": "wr.contracts.loop_control"
665
692
  }
666
- ]
667
- }
668
- },
669
- {
670
- "id": "phase-8a-small-task-fast-path",
671
- "title": "Phase 8a (Small Only): Fast Path",
672
- "runCondition": {
673
- "var": "taskComplexity",
674
- "equals": "Small"
675
- },
676
- "prompt": "For Small tasks:\n\n1) Confirm target locations with tools (existence + pattern match)\n2) Implement smallest correct change\n3) Verify (tests/build or deterministic check)\n4) Provide concise PR-ready summary\n\nAvoid heavy docs unless risk increases.",
677
- "requireConfirmation": false
678
- },
679
- {
680
- "id": "phase-9-final-validation-and-handoff",
681
- "title": "Phase 9: Final Validation + PR/MR Handoff (No Auto-Merge)",
682
- "prompt": "Final validation and handoff.\n\n**Do:**\n- Verify acceptance criteria and invariants\n- Confirm test/build status + coverage gaps\n- Summarize slice completion + PR strategy outcome\n- Provide PR/MR description draft (concise): summary + test plan + rollout notes\n- Provide follow-up tickets list\n\n**Durable artifacts (non-small):**\n- Update `implementation_plan.md` if any slices changed or drift occurred.\n- Ensure `CONTEXT.md` current:\n - Decision Log with final decisions + follow-ups (≤ 8 bullets).\n - Machine State Checkpoint (deterministic resume/rewind):\n\nExecute final captureCheckpoint() to record workflow completion state.\n\n**Checkpoint correctness checklist (required):**\n✅ Captured `state` object (not stringified)\n✅ Captured `stepInstanceId` object (not stringified)\n✅ Resume payload variants have instruction comments replaced with actual JSON from workflow_next response\n✅ Workflow identity recorded (version + timestamp)\n✅ Deleted oldest checkpoint if >3 exist\n\n**Important:** do not auto-merge, squash-merge, or delete branches.",
683
- "requireConfirmation": true
693
+ }
694
+ ]
684
695
  }
685
696
  ]
686
697
  }