npm - @exaudeus/workrail - Versions diffs - 3.31.1 → 3.33.0 - Mend

@exaudeus/workrail 3.31.1 → 3.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/dist/cli/commands/index.d.ts +1 -0
package/dist/cli/commands/index.js +3 -1
package/dist/cli/commands/worktrain-await.js +11 -9
package/dist/cli/commands/worktrain-daemon-install.d.ts +35 -0
package/dist/cli/commands/worktrain-daemon-install.js +291 -0
package/dist/cli/commands/worktrain-daemon.d.ts +31 -0
package/dist/cli/commands/worktrain-daemon.js +272 -0
package/dist/cli/commands/worktrain-spawn.js +11 -9
package/dist/cli-worktrain.js +329 -0
package/dist/cli.js +4 -22
package/dist/console/standalone-console.d.ts +28 -0
package/dist/console/standalone-console.js +142 -0
package/dist/{console/assets/index-6H9DeFxj.js → console-ui/assets/index-BuJFLLfY.js} +1 -1
package/dist/{console → console-ui}/index.html +1 -1
package/dist/daemon/agent-loop.d.ts +26 -0
package/dist/daemon/agent-loop.js +53 -2
package/dist/daemon/daemon-events.d.ts +103 -0
package/dist/daemon/daemon-events.js +56 -0
package/dist/daemon/workflow-runner.d.ts +6 -3
package/dist/daemon/workflow-runner.js +229 -33
package/dist/infrastructure/session/HttpServer.js +133 -34
package/dist/manifest.json +134 -70
package/dist/mcp/output-schemas.d.ts +30 -30
package/dist/mcp/transports/bridge-events.d.ts +4 -0
package/dist/mcp/transports/fatal-exit.js +4 -0
package/dist/mcp/transports/http-entry.js +2 -0
package/dist/mcp/transports/stdio-entry.js +26 -6
package/dist/mcp/v2/tools.d.ts +4 -4
package/dist/trigger/adapters/github-poller.d.ts +44 -0
package/dist/trigger/adapters/github-poller.js +190 -0
package/dist/trigger/adapters/gitlab-poller.d.ts +27 -0
package/dist/trigger/adapters/gitlab-poller.js +81 -0
package/dist/trigger/delivery-client.d.ts +2 -1
package/dist/trigger/delivery-client.js +4 -1
package/dist/trigger/index.d.ts +4 -1
package/dist/trigger/index.js +5 -1
package/dist/trigger/polled-event-store.d.ts +22 -0
package/dist/trigger/polled-event-store.js +173 -0
package/dist/trigger/polling-scheduler.d.ts +20 -0
package/dist/trigger/polling-scheduler.js +249 -0
package/dist/trigger/trigger-listener.d.ts +5 -0
package/dist/trigger/trigger-listener.js +53 -4
package/dist/trigger/trigger-router.d.ts +4 -2
package/dist/trigger/trigger-router.js +7 -4
package/dist/trigger/trigger-store.js +114 -33
package/dist/trigger/types.d.ts +17 -1
package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +224 -224
package/dist/v2/durable-core/schemas/session/events.d.ts +42 -42
package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
package/dist/v2/durable-core/tokens/payloads.d.ts +52 -52
package/dist/v2/usecases/console-routes.js +3 -3
package/dist/v2/usecases/console-service.js +133 -9
package/dist/v2/usecases/console-types.d.ts +7 -0
package/docs/design/daemon-conversation-logging-plan.md +98 -0
package/docs/design/daemon-conversation-logging-review.md +55 -0
package/docs/design/daemon-conversation-logging.md +129 -0
package/docs/design/github-polling-adapter-design-candidates.md +226 -0
package/docs/design/github-polling-adapter-design-review-findings.md +131 -0
package/docs/design/github-polling-adapter-implementation-plan.md +284 -0
package/docs/design/implementation_plan.md +192 -0
package/docs/design/workflow-id-validation-at-startup.md +146 -0
package/docs/design/workflow-id-validation-design-review.md +87 -0
package/docs/design/workflow-id-validation-implementation-plan.md +185 -0
package/docs/design/worktrain-system-prompt-report-issue-candidates.md +135 -0
package/docs/design/worktrain-system-prompt-report-issue-design-review.md +73 -0
package/docs/ideas/backlog.md +465 -0
package/package.json +1 -1
package/workflows/architecture-scalability-audit.json +1 -1
package/workflows/bug-investigation.agentic.v2.json +3 -3
package/workflows/coding-task-workflow-agentic.json +32 -32
package/workflows/coding-task-workflow-agentic.lean.v2.json +1 -1
package/workflows/coding-task-workflow-agentic.v2.json +7 -7
package/workflows/mr-review-workflow.agentic.v2.json +21 -12
package/workflows/personal-learning-materials-creation-branched.json +2 -2
package/workflows/production-readiness-audit.json +1 -1
package/workflows/relocation-workflow-us.json +2 -2
package/workflows/ui-ux-design-workflow.json +14 -14
package/workflows/workflow-for-workflows.json +3 -3
package/workflows/workflow-for-workflows.v2.json +2 -2
package/workflows/wr.discovery.json +1 -1
package/dist/{console → console-ui}/assets/index-8dh0Psu-.css +0 -0

package/workflows/coding-task-workflow-agentic.json CHANGED

@@ -1,6 +1,6 @@
 {
   "id": "coding-task-workflow-agentic",
-  "name": "Agentic Task Dev Workflow (Invariants \u2022 Architecture \u2022 Vertical Slices \u2022 PR Sizing \u2022 Audits \u2022 Resumable)",
+  "name": "Agentic Task Dev Workflow (Legacy)",
   "version": "1.5.0",
   "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
   "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
@@ -45,7 +45,7 @@
     "USER RULES FILTERING: When delegating, use keyword-based filtering to extract relevant rules: Architecture, Testing, Performance, Error handling. Bias toward over-inclusion if unsure.",
     "BUILDER DELEGATION: When delegationMode=delegate AND rigorMode=THOROUGH AND work is non-trivial, you MAY delegate to Builder (routine-feature-implementation). Main agent reviews output against allowlist/denylist/budgets.",
     "VALIDATION: prefer compile-time safety and deterministic tests; verify each slice before moving on; fail fast with meaningful errors.",
-    "DECISION LOG: Entry includes Decision, Why (1-3 bullets), Impacted files (\u22645), User feedback, Surprises. Cap 8 bullets.",
+    "DECISION LOG: Entry includes Decision, Why (1-3 bullets), Impacted files (≤5), User feedback, Surprises. Cap 8 bullets.",
     "VARIABLE TYPES: Strings: taskComplexity, rigorMode, prStrategy, selectedApproach, runnerUpApproach, leadingCandidate, architectureRationale, keyRiskToMonitor, selectedSliceStrategy.",
     "VARIABLE TYPES: Strings (cont): pivotSeverity (none/MINOR/MODERATE/MAJOR), pivotReturnPhase, cleanSlateDivergence (None/Minor/Major).",
     "VARIABLE TYPES: Arrays: approaches, pivotTriggers, preMortemFindings, sliceStrategies, planningGaps, integrationGaps, integrationVerificationFindings, invariantViolations, resolvedFindings. Numbers: planConfidence, sliceIndex.",
@@ -61,8 +61,8 @@
   "steps": [
     {
       "id": "phase-0-triage-and-mode",
-      "title": "Phase 0: Triage (Complexity \u2022 Risk \u2022 Automation \u2022 Doc Depth \u2022 PR Strategy)",
-      "prompt": "**ANALYZE** the task and classify with deterministic criteria.\n\n## 0) Rigor mode (deterministic)\nSelect **rigorMode**: QUICK / STANDARD / THOROUGH.\n\nScore each criterion 0\u20132 and sum. Use the table:\n- **Scope breadth** (files/areas touched): 0=1\u20132 files, 1=multi-file but single area, 2=multi-area\n- **Risk level**: 0=low, 1=moderate, 2=high (security/auth/data loss/release pipeline/perf critical)\n- **Uncertainty**: 0=clear requirements + known code path, 1=some ambiguity, 2=unknowns/missing acceptance criteria\n- **Repro difficulty**: 0=deterministic + local, 1=some async/edge cases, 2=flaky/CI-only/racy\n- **Externalities**: 0=internal-only, 1=some external deps, 2=publishing/infra/3rd-party integration\n\nDecision:\n- 0\u20132 \u2192 **QUICK**\n- 3\u20135 \u2192 **STANDARD**\n- 6\u201310 \u2192 **THOROUGH**\n\nAlso set:\n- QUICK: `auditDepth=light`, `maxQuestions=1`, `maxParallelism=0`\n- STANDARD: `auditDepth=normal`, `maxQuestions=3`, `maxParallelism=1`\n- THOROUGH: `auditDepth=deep`, `maxQuestions=5`, `maxParallelism=3`\n\n## 1) taskComplexity\nSmall / Medium / Large\n- Small: 1\u20132 files, low risk, clear change, minimal ambiguity\n- Medium: multi-file, moderate risk, some ambiguity, needs planning\n- Large: architectural impact, multiple systems, high risk/unknowns\n\n## 2) riskLevel\nLow / Medium / High\n- High if: auth/payments/security/data integrity/perf-sensitive/production incident/release pipeline\n\n## 3) automationLevel\nHigh / Medium / Low\n- High: proceed autonomously; ask only for real decisions\n- Medium: normal confirmations at gates\n- Low: extra confirmations and explicit checklists\n\n## 4) docDepth (durable artifacts; no auto-commit)\nNone / Light / Full\n- For non-small tasks: always maintain `CONTEXT.md` and `implementation_plan.md`.\n- None: plan + context only (no additional spec/design)\n- Light: add `spec.md` (short)\n- Full: add `spec.md` + `design.md` (architecture + risks)\n\n## 5) 
prStrategy\nSinglePR / MultiPR\n- MultiPR if Large or diff is broad (many files/domains)\n\nSet these keys in the next `continue_workflow` call's `context` object: `rigorMode`, `auditDepth`, `maxQuestions`, `maxParallelism`, `taskComplexity`, `riskLevel`, `automationLevel`, `docDepth`, `prStrategy`.\n\n**VERIFY (minimal questions)**: ask the user to confirm or override `rigorMode` and `prStrategy` only if it impacts delivery expectations.\n\n**CONTEXT LOGGING**: Update CONTEXT.md Decision Log (follow format from metaGuidance) - record this triage decision and any user overrides.",
+      "title": "Phase 0: Triage (Complexity • Risk • Automation • Doc Depth • PR Strategy)",
+      "prompt": "**ANALYZE** the task and classify with deterministic criteria.\n\n## 0) Rigor mode (deterministic)\nSelect **rigorMode**: QUICK / STANDARD / THOROUGH.\n\nScore each criterion 0–2 and sum. Use the table:\n- **Scope breadth** (files/areas touched): 0=1–2 files, 1=multi-file but single area, 2=multi-area\n- **Risk level**: 0=low, 1=moderate, 2=high (security/auth/data loss/release pipeline/perf critical)\n- **Uncertainty**: 0=clear requirements + known code path, 1=some ambiguity, 2=unknowns/missing acceptance criteria\n- **Repro difficulty**: 0=deterministic + local, 1=some async/edge cases, 2=flaky/CI-only/racy\n- **Externalities**: 0=internal-only, 1=some external deps, 2=publishing/infra/3rd-party integration\n\nDecision:\n- 0–2 → **QUICK**\n- 3–5 → **STANDARD**\n- 6–10 → **THOROUGH**\n\nAlso set:\n- QUICK: `auditDepth=light`, `maxQuestions=1`, `maxParallelism=0`\n- STANDARD: `auditDepth=normal`, `maxQuestions=3`, `maxParallelism=1`\n- THOROUGH: `auditDepth=deep`, `maxQuestions=5`, `maxParallelism=3`\n\n## 1) taskComplexity\nSmall / Medium / Large\n- Small: 1–2 files, low risk, clear change, minimal ambiguity\n- Medium: multi-file, moderate risk, some ambiguity, needs planning\n- Large: architectural impact, multiple systems, high risk/unknowns\n\n## 2) riskLevel\nLow / Medium / High\n- High if: auth/payments/security/data integrity/perf-sensitive/production incident/release pipeline\n\n## 3) automationLevel\nHigh / Medium / Low\n- High: proceed autonomously; ask only for real decisions\n- Medium: normal confirmations at gates\n- Low: extra confirmations and explicit checklists\n\n## 4) docDepth (durable artifacts; no auto-commit)\nNone / Light / Full\n- For non-small tasks: always maintain `CONTEXT.md` and `implementation_plan.md`.\n- None: plan + context only (no additional spec/design)\n- Light: add `spec.md` (short)\n- Full: add `spec.md` + `design.md` (architecture + risks)\n\n## 5) prStrategy\nSinglePR / MultiPR\n- MultiPR if Large or diff 
is broad (many files/domains)\n\nSet these keys in the next `continue_workflow` call's `context` object: `rigorMode`, `auditDepth`, `maxQuestions`, `maxParallelism`, `taskComplexity`, `riskLevel`, `automationLevel`, `docDepth`, `prStrategy`.\n\n**VERIFY (minimal questions)**: ask the user to confirm or override `rigorMode` and `prStrategy` only if it impacts delivery expectations.\n\n**CONTEXT LOGGING**: Update CONTEXT.md Decision Log (follow format from metaGuidance) - record this triage decision and any user overrides.",
       "requireConfirmation": true
     },
     {
@@ -89,7 +89,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Create and initialize `CONTEXT.md` as the durable artifact for this workflow run.\n\n**Rules (write-or-paste, deterministic):**\n- If file-writing is possible in your environment: write/update `CONTEXT.md` now.\n- Otherwise: output the full pasteable content in chat.\n- Treat `CONTEXT.md` as canonical; do not paraphrase.\n- Do NOT commit documentation files unless the user explicitly asks.\n\n**Subagent capability detection (no repo file dependency):**\n- Determine if delegation is available from your runtime/tooling.\n- If you can delegate to the WorkRail Executor: set `delegationMode=delegate`.\n- Otherwise: set `delegationMode=solo`.\n- Add \"Environment Capabilities\" section to CONTEXT.md recording delegationMode.\n\n**CONTEXT.md is a living log**: it must be updated at each gate (triage, invariants, architecture choice, slice planning, plan refocus, each slice checkpoint, each PR packaging gate).\n\n**Size caps (keep resumable but bounded):**\n- Relevant files: max 10 (beyond that, link to plan artifacts)\n- Decision log entries: max 8 bullets each; use plan/spec/design for details\n- Keep last 3 Machine State Checkpoints only (delete older ones)\n\n**CONTEXT.md structure (must include):**\n\n1) **Task Summary** (1 paragraph)\n\n2) **Conversation Preferences**\n- Tone/verbosity preferences\n- Constraints like \"don't run X\" or \"ask before doing Y\"\n\n3) **Triage**\n- rigorMode, auditDepth, maxQuestions, maxParallelism\n- taskComplexity, riskLevel, automationLevel, docDepth, prStrategy\n\n3b) **Environment Capabilities**\n- delegationMode (solo/proxy/delegate)\n- Note: This value is cached for this workflow run\n\n4) **Inputs & Sources**\n- Ticket links/text pointers\n- User-provided file paths and external references\n\n5) **User Rules & Philosophies (`userRules`)**\n- Extract from: user instructions, README.md, docs/, ADRs, workflows/ patterns, 1\u20132 exemplar files near target module.\n- Keep this focused and actionable.\n- Set 
`userRules` in the next `continue_workflow` call's `context` object as a bulleted list.\n\n6) **Decision Log (append-only, capped at 8 bullets/entry)**\nFor each decision include:\n- Decision\n- Why\n- Alternatives considered\n- Impacted files\n- User feedback/pushback\n- Unexpected discoveries\n\n7) **Unexpected Discoveries / Deviations**\n- Anything surprising encountered (deps, scope expansion, missing invariants)\n- Any plan drift and how you addressed it\n\n8) **Relevant Files (max 10)**\n- Key files + why they matter\n- Beyond 10: reference plan artifacts\n\n9) **Artifacts Index**\n- `implementation_plan.md` (always for non-small)\n- `spec.md` / `design.md` if created\n\n10) **Progress**\n- Current slice name/index, what's done, what's next\n\n7) **Resumption Instructions**: Use captureCheckpoint() after each workflow_next call to maintain Machine State Checkpoint section.\n\n**Output:** the full content for `CONTEXT.md` (or confirm file written).",
+      "prompt": "Create and initialize `CONTEXT.md` as the durable artifact for this workflow run.\n\n**Rules (write-or-paste, deterministic):**\n- If file-writing is possible in your environment: write/update `CONTEXT.md` now.\n- Otherwise: output the full pasteable content in chat.\n- Treat `CONTEXT.md` as canonical; do not paraphrase.\n- Do NOT commit documentation files unless the user explicitly asks.\n\n**Subagent capability detection (no repo file dependency):**\n- Determine if delegation is available from your runtime/tooling.\n- If you can delegate to the WorkRail Executor: set `delegationMode=delegate`.\n- Otherwise: set `delegationMode=solo`.\n- Add \"Environment Capabilities\" section to CONTEXT.md recording delegationMode.\n\n**CONTEXT.md is a living log**: it must be updated at each gate (triage, invariants, architecture choice, slice planning, plan refocus, each slice checkpoint, each PR packaging gate).\n\n**Size caps (keep resumable but bounded):**\n- Relevant files: max 10 (beyond that, link to plan artifacts)\n- Decision log entries: max 8 bullets each; use plan/spec/design for details\n- Keep last 3 Machine State Checkpoints only (delete older ones)\n\n**CONTEXT.md structure (must include):**\n\n1) **Task Summary** (1 paragraph)\n\n2) **Conversation Preferences**\n- Tone/verbosity preferences\n- Constraints like \"don't run X\" or \"ask before doing Y\"\n\n3) **Triage**\n- rigorMode, auditDepth, maxQuestions, maxParallelism\n- taskComplexity, riskLevel, automationLevel, docDepth, prStrategy\n\n3b) **Environment Capabilities**\n- delegationMode (solo/proxy/delegate)\n- Note: This value is cached for this workflow run\n\n4) **Inputs & Sources**\n- Ticket links/text pointers\n- User-provided file paths and external references\n\n5) **User Rules & Philosophies (`userRules`)**\n- Extract from: user instructions, README.md, docs/, ADRs, workflows/ patterns, 1–2 exemplar files near target module.\n- Keep this focused and actionable.\n- Set `userRules` 
in the next `continue_workflow` call's `context` object as a bulleted list.\n\n6) **Decision Log (append-only, capped at 8 bullets/entry)**\nFor each decision include:\n- Decision\n- Why\n- Alternatives considered\n- Impacted files\n- User feedback/pushback\n- Unexpected discoveries\n\n7) **Unexpected Discoveries / Deviations**\n- Anything surprising encountered (deps, scope expansion, missing invariants)\n- Any plan drift and how you addressed it\n\n8) **Relevant Files (max 10)**\n- Key files + why they matter\n- Beyond 10: reference plan artifacts\n\n9) **Artifacts Index**\n- `implementation_plan.md` (always for non-small)\n- `spec.md` / `design.md` if created\n\n10) **Progress**\n- Current slice name/index, what's done, what's next\n\n7) **Resumption Instructions**: Use captureCheckpoint() after each workflow_next call to maintain Machine State Checkpoint section.\n\n**Output:** the full content for `CONTEXT.md` (or confirm file written).",
       "requireConfirmation": false
     },
     {
@@ -99,7 +99,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Gather enough context to design and plan correctly.\n\n**Rules:**\n- Do this yourself (no delegation in this step).\n- Use tools to verify everything.\n- Prefer matching existing patterns over inventing new ones.\n- Prefer answering your own questions with tools; only keep true human-decision questions.\n\n**Deliverable (in chat, concise):**\n- Entry points and call chain sketch (file references)\n- Key modules/classes/functions involved\n- Existing patterns that apply (with 2\u20133 concrete examples)\n- Testing approach found in repo (where tests live; key helpers)\n- Risks/unknowns list\n\n**Question resolution pass (required):**\n- For uncertainties you encounter, attempt resolution via tools/code first.\n- Only add to `openQuestions` if it is a true business/product decision.\n- Enforce: `openQuestions.length <= maxQuestions`.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `contextSummary` (short)\n- `candidateFiles` (list of key file paths)\n- `openQuestions` (true human decisions only)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record relevant files, decisions made during context gathering, and any unexpected discoveries. If you discover a conflict between repo patterns and `userRules`, note it explicitly for planning."
+      "prompt": "Gather enough context to design and plan correctly.\n\n**Rules:**\n- Do this yourself (no delegation in this step).\n- Use tools to verify everything.\n- Prefer matching existing patterns over inventing new ones.\n- Prefer answering your own questions with tools; only keep true human-decision questions.\n\n**Deliverable (in chat, concise):**\n- Entry points and call chain sketch (file references)\n- Key modules/classes/functions involved\n- Existing patterns that apply (with 2–3 concrete examples)\n- Testing approach found in repo (where tests live; key helpers)\n- Risks/unknowns list\n\n**Question resolution pass (required):**\n- For uncertainties you encounter, attempt resolution via tools/code first.\n- Only add to `openQuestions` if it is a true business/product decision.\n- Enforce: `openQuestions.length <= maxQuestions`.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `contextSummary` (short)\n- `candidateFiles` (list of key file paths)\n- `openQuestions` (true human decisions only)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record relevant files, decisions made during context gathering, and any unexpected discoveries. If you discover a conflict between repo patterns and `userRules`, note it explicitly for planning."
     },
     {
       "id": "phase-1b-context-audit-mode-adaptive",
@@ -124,7 +124,7 @@
           }
         ]
       },
-      "prompt": "Audit your context understanding before designing.\n\nMode behavior:\n- **QUICK**: skip this step (should not run)\n- **STANDARD**: do a self-audit; delegate at most once if you have subagent support\n- **THOROUGH**: delegate in parallel if you have subagent support\n\n**If subagent support is available and `rigorMode=THOROUGH`:**\n\nSpawn 2 WorkRail Executors SIMULTANEOUSLY using `routine-context-gathering`:\n\n**Delegation 1 \u2014 Completeness Audit:**\n- routine: routine-context-gathering\n- depth: 2 (Explore level)\n- mission: \"Audit main agent's context for missed areas and blind spots\"\n- target: [Areas main agent investigated]\n- focus: COMPLETENESS\n- deliverable: context-audit-completeness.md\n\n**Delegation 2 \u2014 Depth Audit:**\n- routine: routine-context-gathering\n- depth: 3 (Analyze level)\n- mission: \"Audit main agent's context for shallow understanding\"\n- target: [Areas main agent investigated]\n- focus: DEPTH\n- deliverable: context-audit-depth.md\n\n**If `rigorMode=STANDARD`:**\n- Prefer self-audit. Optionally delegate ONCE using `routine-context-gathering` (depth: 2) focusing on COMPLETENESS.\n\n**If no subagents:** do a self-audit using the two lenses.\n\n**SYNTHESIZE** audit findings:\n- Update `contextSummary` with gaps filled\n- Resolve uncertainties with tools when possible\n- Update `openQuestions` but keep it <= `maxQuestions` and only for true human decisions\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record audit deltas, gaps addressed, and any new decisions.\n\n**Quality gate:** proceed only if you can explain the relevant flow end-to-end.",
+      "prompt": "Audit your context understanding before designing.\n\nMode behavior:\n- **QUICK**: skip this step (should not run)\n- **STANDARD**: do a self-audit; delegate at most once if you have subagent support\n- **THOROUGH**: delegate in parallel if you have subagent support\n\n**If subagent support is available and `rigorMode=THOROUGH`:**\n\nSpawn 2 WorkRail Executors SIMULTANEOUSLY using `routine-context-gathering`:\n\n**Delegation 1 — Completeness Audit:**\n- routine: routine-context-gathering\n- depth: 2 (Explore level)\n- mission: \"Audit main agent's context for missed areas and blind spots\"\n- target: [Areas main agent investigated]\n- focus: COMPLETENESS\n- deliverable: context-audit-completeness.md\n\n**Delegation 2 — Depth Audit:**\n- routine: routine-context-gathering\n- depth: 3 (Analyze level)\n- mission: \"Audit main agent's context for shallow understanding\"\n- target: [Areas main agent investigated]\n- focus: DEPTH\n- deliverable: context-audit-depth.md\n\n**If `rigorMode=STANDARD`:**\n- Prefer self-audit. Optionally delegate ONCE using `routine-context-gathering` (depth: 2) focusing on COMPLETENESS.\n\n**If no subagents:** do a self-audit using the two lenses.\n\n**SYNTHESIZE** audit findings:\n- Update `contextSummary` with gaps filled\n- Resolve uncertainties with tools when possible\n- Update `openQuestions` but keep it <= `maxQuestions` and only for true human decisions\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record audit deltas, gaps addressed, and any new decisions.\n\n**Quality gate:** proceed only if you can explain the relevant flow end-to-end.",
       "requireConfirmation": false
     },
     {
@@ -151,7 +151,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Generate approaches by answering DIFFERENT questions\u2014not variations of one idea.\n\nThis is DIVERGENT thinking. Do not evaluate or compare yet.\n\n**Answer each lens (minimum 3, add more for THOROUGH):**\n\n1. **Simplicity lens:** What's the simplest approach that could work?\n   - Minimal moving parts, easiest to understand\n   - What would you do if you had 1 hour?\n\n2. **Maintainability lens:** What approach optimizes for future changes?\n   - Easiest to modify in 6 months by someone unfamiliar\n   - What would make a new team member's life easiest?\n\n3. **Clean-slate lens:** If this area didn't exist, how would you design it?\n   - Ignore existing structure\u2014what's the \"right\" architecture?\n   - What would you build if starting fresh today?\n\n4. **(STANDARD+) Constraint-flip lens:** What if a key constraint didn't exist?\n   - Often reveals assumptions worth questioning\n   - What if [performance/compatibility/scope] wasn't a concern?\n\n**For each approach:**\n- **Name**: Short memorable label\n- **Core idea**: 2-3 sentences describing the fundamental approach\n- **Key trade-off**: What does this optimize for? What does it sacrifice?\n- **Shape**: High-level structure (what changes, where)\n\n**Anti-anchoring check:**\nIf your approaches feel like variations of one idea, you haven't diverged enough. 
The lenses should produce genuinely different shapes.\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nSpawn 3 WorkRail Executors SIMULTANEOUSLY using `routine-ideation`:\n\n**Delegation 1 \u2014 Simplicity Lens:**\n- routine: routine-ideation\n- perspective: simplicity\n- quantity: 3-5 ideas\n- problem: [Task problem statement from Phase 0]\n- constraints: [From invariants]\n- deliverable: ideas-simplicity.md\n\n**Delegation 2 \u2014 Maintainability Lens:**\n- routine: routine-ideation\n- perspective: maintainability\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants]\n- deliverable: ideas-maintainability.md\n\n**Delegation 3 \u2014 Clean-Slate Lens:**\n- routine: routine-ideation\n- perspective: innovation\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants, relaxed]\n- deliverable: ideas-clean-slate.md\n\n**Main agent synthesis:**\n- Combine ideas from all 3 deliverables\n- Deduplicate similar ideas (keep best version)\n- Select best from each perspective for `approaches` array\n\n**Output:** `approaches` array with one entry per lens answered.\n\nSet these keys in the next `continue_workflow` call's `context` object: `approaches`\n\n**CONTEXT LOGGING:** Add Approaches section to CONTEXT.md. Preserve ALL approaches\u2014they may become Plan B/C later.",
+      "prompt": "Generate approaches by answering DIFFERENT questions—not variations of one idea.\n\nThis is DIVERGENT thinking. Do not evaluate or compare yet.\n\n**Answer each lens (minimum 3, add more for THOROUGH):**\n\n1. **Simplicity lens:** What's the simplest approach that could work?\n   - Minimal moving parts, easiest to understand\n   - What would you do if you had 1 hour?\n\n2. **Maintainability lens:** What approach optimizes for future changes?\n   - Easiest to modify in 6 months by someone unfamiliar\n   - What would make a new team member's life easiest?\n\n3. **Clean-slate lens:** If this area didn't exist, how would you design it?\n   - Ignore existing structure—what's the \"right\" architecture?\n   - What would you build if starting fresh today?\n\n4. **(STANDARD+) Constraint-flip lens:** What if a key constraint didn't exist?\n   - Often reveals assumptions worth questioning\n   - What if [performance/compatibility/scope] wasn't a concern?\n\n**For each approach:**\n- **Name**: Short memorable label\n- **Core idea**: 2-3 sentences describing the fundamental approach\n- **Key trade-off**: What does this optimize for? What does it sacrifice?\n- **Shape**: High-level structure (what changes, where)\n\n**Anti-anchoring check:**\nIf your approaches feel like variations of one idea, you haven't diverged enough. 
The lenses should produce genuinely different shapes.\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nSpawn 3 WorkRail Executors SIMULTANEOUSLY using `routine-ideation`:\n\n**Delegation 1 — Simplicity Lens:**\n- routine: routine-ideation\n- perspective: simplicity\n- quantity: 3-5 ideas\n- problem: [Task problem statement from Phase 0]\n- constraints: [From invariants]\n- deliverable: ideas-simplicity.md\n\n**Delegation 2 — Maintainability Lens:**\n- routine: routine-ideation\n- perspective: maintainability\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants]\n- deliverable: ideas-maintainability.md\n\n**Delegation 3 — Clean-Slate Lens:**\n- routine: routine-ideation\n- perspective: innovation\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants, relaxed]\n- deliverable: ideas-clean-slate.md\n\n**Main agent synthesis:**\n- Combine ideas from all 3 deliverables\n- Deduplicate similar ideas (keep best version)\n- Select best from each perspective for `approaches` array\n\n**Output:** `approaches` array with one entry per lens answered.\n\nSet these keys in the next `continue_workflow` call's `context` object: `approaches`\n\n**CONTEXT LOGGING:** Add Approaches section to CONTEXT.md. Preserve ALL approaches—they may become Plan B/C later.",
       "requireConfirmation": false
     },
     {
@@ -161,7 +161,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Assess each approach individually. This is ANALYTICAL thinking\u2014evaluate, don't compare yet.\n\n**For EACH approach in `approaches`:**\n\n1. **Invariant fit**:\n   - Which invariants does it naturally satisfy?\n   - Which require extra effort or workarounds?\n\n2. **Risk profile**:\n   - What could go wrong?\n   - What's the worst-case scenario?\n   - What dependencies does it introduce?\n\n3. **Implementation shape**:\n   - What files/areas change?\n   - What new abstractions are needed?\n   - Complexity estimate (Low/Medium/High)\n\n4. **Pattern alignment**:\n   - Does it match existing `userRules` and repo patterns?\n   - Any deviations needed?\n\n**Output:** Assessment for each approach (can be brief\u20142-3 bullets each).\n\n**Set:** `approachAssessments` (object mapping approach name to assessment)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Approaches section with assessments.",
+      "prompt": "Assess each approach individually. This is ANALYTICAL thinking—evaluate, don't compare yet.\n\n**For EACH approach in `approaches`:**\n\n1. **Invariant fit**:\n   - Which invariants does it naturally satisfy?\n   - Which require extra effort or workarounds?\n\n2. **Risk profile**:\n   - What could go wrong?\n   - What's the worst-case scenario?\n   - What dependencies does it introduce?\n\n3. **Implementation shape**:\n   - What files/areas change?\n   - What new abstractions are needed?\n   - Complexity estimate (Low/Medium/High)\n\n4. **Pattern alignment**:\n   - Does it match existing `userRules` and repo patterns?\n   - Any deviations needed?\n\n**Output:** Assessment for each approach (can be brief—2-3 bullets each).\n\n**Set:** `approachAssessments` (object mapping approach name to assessment)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Approaches section with assessments.",
       "requireConfirmation": false
     },
     {
@@ -171,7 +171,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Compare approaches side-by-side to identify the leading candidate.\n\nThis is EVALUATIVE thinking\u2014compare and rank, don't stress-test yet.\n\n**Using `approachAssessments`, compare approaches on:**\n- Which best fits the `invariants`?\n- Which has the best risk profile?\n- Which aligns with `userRules` and existing patterns?\n- If trade-offs conflict, which trade-off is acceptable for THIS task?\n\n**Build comparison summary:**\nFor each dimension, note which approach wins and why.\n\n**Identify:**\n- **Leading candidate**: Which approach scores best overall?\n- **Runner-up**: Which is second-best (this becomes Plan B)?\n\n**Output:**\n- Comparison summary (which approach wins on which dimension)\n- Leading candidate name\n- Runner-up name\n\n**Set:** `leadingCandidate`, `runnerUpApproach`\n\n**CONTEXT LOGGING:** Update CONTEXT.md with comparison summary.",
+      "prompt": "Compare approaches side-by-side to identify the leading candidate.\n\nThis is EVALUATIVE thinking—compare and rank, don't stress-test yet.\n\n**Using `approachAssessments`, compare approaches on:**\n- Which best fits the `invariants`?\n- Which has the best risk profile?\n- Which aligns with `userRules` and existing patterns?\n- If trade-offs conflict, which trade-off is acceptable for THIS task?\n\n**Build comparison summary:**\nFor each dimension, note which approach wins and why.\n\n**Identify:**\n- **Leading candidate**: Which approach scores best overall?\n- **Runner-up**: Which is second-best (this becomes Plan B)?\n\n**Output:**\n- Comparison summary (which approach wins on which dimension)\n- Leading candidate name\n- Runner-up name\n\n**Set:** `leadingCandidate`, `runnerUpApproach`\n\n**CONTEXT LOGGING:** Update CONTEXT.md with comparison summary.",
       "requireConfirmation": false
     },
     {
@@ -189,7 +189,7 @@
           }
         ]
       },
-      "prompt": "Stress-test the leading candidate before committing.\n\nThis is ADVERSARIAL thinking\u2014try to break it.\n\n**For `leadingCandidate` only:**\n\n> \"It's 2 weeks from now. This approach failed catastrophically. What happened?\"\n\n**Identify:**\n- **Most likely failure mode**: What probably goes wrong?\n- **Hidden assumption**: What are we assuming that could be wrong?\n- **Dependency risk**: What external factor could break this?\n\n---\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nDelegate to WorkRail Executor using `routine-hypothesis-challenge`:\n\n**Pre-Mortem Delegation:**\n- routine: routine-hypothesis-challenge\n- rigor: 3 (use 5 for High-risk tasks)\n- hypotheses:\n  - \"The leading candidate approach will succeed\"\n  - [Key assumptions from the approach: architecture, dependencies, patterns]\n- evidence: `approachAssessments` for leading candidate\n- context:\n  - Read: CONTEXT.md (invariants section)\n  - Filtered userRules: architecture, risk, edge cases\n  - Feature brief: problem + constraints + approach shape\n- deliverable: premortem-challenges.md\n\n**Synthesis:**\n- Review challenges from deliverable\n- Update `preMortemFindings` with subagent insights\n- If major concerns raised: flag for reconsideration in next phase\n\n---\n\n**Output:**\n- Pre-mortem findings for leading candidate\n- Flag if major concerns require reconsidering `leadingCandidate`\n\n**Set:** `preMortemFindings`, `majorConcernsRaised` (boolean)",
+      "prompt": "Stress-test the leading candidate before committing.\n\nThis is ADVERSARIAL thinking—try to break it.\n\n**For `leadingCandidate` only:**\n\n> \"It's 2 weeks from now. This approach failed catastrophically. What happened?\"\n\n**Identify:**\n- **Most likely failure mode**: What probably goes wrong?\n- **Hidden assumption**: What are we assuming that could be wrong?\n- **Dependency risk**: What external factor could break this?\n\n---\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nDelegate to WorkRail Executor using `routine-hypothesis-challenge`:\n\n**Pre-Mortem Delegation:**\n- routine: routine-hypothesis-challenge\n- rigor: 3 (use 5 for High-risk tasks)\n- hypotheses:\n  - \"The leading candidate approach will succeed\"\n  - [Key assumptions from the approach: architecture, dependencies, patterns]\n- evidence: `approachAssessments` for leading candidate\n- context:\n  - Read: CONTEXT.md (invariants section)\n  - Filtered userRules: architecture, risk, edge cases\n  - Feature brief: problem + constraints + approach shape\n- deliverable: premortem-challenges.md\n\n**Synthesis:**\n- Review challenges from deliverable\n- Update `preMortemFindings` with subagent insights\n- If major concerns raised: flag for reconsideration in next phase\n\n---\n\n**Output:**\n- Pre-mortem findings for leading candidate\n- Flag if major concerns require reconsidering `leadingCandidate`\n\n**Set:** `preMortemFindings`, `majorConcernsRaised` (boolean)",
       "requireConfirmation": false
     },
     {
@@ -236,7 +236,7 @@
           }
         ]
       },
-      "prompt": "Validate key assumptions about `selectedApproach` with quick, time-boxed probes before investing in detailed planning.\n\n**Purpose:** Catch \"this won't work\" early with real code, not just analysis.\n\n---\n\n**STEP 1: Identify key uncertainties**\n\nReview `preMortemFindings` and `keyRiskToMonitor`. For each, ask:\n- Can this be validated with a quick probe?\n- What's the smallest code/test that would prove or disprove this?\n\nPrioritize uncertainties that would INVALIDATE the approach if wrong.\n\n---\n\n**STEP 2: Design 1-3 spikes**\n\nEach spike should be:\n- **Time-boxed**: 5-15 minutes max\n- **Minimal**: Smallest code that validates the assumption\n- **Disposable**: Don't need to keep the code (but can)\n- **Binary outcome**: Works or doesn't\n\n**Example spikes:**\n- API probe: \"Can the API handle batch requests?\" \u2192 Try it\n- Pattern probe: \"Can we extend this class?\" \u2192 Try it\n- Perf probe: \"Is this fast enough?\" \u2192 Quick benchmark\n- Integration probe: \"Does DI work here?\" \u2192 Try injecting\n\n**Document each spike:**\n- Assumption being tested\n- Probe approach (what code/test)\n- Expected outcome if assumption holds\n\n---\n\n**STEP 3: Execute spikes**\n\nFor each spike:\n1. Write minimal probe code\n2. Run it\n3. 
Document result: VALIDATED / INVALIDATED / INCONCLUSIVE\n\n**If INCONCLUSIVE:** Note what additional information would resolve it.\n\n---\n\n**STEP 4: Decide**\n\n**If any spike INVALIDATED a critical assumption:**\n- Set `spikeFailure = true`\n- Document what was learned\n- Return to `phase-select-architecture` with new information\n- Consider `runnerUpApproach` or generate new approaches\n\n**If all spikes VALIDATED (or no critical spikes needed):**\n- Set `assumptionsValidated = true`\n- Proceed to slice planning with higher confidence\n\n---\n\n**Output:**\n- Spikes attempted: [{assumption, probe, result}]\n- Key learnings\n- Decision: proceed / return to selection\n\n**Set:** `spikeResults`, `spikeFailure`, `assumptionsValidated`\n\n**CONTEXT LOGGING:** Add Spike Results section to CONTEXT.md.",
+      "prompt": "Validate key assumptions about `selectedApproach` with quick, time-boxed probes before investing in detailed planning.\n\n**Purpose:** Catch \"this won't work\" early with real code, not just analysis.\n\n---\n\n**STEP 1: Identify key uncertainties**\n\nReview `preMortemFindings` and `keyRiskToMonitor`. For each, ask:\n- Can this be validated with a quick probe?\n- What's the smallest code/test that would prove or disprove this?\n\nPrioritize uncertainties that would INVALIDATE the approach if wrong.\n\n---\n\n**STEP 2: Design 1-3 spikes**\n\nEach spike should be:\n- **Time-boxed**: 5-15 minutes max\n- **Minimal**: Smallest code that validates the assumption\n- **Disposable**: Don't need to keep the code (but can)\n- **Binary outcome**: Works or doesn't\n\n**Example spikes:**\n- API probe: \"Can the API handle batch requests?\" → Try it\n- Pattern probe: \"Can we extend this class?\" → Try it\n- Perf probe: \"Is this fast enough?\" → Quick benchmark\n- Integration probe: \"Does DI work here?\" → Try injecting\n\n**Document each spike:**\n- Assumption being tested\n- Probe approach (what code/test)\n- Expected outcome if assumption holds\n\n---\n\n**STEP 3: Execute spikes**\n\nFor each spike:\n1. Write minimal probe code\n2. Run it\n3. 
Document result: VALIDATED / INVALIDATED / INCONCLUSIVE\n\n**If INCONCLUSIVE:** Note what additional information would resolve it.\n\n---\n\n**STEP 4: Decide**\n\n**If any spike INVALIDATED a critical assumption:**\n- Set `spikeFailure = true`\n- Document what was learned\n- Return to `phase-select-architecture` with new information\n- Consider `runnerUpApproach` or generate new approaches\n\n**If all spikes VALIDATED (or no critical spikes needed):**\n- Set `assumptionsValidated = true`\n- Proceed to slice planning with higher confidence\n\n---\n\n**Output:**\n- Spikes attempted: [{assumption, probe, result}]\n- Key learnings\n- Decision: proceed / return to selection\n\n**Set:** `spikeResults`, `spikeFailure`, `assumptionsValidated`\n\n**CONTEXT LOGGING:** Add Spike Results section to CONTEXT.md.",
       "requireConfirmation": {
         "var": "spikeFailure",
         "equals": true
@@ -249,7 +249,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Generate 2-3 different ways to slice this work.\n\nThis is DIVERGENT thinking\u2014explore different orderings, not just one.\n\n**Slicing lenses:**\n\n1. **Risk-first**: Order by risk (highest first \u2192 fail fast)\n   - What's the riskiest change? Do it first.\n   - Surfaces problems before investment grows\n   - Trade-off: May require more scaffolding upfront\n\n2. **Foundation-first**: Order by dependencies (base \u2192 features)\n   - Build the infrastructure/contracts first\n   - Each slice builds on stable ground\n   - Trade-off: May delay visible progress\n\n3. **Value-first**: Order by deliverable value (most valuable first)\n   - Ship something useful early\n   - Get user feedback faster\n   - Trade-off: May need to revisit foundations later\n\n**For each strategy, define:**\n- **Name**: Risk-first / Foundation-first / Value-first (or custom)\n- **Slice order**: List slices in that order\n- **Per slice**: Name, scope, key files, verification plan\n- **PR boundaries**: Where would you split PRs?\n- **Trade-offs**: What's prioritized? What's sacrificed?\n\n**Output:** `sliceStrategies` array (2-3 entries)\n\n**Set:** `sliceStrategies`\n\n**CONTEXT LOGGING:** Add Slice Strategies section to CONTEXT.md.",
+      "prompt": "Generate 2-3 different ways to slice this work.\n\nThis is DIVERGENT thinking—explore different orderings, not just one.\n\n**Slicing lenses:**\n\n1. **Risk-first**: Order by risk (highest first → fail fast)\n   - What's the riskiest change? Do it first.\n   - Surfaces problems before investment grows\n   - Trade-off: May require more scaffolding upfront\n\n2. **Foundation-first**: Order by dependencies (base → features)\n   - Build the infrastructure/contracts first\n   - Each slice builds on stable ground\n   - Trade-off: May delay visible progress\n\n3. **Value-first**: Order by deliverable value (most valuable first)\n   - Ship something useful early\n   - Get user feedback faster\n   - Trade-off: May need to revisit foundations later\n\n**For each strategy, define:**\n- **Name**: Risk-first / Foundation-first / Value-first (or custom)\n- **Slice order**: List slices in that order\n- **Per slice**: Name, scope, key files, verification plan\n- **PR boundaries**: Where would you split PRs?\n- **Trade-offs**: What's prioritized? What's sacrificed?\n\n**Output:** `sliceStrategies` array (2-3 entries)\n\n**Set:** `sliceStrategies`\n\n**CONTEXT LOGGING:** Add Slice Strategies section to CONTEXT.md.",
       "requireConfirmation": false
     },
     {
@@ -259,12 +259,12 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Compare slice strategies and select the best fit.\n\nThis is EVALUATIVE thinking\u2014compare and decide.\n\n**Compare strategies on:**\n- **Risk management**: Which best surfaces problems early given our invariants?\n- **PR reviewability**: Which produces the cleanest PR boundaries?\n- **Feedback speed**: Which gets us useful feedback fastest?\n- **Rollout constraints**: Which aligns with any flagging/migration requirements?\n- **Implementation flow**: Which has the smoothest dependencies between slices?\n\n**Select:**\n- **selectedSliceStrategy**: [name]\n- **rationale**: Why this wins (2-3 sentences referencing comparison)\n- **slices**: The ordered list from selected strategy\n\n**PR sizing gate:**\n- If `prStrategy = MultiPR`, map slices to PRs.\n- If `prStrategy = SinglePR` but slices suggest broad changes, recommend switching to MultiPR.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedSliceStrategy`\n- `slices` (array from selected strategy)\n- `estimatedPRCount` (number)\n- `prStrategyRationale` (short)\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selected strategy + rationale, rejected strategies and why, slice boundaries and PR strategy.\n\n**VERIFY:** user confirms slice strategy and PR approach.",
+      "prompt": "Compare slice strategies and select the best fit.\n\nThis is EVALUATIVE thinking—compare and decide.\n\n**Compare strategies on:**\n- **Risk management**: Which best surfaces problems early given our invariants?\n- **PR reviewability**: Which produces the cleanest PR boundaries?\n- **Feedback speed**: Which gets us useful feedback fastest?\n- **Rollout constraints**: Which aligns with any flagging/migration requirements?\n- **Implementation flow**: Which has the smoothest dependencies between slices?\n\n**Select:**\n- **selectedSliceStrategy**: [name]\n- **rationale**: Why this wins (2-3 sentences referencing comparison)\n- **slices**: The ordered list from selected strategy\n\n**PR sizing gate:**\n- If `prStrategy = MultiPR`, map slices to PRs.\n- If `prStrategy = SinglePR` but slices suggest broad changes, recommend switching to MultiPR.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedSliceStrategy`\n- `slices` (array from selected strategy)\n- `estimatedPRCount` (number)\n- `prStrategyRationale` (short)\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selected strategy + rationale, rejected strategies and why, slice boundaries and PR strategy.\n\n**VERIFY:** user confirms slice strategy and PR approach.",
       "requireConfirmation": true
     },
     {
       "id": "phase-locks-compliance-audit",
-      "title": "Locks Compliance Audit (Canonical Docs \u2192 Slices Matrix)",
+      "title": "Locks Compliance Audit (Canonical Docs → Slices Matrix)",
       "runCondition": {
         "and": [
           {
@@ -312,7 +312,7 @@
     {
       "id": "phase-5-plan-iterations",
       "type": "loop",
-      "title": "Phase 5: Plan Iteration Loop (Draft \u2192 Audit \u2192 Refocus)",
+      "title": "Phase 5: Plan Iteration Loop (Draft → Audit → Refocus)",
       "runCondition": {
         "var": "taskComplexity",
         "not_equals": "Small"
@@ -330,13 +330,13 @@
         {
           "id": "phase-5a-draft-implementation-plan",
           "title": "Plan Artifact Draft/Update",
-          "prompt": "Create or update the **Plan Artifact** (deterministic schema).\n\n**Write-or-paste rule:** attempt to write/update `implementation_plan.md`. If file writing fails, output full content in chat (canonical).\n\n**Plan Artifact headings (concise, complete):**\n\n1) Problem statement\n2) Acceptance criteria (bullets)\n3) Non-goals (bullets)\n4) **User rules/preferences applied:**\n   - Relevant `userRules` + how plan respects them.\n   - Deviations: rationale + mitigation + user decision (counts toward `maxQuestions`).\n5) Invariants (reference `invariants`)\n6) Proposed approach (1\u20132 paragraphs)\n7) Architecture decision (reference Phase 3/3b outputs):\n   - Selected approach: reference `selectedApproach`\n   - Rationale: reference `architectureRationale`\n   - Runner-up (Plan B): reference `runnerUpApproach`\n   - Key risk: reference `keyRiskToMonitor`\n   - Full alternatives: see CONTEXT.md Approaches section\n8) **Vertical slices** (match `slices`: scope, done-definition, files, verification)\n\n   **Work Packages inside each slice (mode-dependent):**\n   - QUICK: skip work packages\n   - STANDARD: optional; recommended when slice is high-risk or multi-layer\n   - THOROUGH: required for non-trivial slices\n\n   Each work package (WP):\n   - ID: `S<sliceIndex>-WP<k>` (e.g., S1-WP1)\n   - Goal: one coherent outcome\n   - Targets (allowlist): dirs/files (+ allowed new files)\n   - Forbidden (denylist): files/dirs not to touch\n   - Budget: maxModified (5 STANDARD/8 THOROUGH), maxNew (2/3)\n   - Done-definition: 2\u20135 bullets\n   - Verification: 1\u20133 commands/tests\n   - Dependencies: contracts/types from other WPs (if parallel)\n\n   **Parallelism rule:** parallelize only if Targets don't overlap. 
Final WP must be \"Hook-up/Integration\" when parallel was used.\n\n9) Test plan (unit/integration/e2e; cite repo patterns)\n10) Risk register (risks + mitigation + rollback/flag)\n11) PR packaging (Single/Multi + rule)\n12) **Philosophy alignment per slice** (for each slice, include):\n   - For each design principle touched by this slice: [principle] \u2192 [satisfied / tension / violated + 1-line why]\n   - The audit step will independently verify these self-assessments. Be honest \u2014 violations caught early are cheaper than violations caught in review.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `planArtifact`\n- `implementationPlan`\n\n**VERIFY:** concrete enough for another engineer to implement without guessing.",
+          "prompt": "Create or update the **Plan Artifact** (deterministic schema).\n\n**Write-or-paste rule:** attempt to write/update `implementation_plan.md`. If file writing fails, output full content in chat (canonical).\n\n**Plan Artifact headings (concise, complete):**\n\n1) Problem statement\n2) Acceptance criteria (bullets)\n3) Non-goals (bullets)\n4) **User rules/preferences applied:**\n   - Relevant `userRules` + how plan respects them.\n   - Deviations: rationale + mitigation + user decision (counts toward `maxQuestions`).\n5) Invariants (reference `invariants`)\n6) Proposed approach (1–2 paragraphs)\n7) Architecture decision (reference Phase 3/3b outputs):\n   - Selected approach: reference `selectedApproach`\n   - Rationale: reference `architectureRationale`\n   - Runner-up (Plan B): reference `runnerUpApproach`\n   - Key risk: reference `keyRiskToMonitor`\n   - Full alternatives: see CONTEXT.md Approaches section\n8) **Vertical slices** (match `slices`: scope, done-definition, files, verification)\n\n   **Work Packages inside each slice (mode-dependent):**\n   - QUICK: skip work packages\n   - STANDARD: optional; recommended when slice is high-risk or multi-layer\n   - THOROUGH: required for non-trivial slices\n\n   Each work package (WP):\n   - ID: `S<sliceIndex>-WP<k>` (e.g., S1-WP1)\n   - Goal: one coherent outcome\n   - Targets (allowlist): dirs/files (+ allowed new files)\n   - Forbidden (denylist): files/dirs not to touch\n   - Budget: maxModified (5 STANDARD/8 THOROUGH), maxNew (2/3)\n   - Done-definition: 2–5 bullets\n   - Verification: 1–3 commands/tests\n   - Dependencies: contracts/types from other WPs (if parallel)\n\n   **Parallelism rule:** parallelize only if Targets don't overlap. 
Final WP must be \"Hook-up/Integration\" when parallel was used.\n\n9) Test plan (unit/integration/e2e; cite repo patterns)\n10) Risk register (risks + mitigation + rollback/flag)\n11) PR packaging (Single/Multi + rule)\n12) **Philosophy alignment per slice** (for each slice, include):\n   - For each design principle touched by this slice: [principle] → [satisfied / tension / violated + 1-line why]\n   - The audit step will independently verify these self-assessments. Be honest — violations caught early are cheaper than violations caught in review.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `planArtifact`\n- `implementationPlan`\n\n**VERIFY:** concrete enough for another engineer to implement without guessing.",
           "requireConfirmation": false
         },
         {
           "id": "phase-5b-plan-audit-mode-adaptive",
           "title": "Plan Audit (Subagent-Friendly)",
-          "prompt": "**Mission: Find gaps, issues, and inconsistencies in this plan.**\n\nActively look for:\n- **Gaps**: What's missing? What's not covered?\n- **Weak assumptions**: What could be wrong? What are we taking for granted?\n- **Inconsistencies**: Do parts contradict each other? Does the plan match the invariants?\n- **Risks**: What could go wrong? What hasn't been stress-tested?\n\n---\n\n**Mode behavior:**\n- QUICK: self-audit only\n- STANDARD: self-audit; delegate once if subagents exist\n- THOROUGH: parallel delegation if subagents exist\n\n**If subagents + `rigorMode=THOROUGH`:**\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel plan validation.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Delegation 1 \u2014 Plan Analysis:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md\n- requirements: [From Phase 2 invariants + acceptance criteria]\n- constraints: [Filtered userRules: architecture, testing, patterns]\n- context (file-reference-first, max 500 words if pasting):\n  - Read: CONTEXT.md (userRules section), implementation_plan.md\n  - Read: spec.md, design.md (if exist)\n  - Invariants + locks (if locksMatrix exists)\n  - Feature brief: problem statement + architecture decision + key constraints\n- deliverable: plan-analysis.md\n\n**Delegation 2 \u2014 Hypothesis Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 3\n- hypotheses: [Plan's key assumptions about architecture, dependencies, invariant satisfaction]\n- evidence: implementation_plan.md\n- context:\n  - Read: implementation_plan.md\n  - Filtered userRules: error handling, edge cases, validation rules\n  - Invariants (especially high-risk ones)\n  - Feature brief: problem + acceptance criteria + non-goals\n- deliverable: plan-challenges.md\n\n**Delegation 3 \u2014 Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Riskiest slice entry function]\n- inputs: [Expected inputs and 
state]\n- trace_depth: 3 (follow calls to understand failure modes)\n- context:\n  - Read: implementation_plan.md (riskiest slice section)\n  - Filtered userRules: performance, data flow, state management\n  - Invariants touched by risky slice\n  - Feature brief: architecture decision + risk register\n- deliverable: simulation-results.md\n\n**Self-check before delegating (required):**\n\u2705 Each delegation includes filtered userRules (not full list)\n\u2705 Each includes invariants + locks (if applicable)\n\u2705 Each includes feature brief (file refs or <500 word excerpt)\n\u2705 Each has specific focus/lens\n\n**If subagents + `rigorMode=STANDARD`:**\nDelegate ONCE using Plan Analysis with full context (not filtered).\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**Else:** self-audit (same three lenses).\n\n**Output:**\n- Findings: Critical / Major / Minor\n- Plan amendments\n\n---\n\n**CLEAN-SLATE CHECK (STANDARD+, if findings exist):**\n\nBefore applying amendments, briefly answer:\n\n> \"If I started fresh right now, knowing everything I've learned, would I choose the same approach?\"\n\n1. Without looking at current plan, sketch in 1 sentence what approach you'd take\n2. Compare to `selectedApproach`:\n   - **Same**: Proceed with amendments\n   - **Minor variation**: Note the insight; consider incorporating\n   - **Fundamentally different**: STOP. 
Set `cleanSlateDivergence = Major`\n\n**If fundamentally different:**\n- Document why fresh thinking differs\n- Return to Phase 3b with fresh approach as new candidate, OR\n- Document why current approach is still better despite fresh thinking\n\n---\n\n**REGRESSION CHECK (iteration 2+, if `resolvedFindings` is non-empty):**\n\nBefore running the forward-looking audit, verify each item in `resolvedFindings`:\n- Is the resolution still valid in the current plan?\n- Has the amendment been reverted or contradicted by subsequent changes?\n\nIf ANY regression found: add to `planFindings` with severity Critical and prefix \"REGRESSION: previously resolved finding reverted.\"\n\n---\n\n**PHILOSOPHY ALIGNMENT CHECK (mandatory, all modes):**\n\nReview the plan against the user's coding philosophy and design principles from `userRules`.\n\nThis evaluates DESIGN QUALITY \u2014 not plan consistency. Stale acceptance criteria, missing requirements, and coverage gaps are covered by the completeness audit above.\n\nIf no philosophy or design principles are found in `userRules`, skip this section and note \"No philosophy principles configured.\"\n\n**Required output format** (structured table):\nFor each violation or tension found:\n\n| Principle | Violation | Severity | Action |\n|-----------|-----------|----------|--------|\n| [Principle name from userRules] | [What violates it and why] | Red / Orange / Yellow | [Specific fix or justification needed] |\n\nSeverity guide:\n- **Red** (blocking) = must fix before implementation. Add to `planFindings`.\n- **Orange** (design quality) = should fix; document if intentionally accepted. Add to `planFindings`.\n- **Yellow** (tension) = tension between principles; document the tradeoff. 
Do NOT add to `planFindings` \u2014 these are informational only.\n\nChecklist \u2014 actively check: immutability, error handling model (Result/sealed vs exceptions), test doubles strategy (fakes vs mocks), dead code, naming clarity, abstraction level, type safety, exhaustiveness.\n\nIf NO violations found: explicitly state \"Philosophy check: no violations found\" with brief evidence (e.g., \"error handling uses Result<T> per philosophy; test doubles are fakes not mocks\"). Do NOT rubber-stamp. If you find zero violations on a non-trivial plan, double-check naming, dead code, and abstraction choices.\n\n**Set:** `planFindings`, `planAmendments`, `planConfidence` (1\u201310), `cleanSlateDivergence` (None/Minor/Major)",
+          "prompt": "**Mission: Find gaps, issues, and inconsistencies in this plan.**\n\nActively look for:\n- **Gaps**: What's missing? What's not covered?\n- **Weak assumptions**: What could be wrong? What are we taking for granted?\n- **Inconsistencies**: Do parts contradict each other? Does the plan match the invariants?\n- **Risks**: What could go wrong? What hasn't been stress-tested?\n\n---\n\n**Mode behavior:**\n- QUICK: self-audit only\n- STANDARD: self-audit; delegate once if subagents exist\n- THOROUGH: parallel delegation if subagents exist\n\n**If subagents + `rigorMode=THOROUGH`:**\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel plan validation.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Delegation 1 — Plan Analysis:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md\n- requirements: [From Phase 2 invariants + acceptance criteria]\n- constraints: [Filtered userRules: architecture, testing, patterns]\n- context (file-reference-first, max 500 words if pasting):\n  - Read: CONTEXT.md (userRules section), implementation_plan.md\n  - Read: spec.md, design.md (if exist)\n  - Invariants + locks (if locksMatrix exists)\n  - Feature brief: problem statement + architecture decision + key constraints\n- deliverable: plan-analysis.md\n\n**Delegation 2 — Hypothesis Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 3\n- hypotheses: [Plan's key assumptions about architecture, dependencies, invariant satisfaction]\n- evidence: implementation_plan.md\n- context:\n  - Read: implementation_plan.md\n  - Filtered userRules: error handling, edge cases, validation rules\n  - Invariants (especially high-risk ones)\n  - Feature brief: problem + acceptance criteria + non-goals\n- deliverable: plan-challenges.md\n\n**Delegation 3 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Riskiest slice entry function]\n- inputs: [Expected inputs and state]\n- 
trace_depth: 3 (follow calls to understand failure modes)\n- context:\n  - Read: implementation_plan.md (riskiest slice section)\n  - Filtered userRules: performance, data flow, state management\n  - Invariants touched by risky slice\n  - Feature brief: architecture decision + risk register\n- deliverable: simulation-results.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (not full list)\n✅ Each includes invariants + locks (if applicable)\n✅ Each includes feature brief (file refs or <500 word excerpt)\n✅ Each has specific focus/lens\n\n**If subagents + `rigorMode=STANDARD`:**\nDelegate ONCE using Plan Analysis with full context (not filtered).\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**Else:** self-audit (same three lenses).\n\n**Output:**\n- Findings: Critical / Major / Minor\n- Plan amendments\n\n---\n\n**CLEAN-SLATE CHECK (STANDARD+, if findings exist):**\n\nBefore applying amendments, briefly answer:\n\n> \"If I started fresh right now, knowing everything I've learned, would I choose the same approach?\"\n\n1. Without looking at current plan, sketch in 1 sentence what approach you'd take\n2. Compare to `selectedApproach`:\n   - **Same**: Proceed with amendments\n   - **Minor variation**: Note the insight; consider incorporating\n   - **Fundamentally different**: STOP. 
Set `cleanSlateDivergence = Major`\n\n**If fundamentally different:**\n- Document why fresh thinking differs\n- Return to Phase 3b with fresh approach as new candidate, OR\n- Document why current approach is still better despite fresh thinking\n\n---\n\n**REGRESSION CHECK (iteration 2+, if `resolvedFindings` is non-empty):**\n\nBefore running the forward-looking audit, verify each item in `resolvedFindings`:\n- Is the resolution still valid in the current plan?\n- Has the amendment been reverted or contradicted by subsequent changes?\n\nIf ANY regression found: add to `planFindings` with severity Critical and prefix \"REGRESSION: previously resolved finding reverted.\"\n\n---\n\n**PHILOSOPHY ALIGNMENT CHECK (mandatory, all modes):**\n\nReview the plan against the user's coding philosophy and design principles from `userRules`.\n\nThis evaluates DESIGN QUALITY — not plan consistency. Stale acceptance criteria, missing requirements, and coverage gaps are covered by the completeness audit above.\n\nIf no philosophy or design principles are found in `userRules`, skip this section and note \"No philosophy principles configured.\"\n\n**Required output format** (structured table):\nFor each violation or tension found:\n\n| Principle | Violation | Severity | Action |\n|-----------|-----------|----------|--------|\n| [Principle name from userRules] | [What violates it and why] | Red / Orange / Yellow | [Specific fix or justification needed] |\n\nSeverity guide:\n- **Red** (blocking) = must fix before implementation. Add to `planFindings`.\n- **Orange** (design quality) = should fix; document if intentionally accepted. Add to `planFindings`.\n- **Yellow** (tension) = tension between principles; document the tradeoff. 
Do NOT add to `planFindings` — these are informational only.\n\nChecklist — actively check: immutability, error handling model (Result/sealed vs exceptions), test doubles strategy (fakes vs mocks), dead code, naming clarity, abstraction level, type safety, exhaustiveness.\n\nIf NO violations found: explicitly state \"Philosophy check: no violations found\" with brief evidence (e.g., \"error handling uses Result<T> per philosophy; test doubles are fakes not mocks\"). Do NOT rubber-stamp. If you find zero violations on a non-trivial plan, double-check naming, dead code, and abstraction choices.\n\n**Set:** `planFindings`, `planAmendments`, `planConfidence` (1–10), `cleanSlateDivergence` (None/Minor/Major)",
           "requireConfirmation": false
         },
         {
@@ -359,7 +359,7 @@
         {
           "id": "phase-5d-loop-exit-decision",
           "title": "Loop Exit Decision (Fail-Safe)",
-          "prompt": "**Non-optional:** Provide a loop control decision artifact.\n\n**Required output format:**\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```\n`loopId` is optional \u2014 the engine infers the active loop automatically.\n\n**Decision rules (no exceptions):**\n\n- If `planFindings` is **NON-EMPTY** (any finding this pass, regardless of severity):\n  \u2192 `decision: \"continue\"`\n  \u2192 Rationale: Amendments need a verification pass; changes may have introduced new issues.\n\n- If `planFindings` is **EMPTY** (zero findings this pass):\n  \u2192 `decision: \"stop\"` \u2014 but you **must demonstrate** the empty pass:\n  \u2192 List each area you audited and explicitly confirm nothing was found in each.\n  \u2192 Example: \"Checked invariant coverage \u2713, data-flow correctness \u2713, slice boundary alignment \u2713 \u2014 zero findings.\"\n  \u2192 Claiming `planFindings: []` without enumerated evidence is not sufficient.\n\n**Max iterations (5) still applies** \u2014 if you've hit 5 iterations and are still finding issues, exit with `decision: \"stop\"` and document remaining concerns.\n\nIf continuing, name what was found + what changes next iteration.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) and update Machine State Checkpoint (keep last 3).",
+          "prompt": "**Non-optional:** Provide a loop control decision artifact.\n\n**Required output format:**\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```\n`loopId` is optional — the engine infers the active loop automatically.\n\n**Decision rules (no exceptions):**\n\n- If `planFindings` is **NON-EMPTY** (any finding this pass, regardless of severity):\n  → `decision: \"continue\"`\n  → Rationale: Amendments need a verification pass; changes may have introduced new issues.\n\n- If `planFindings` is **EMPTY** (zero findings this pass):\n  → `decision: \"stop\"` — but you **must demonstrate** the empty pass:\n  → List each area you audited and explicitly confirm nothing was found in each.\n  → Example: \"Checked invariant coverage ✓, data-flow correctness ✓, slice boundary alignment ✓ — zero findings.\"\n  → Claiming `planFindings: []` without enumerated evidence is not sufficient.\n\n**Max iterations (5) still applies** — if you've hit 5 iterations and are still finding issues, exit with `decision: \"stop\"` and document remaining concerns.\n\nIf continuing, name what was found + what changes next iteration.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) and update Machine State Checkpoint (keep last 3).",
           "requireConfirmation": true,
           "outputContract": {
             "contractRef": "wr.contracts.loop_control"
@@ -392,7 +392,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Design test strategy before implementation begins.\n\n**Required outputs:**\n- List acceptance criteria with corresponding test coverage\n- Identify edge cases and failure modes that need tests\n- Map invariants to test verification (which tests prove which invariants)\n- Document test execution plan (unit/integration/e2e)\n\n**Rigor-adaptive depth:**\n- QUICK: Brief test checklist (\u22645 items)\n- STANDARD: Test coverage matrix (criteria \u2192 tests)\n- THOROUGH: Comprehensive test plan with edge cases, failure injection, invariant proofs\n\n**Validation gate:** For high-risk invariants, require explicit test coverage. If gap exists, add to slice plan or acknowledge as risk.\n\nSet these keys in the next `continue_workflow` call's `context` object: `testDesign`, `testCoverageGaps`\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - test strategy, coverage gaps, and how gaps are addressed.\n\n**Output:** Test design artifact (in chat or file if write-or-paste).",
+      "prompt": "Design test strategy before implementation begins.\n\n**Required outputs:**\n- List acceptance criteria with corresponding test coverage\n- Identify edge cases and failure modes that need tests\n- Map invariants to test verification (which tests prove which invariants)\n- Document test execution plan (unit/integration/e2e)\n\n**Rigor-adaptive depth:**\n- QUICK: Brief test checklist (≤5 items)\n- STANDARD: Test coverage matrix (criteria → tests)\n- THOROUGH: Comprehensive test plan with edge cases, failure injection, invariant proofs\n\n**Validation gate:** For high-risk invariants, require explicit test coverage. If gap exists, add to slice plan or acknowledge as risk.\n\nSet these keys in the next `continue_workflow` call's `context` object: `testDesign`, `testCoverageGaps`\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - test strategy, coverage gaps, and how gaps are addressed.\n\n**Output:** Test design artifact (in chat or file if write-or-paste).",
       "requireConfirmation": false
     },
     {
@@ -420,7 +420,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "**Mission: Find what's MISSING from planning. Do not check boxes\u2014find gaps.**\n\nThis is DISCOVERY mode. Your job is to find problems, not approve.\n\n**Important:** `planningGaps` should represent what is STILL unresolved after you make a best-effort attempt to fix it immediately (update artifacts, clarify decisions). If you can fix it now, do so and do not carry it forward as a gap.\n\n---\n\n**STEP 1: Artifact Check**\n\nWhat SHOULD exist? Look for each and note if missing:\n\n- `CONTEXT.md` \u2014 Does it exist? Is it current?\n- `implementation_plan.md` \u2014 Does it exist? Is it complete?\n- `approaches` in CONTEXT.md \u2014 Are there \u22652 genuinely different approaches?\n- `slices` \u2014 Are they defined with scope/files/verification?\n\n**For each, state:** \"EXISTS at [location]\" or \"MISSING\" or \"INCOMPLETE: [what's missing]\"\n\n---\n\n**STEP 2: Decision Check**\n\nWhat decisions should have been made but weren't?\n\n- Is `selectedApproach` decided with rationale?\n- Is `runnerUpApproach` (Plan B) defined?\n- Are `pivotTriggers` concrete and observable?\n- Are there any \"TBD\" or \"TODO\" items in the plan?\n- Are there unresolved questions that block implementation?\n\n**For each fuzzy decision:** State what's unclear and what would resolve it.\n\n---\n\n**STEP 3: Skeptical Review**\n\nPretend a skeptical senior engineer is reviewing your planning:\n\n- \"What would they challenge?\"\n- \"What looks underspecified?\"\n- \"What assumption haven't you validated?\"\n- \"Are you rushing because you want to start coding?\"\n\n---\n\n**STEP 4: Immediate gap-fixing attempt (required)**\n\nIf you found any gaps you can resolve without a product/business decision:\n- Fix them immediately (update `CONTEXT.md` / `implementation_plan.md` / plan variables)\n- Then re-check the items above once\n\nOnly keep gaps that are STILL unresolved after this best-effort attempt.\n\n---\n\n**Output:**\n- Gaps found (unresolved) (list, may be 
empty)\n- Fuzzy decisions (still unresolved) (list, may be empty)\n- Skeptic's concerns (list, may be empty)\n\n**Output (required exact lines):**\n- planningGaps = [...] \n- planningGapsFound = true|false\n\n**Set (required):**\n- `planningGaps` (unresolved gaps array)\n- `planningGapsFound` (true iff planningGaps is non-empty)\n\n**If ANY unresolved gaps remain (`planningGapsFound = true`):** STOP and ask the user what to do next before proceeding to the planning complete gate.",
+      "prompt": "**Mission: Find what's MISSING from planning. Do not check boxes—find gaps.**\n\nThis is DISCOVERY mode. Your job is to find problems, not approve.\n\n**Important:** `planningGaps` should represent what is STILL unresolved after you make a best-effort attempt to fix it immediately (update artifacts, clarify decisions). If you can fix it now, do so and do not carry it forward as a gap.\n\n---\n\n**STEP 1: Artifact Check**\n\nWhat SHOULD exist? Look for each and note if missing:\n\n- `CONTEXT.md` — Does it exist? Is it current?\n- `implementation_plan.md` — Does it exist? Is it complete?\n- `approaches` in CONTEXT.md — Are there ≥2 genuinely different approaches?\n- `slices` — Are they defined with scope/files/verification?\n\n**For each, state:** \"EXISTS at [location]\" or \"MISSING\" or \"INCOMPLETE: [what's missing]\"\n\n---\n\n**STEP 2: Decision Check**\n\nWhat decisions should have been made but weren't?\n\n- Is `selectedApproach` decided with rationale?\n- Is `runnerUpApproach` (Plan B) defined?\n- Are `pivotTriggers` concrete and observable?\n- Are there any \"TBD\" or \"TODO\" items in the plan?\n- Are there unresolved questions that block implementation?\n\n**For each fuzzy decision:** State what's unclear and what would resolve it.\n\n---\n\n**STEP 3: Skeptical Review**\n\nPretend a skeptical senior engineer is reviewing your planning:\n\n- \"What would they challenge?\"\n- \"What looks underspecified?\"\n- \"What assumption haven't you validated?\"\n- \"Are you rushing because you want to start coding?\"\n\n---\n\n**STEP 4: Immediate gap-fixing attempt (required)**\n\nIf you found any gaps you can resolve without a product/business decision:\n- Fix them immediately (update `CONTEXT.md` / `implementation_plan.md` / plan variables)\n- Then re-check the items above once\n\nOnly keep gaps that are STILL unresolved after this best-effort attempt.\n\n---\n\n**Output:**\n- Gaps found (unresolved) (list, may be empty)\n- Fuzzy decisions (still 
unresolved) (list, may be empty)\n- Skeptic's concerns (list, may be empty)\n\n**Output (required exact lines):**\n- planningGaps = [...] \n- planningGapsFound = true|false\n\n**Set (required):**\n- `planningGaps` (unresolved gaps array)\n- `planningGapsFound` (true iff planningGaps is non-empty)\n\n**If ANY unresolved gaps remain (`planningGapsFound = true`):** STOP and ask the user what to do next before proceeding to the planning complete gate.",
       "requireConfirmation": {
         "var": "planningGapsFound",
         "equals": true
@@ -447,7 +447,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "**BOUNDARY: Planning \u2192 Execution**\n\nYou've done gap discovery. Now confirm with EVIDENCE that planning is complete.\n\n---\n\n**ENUMERATION (required):**\n\nDon't just check boxes. For each item, cite the specific artifact:\n\n**Architecture:**\n- [ ] approaches: \"[List approach names] in CONTEXT.md\"\n- [ ] selectedApproach: \"[Name], rationale: [1 sentence summary]\"\n- [ ] runnerUpApproach: \"[Name] is Plan B\"\n- [ ] pivotTriggers: \"[List the actual triggers]\"\n\n**Slices:**\n- [ ] slices defined: \"[N] slices in implementation_plan.md\"\n- [ ] each slice has: \"name, scope, files, verification \u2014 verified\"\n\n**Artifacts:**\n- [ ] CONTEXT.md: \"exists, current\"\n- [ ] implementation_plan.md: \"exists, [N] slices defined\"\n\n---\n\n**ANTI-CHECKBOX WARNING:**\n\nIf you're checking boxes quickly without pausing, STOP.\n- Did you actually verify each item exists?\n- Can you point to the specific location?\n- Are you rushing to start coding?\n\n---\n\n**Decision:**\n\n- If ALL items verified with evidence \u2192 `planningComplete = true`\n- If ANY item cannot be verified \u2192 STOP, return to address gap\n\n**After this gate:** Trust the plan and execute.\n\n**Set:** `planningComplete = true`",
+      "prompt": "**BOUNDARY: Planning → Execution**\n\nYou've done gap discovery. Now confirm with EVIDENCE that planning is complete.\n\n---\n\n**ENUMERATION (required):**\n\nDon't just check boxes. For each item, cite the specific artifact:\n\n**Architecture:**\n- [ ] approaches: \"[List approach names] in CONTEXT.md\"\n- [ ] selectedApproach: \"[Name], rationale: [1 sentence summary]\"\n- [ ] runnerUpApproach: \"[Name] is Plan B\"\n- [ ] pivotTriggers: \"[List the actual triggers]\"\n\n**Slices:**\n- [ ] slices defined: \"[N] slices in implementation_plan.md\"\n- [ ] each slice has: \"name, scope, files, verification — verified\"\n\n**Artifacts:**\n- [ ] CONTEXT.md: \"exists, current\"\n- [ ] implementation_plan.md: \"exists, [N] slices defined\"\n\n---\n\n**ANTI-CHECKBOX WARNING:**\n\nIf you're checking boxes quickly without pausing, STOP.\n- Did you actually verify each item exists?\n- Can you point to the specific location?\n- Are you rushing to start coding?\n\n---\n\n**Decision:**\n\n- If ALL items verified with evidence → `planningComplete = true`\n- If ANY item cannot be verified → STOP, return to address gap\n\n**After this gate:** Trust the plan and execute.\n\n**Set:** `planningComplete = true`",
       "requireConfirmation": true,
       "validationCriteria": {
         "and": [
@@ -462,7 +462,7 @@
     {
       "id": "phase-7-implement-slices",
       "type": "loop",
-      "title": "Phase 7: Implement Slice-by-Slice (PREP \u2192 IMPLEMENT \u2192 VERIFY \u2192 CHECKPOINT)",
+      "title": "Phase 7: Implement Slice-by-Slice (PREP → IMPLEMENT → VERIFY → CHECKPOINT)",
       "runCondition": {
         "and": [
           {
@@ -482,7 +482,7 @@
         {
           "id": "phase-pre-impl-validation",
           "title": "Pre-Implementation Validation",
-          "prompt": "Validate before implementing slice `{{currentSlice.name}}`.\n\n**FLAG RESET (required):**\nSet these keys in the next `continue_workflow` call's `context` object to their initial state:\n- planDrift = false\n- rulesDrift = false\n- verificationFailed = false\n- verificationApprovalRequired = false\n- verificationRetried = false\n- sliceVerified = false\n- softReplanCompleted = false\n- replanFailed = false\n- pivotTriggered = false\n- pivotSeverity = none\n- validationFailed = false\n\n---\n\n**PART 1: PIVOT TRIGGER CHECK**\n\nReview `pivotTriggers`:\n```\nPIVOT TRIGGERS:\n- [ ] Trigger 1: [condition] \u2192 Status: [Not triggered / Triggered]\n- [ ] Trigger 2: [condition] \u2192 Status: [Not triggered / Triggered]\n```\n\n**If ANY trigger fired:**\n1. Set `pivotTriggered = true`, `validationFailed = true`\n2. Assess severity:\n   - **MINOR**: Return to `phase-select-architecture` (try runnerUp)\n   - **MODERATE**: Return to `phase-ideation` (new constraint)\n   - **MAJOR**: Return to `phase-invariants` (problem misunderstood)\n3. Set `pivotSeverity`, `pivotReturnPhase`\n4. 
STOP \u2014 do not continue to Part 2\n\n---\n\n**PART 2: PLAN STALENESS CHECK (STANDARD+)**\n\nQuick audit of slice plan vs current codebase:\n- Are target files still in expected state?\n- Have dependencies/contracts changed since planning?\n- Any new constraints from prior slices?\n\nStaleness: [Fresh / Minor drift / Major drift]\n\n**If Major drift:** Set `slicePlanStale = true`, `validationFailed = true`\n\n---\n\n**PART 3: SANITY CHECK**\n\nVerify implementation prerequisites:\n- **Existence**: Target files/symbols exist\n- **Signatures**: Key function/type signatures match assumptions\n- **Scope**: No hidden touchpoints beyond plan\n- **Verification**: Slice verification commands are runnable\n\n**If any check fails:** Set `validationFailed = true`\n\n---\n\n**OUTPUT:**\n- Pivot triggers: [All clear / Triggered: X]\n- Staleness: [Fresh / Minor / Major]\n- Sanity: [Pass / Fail: reason]\n- `validationFailed`: true/false\n\n**Set:** `pivotTriggered`, `pivotSeverity`, `pivotReturnPhase`, `slicePlanStale`, `validationFailed`",
+          "prompt": "Validate before implementing slice `{{currentSlice.name}}`.\n\n**FLAG RESET (required):**\nSet these keys in the next `continue_workflow` call's `context` object to their initial state:\n- planDrift = false\n- rulesDrift = false\n- verificationFailed = false\n- verificationApprovalRequired = false\n- verificationRetried = false\n- sliceVerified = false\n- softReplanCompleted = false\n- replanFailed = false\n- pivotTriggered = false\n- pivotSeverity = none\n- validationFailed = false\n\n---\n\n**PART 1: PIVOT TRIGGER CHECK**\n\nReview `pivotTriggers`:\n```\nPIVOT TRIGGERS:\n- [ ] Trigger 1: [condition] → Status: [Not triggered / Triggered]\n- [ ] Trigger 2: [condition] → Status: [Not triggered / Triggered]\n```\n\n**If ANY trigger fired:**\n1. Set `pivotTriggered = true`, `validationFailed = true`\n2. Assess severity:\n   - **MINOR**: Return to `phase-select-architecture` (try runnerUp)\n   - **MODERATE**: Return to `phase-ideation` (new constraint)\n   - **MAJOR**: Return to `phase-invariants` (problem misunderstood)\n3. Set `pivotSeverity`, `pivotReturnPhase`\n4. 
STOP — do not continue to Part 2\n\n---\n\n**PART 2: PLAN STALENESS CHECK (STANDARD+)**\n\nQuick audit of slice plan vs current codebase:\n- Are target files still in expected state?\n- Have dependencies/contracts changed since planning?\n- Any new constraints from prior slices?\n\nStaleness: [Fresh / Minor drift / Major drift]\n\n**If Major drift:** Set `slicePlanStale = true`, `validationFailed = true`\n\n---\n\n**PART 3: SANITY CHECK**\n\nVerify implementation prerequisites:\n- **Existence**: Target files/symbols exist\n- **Signatures**: Key function/type signatures match assumptions\n- **Scope**: No hidden touchpoints beyond plan\n- **Verification**: Slice verification commands are runnable\n\n**If any check fails:** Set `validationFailed = true`\n\n---\n\n**OUTPUT:**\n- Pivot triggers: [All clear / Triggered: X]\n- Staleness: [Fresh / Minor / Major]\n- Sanity: [Pass / Fail: reason]\n- `validationFailed`: true/false\n\n**Set:** `pivotTriggered`, `pivotSeverity`, `pivotReturnPhase`, `slicePlanStale`, `validationFailed`",
           "requireConfirmation": {
             "or": [
               {
@@ -507,19 +507,19 @@
             "var": "validationFailed",
             "not_equals": true
           },
-          "prompt": "Prepare to implement slice `{{currentSlice.name}}`.\n\n**Do:**\n- Re-state slice goal + verification\n- Identify exact files/components to change\n- Re-check invariants impacted\n- Match existing patterns (1\u20133 exemplars)\n- Apply `userRules` (call out if any rule affects this slice)\n\n**Work Package handling:**\n- If `currentSlice.workPackages` exist: use as implementation guidance\n- If no WPs: proceed with full slice scope as one unit\n\n**Git setup (first slice only):**\nIf sliceIndex = 0:\n- Check git availability: `git status`\n- Create feature branch: `feature/etienneb/acei-XXXX_<task-name>`\n- Set `featureBranch` in the next `continue_workflow` call's `context` object\n- Update CONTEXT.md with branch name\n\n**Output:**\n- Slice goal + verification (restated)\n- Files to change\n- Patterns to follow\n- userRules that apply",
+          "prompt": "Prepare to implement slice `{{currentSlice.name}}`.\n\n**Do:**\n- Re-state slice goal + verification\n- Identify exact files/components to change\n- Re-check invariants impacted\n- Match existing patterns (1–3 exemplars)\n- Apply `userRules` (call out if any rule affects this slice)\n\n**Work Package handling:**\n- If `currentSlice.workPackages` exist: use as implementation guidance\n- If no WPs: proceed with full slice scope as one unit\n\n**Git setup (first slice only):**\nIf sliceIndex = 0:\n- Check git availability: `git status`\n- Create feature branch: `feature/etienneb/acei-XXXX_<task-name>`\n- Set `featureBranch` in the next `continue_workflow` call's `context` object\n- Update CONTEXT.md with branch name\n\n**Output:**\n- Slice goal + verification (restated)\n- Files to change\n- Patterns to follow\n- userRules that apply",
           "requireConfirmation": false
         },
         {
           "id": "phase-7b-implement",
           "title": "IMPLEMENT: Slice {{sliceIndex}}",
-          "prompt": "Implement the current slice.\n\n**Implementation strategy:**\n- If the slice has work packages: use them as implementation order and boundary guidance (do WP1, then WP2, etc. within this step).\n- Otherwise: implement full slice as one unit.\n\n---\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**OPTION A: DELEGATE TO BUILDER**\n\nWhen:\n- `delegationMode=delegate` AND\n- Slice is non-trivial (>3 files or new abstractions or multi-layer changes)\n\nDelegate to WorkRail Executor using **Feature Implementation Routine**.\n\nWork Package for Builder:\n```\nMISSION: Implement the current slice according to plan\n\nSLICE SPEC: [Extract from implementation_plan.md]\n- Goal\n- Scope (files/components)\n- Verification plan\n- Work packages (if defined): use as implementation order/guidance\n\nCONTEXT (filtered, file-reference-first):\n- Read: CONTEXT.md (userRules section)\n- Read: implementation_plan.md (this slice)\n- userRules (filtered): include rules matching this slice's domain (architecture, patterns, testing, error-handling)\n- invariants (filtered): those touched by this slice only\n- Patterns: [from PREP - 1-3 exemplars with file refs]\n\nCONSTRAINTS:\n- Follow filtered userRules\n- Preserve filtered invariants\n- Match patterns\n- No drive-by refactors\n- If slice has WPs: respect Targets/Forbidden/Budgets as guidance\n\nACCEPTANCE:\n- Slice done-definition met\n- Verification plan executable\n\nDELIVERABLE: implementation-complete.md\n- Summary (5-8 bullets)\n- File changes (file:line)\n- Tests written/updated\n- Deviations (with rationale)\n```\n\n**Self-check before delegating (required):**\n\u2705 userRules filtered (not full list)\n\u2705 invariants filtered (slice-relevant only)\n\u2705 Patterns included with file refs\n\u2705 Feature brief included\n\n**Main agent review (mandatory):**\n- Read Builder's deliverable.\n- Confirm: scope adhered to, done-definition met, no drive-bys.\n- Set 
`builderDeliverable`.\n\n**Builder fallback (if delegation fails):**\n\nBuilder output is considered incomplete/invalid if ANY:\n- Missing required deliverable file (implementation-complete.md)\n- Touched files in Forbidden list (if WP boundaries exist)\n- Exceeded budget (maxModified/maxNew violations if WP budgets exist)\n- Done-definition not met\n- Verification plan not executable\n\nIf any criterion is triggered: fall back to OPTION B (self-implement).\n- Log the fallback reason in CONTEXT.md.\n\n---\n\n**OPTION B: SELF-IMPLEMENT**\n\nWhen: `delegationMode=solo` OR trivial slice OR Builder fallback\n\nConstraints:\n- If slice has WPs: use them as guidance for implementation order and scope boundaries\n- Prefer architectural moves\n- No drive-by refactors\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record implementation approach (Builder/self/fallback) and if Builder: deliverable summary + any fallback reason.",
+          "prompt": "Implement the current slice.\n\n**Implementation strategy:**\n- If the slice has work packages: use them as implementation order and boundary guidance (do WP1, then WP2, etc. within this step).\n- Otherwise: implement full slice as one unit.\n\n---\n\n\n**Note:** delegationMode was detected in phase-0c and cached in CONTEXT.md\n**OPTION A: DELEGATE TO BUILDER**\n\nWhen:\n- `delegationMode=delegate` AND\n- Slice is non-trivial (>3 files or new abstractions or multi-layer changes)\n\nDelegate to WorkRail Executor using **Feature Implementation Routine**.\n\nWork Package for Builder:\n```\nMISSION: Implement the current slice according to plan\n\nSLICE SPEC: [Extract from implementation_plan.md]\n- Goal\n- Scope (files/components)\n- Verification plan\n- Work packages (if defined): use as implementation order/guidance\n\nCONTEXT (filtered, file-reference-first):\n- Read: CONTEXT.md (userRules section)\n- Read: implementation_plan.md (this slice)\n- userRules (filtered): include rules matching this slice's domain (architecture, patterns, testing, error-handling)\n- invariants (filtered): those touched by this slice only\n- Patterns: [from PREP - 1-3 exemplars with file refs]\n\nCONSTRAINTS:\n- Follow filtered userRules\n- Preserve filtered invariants\n- Match patterns\n- No drive-by refactors\n- If slice has WPs: respect Targets/Forbidden/Budgets as guidance\n\nACCEPTANCE:\n- Slice done-definition met\n- Verification plan executable\n\nDELIVERABLE: implementation-complete.md\n- Summary (5-8 bullets)\n- File changes (file:line)\n- Tests written/updated\n- Deviations (with rationale)\n```\n\n**Self-check before delegating (required):**\n✅ userRules filtered (not full list)\n✅ invariants filtered (slice-relevant only)\n✅ Patterns included with file refs\n✅ Feature brief included\n\n**Main agent review (mandatory):**\n- Read Builder's deliverable.\n- Confirm: scope adhered to, done-definition met, no drive-bys.\n- Set 
`builderDeliverable`.\n\n**Builder fallback (if delegation fails):**\n\nBuilder output is considered incomplete/invalid if ANY:\n- Missing required deliverable file (implementation-complete.md)\n- Touched files in Forbidden list (if WP boundaries exist)\n- Exceeded budget (maxModified/maxNew violations if WP budgets exist)\n- Done-definition not met\n- Verification plan not executable\n\nIf any criterion is triggered: fall back to OPTION B (self-implement).\n- Log the fallback reason in CONTEXT.md.\n\n---\n\n**OPTION B: SELF-IMPLEMENT**\n\nWhen: `delegationMode=solo` OR trivial slice OR Builder fallback\n\nConstraints:\n- If slice has WPs: use them as guidance for implementation order and scope boundaries\n- Prefer architectural moves\n- No drive-by refactors\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record implementation approach (Builder/self/fallback) and if Builder: deliverable summary + any fallback reason.",
           "requireConfirmation": false
         },
         {
           "id": "phase-7c-verify",
           "title": "VERIFY: Slice {{sliceIndex}}",
-          "prompt": "Verify the slice implementation.\n\n**PRIMARY VERIFICATION (always):**\n- Run verification commands from slice (or WP if applicable).\n- Add/adjust tests if needed.\n- Ensure invariants hold.\n- If blocked: request user to run and share output.\n\n---\n\n**PARALLEL VERIFICATION (THOROUGH + high-risk only):**\n\nRun when `rigorMode=THOROUGH` AND slice touches high-risk invariants (auth/payments/security/data integrity/perf-critical).\n\nIf `delegationMode=delegate`:\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel verification.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Verification 1 \u2014 Adversarial Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 5 (maximum for implementation verification)\n- hypotheses: [\"This implementation is correct\", key assumptions about the changes]\n- evidence: files changed in this slice\n- context (file-reference-first):\n  - Read: files changed in this slice\n  - Read: CONTEXT.md (invariants section)\n  - Filtered userRules: edge cases, error handling, validation rules\n  - Feature brief: slice goal + invariants touched + verification plan\n- deliverable: implementation-challenges.md\n\n**Verification 2 \u2014 Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Changed functions in this slice]\n- inputs: [Test scenarios: normal + edge cases]\n- trace_depth: 3\n- context:\n  - Read: files changed in this slice\n  - Read: implementation_plan.md (this slice's verification scenarios)\n  - Filtered userRules: performance, state management, data flow rules\n  - Invariants touched by this slice\n  - Feature brief: architecture decision + risk register for this slice\n- deliverable: execution-simulation.md\n\n**Verification 3 \u2014 Plan Adherence:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md (this slice section)\n- requirements: [Slice done-definition + targets/forbidden]\n- constraints: 
[Filtered userRules: patterns, conventions, testing]\n- context:\n  - Read: files changed + implementation_plan.md (this slice)\n  - Feature brief: slice scope + done-definition + targets/forbidden\n- deliverable: plan-adherence.md\n\n**Self-check before delegating (required):**\n\u2705 Each delegation includes filtered userRules (relevant to their lens)\n\u2705 Each includes invariants touched by this slice\n\u2705 Each includes feature brief (file refs or excerpt)\n\u2705 Each has specific verification lens\n\n**Synthesize (deterministic, bounded retry):**\n\n- **ALL THREE validate** \u2192 set `sliceVerified=true`, proceed to checkpoint\n\n- **ONE concern raised:**\n  1. Investigate the concern and attempt to fix within this slice iteration\n  2. Re-run ONLY the failing validator (max 1 retry per slice)\n  3. If passes after retry: set `sliceVerified=true`, proceed\n  4. If still fails after retry:\n     - Add concern to `verificationFindings`\n     - Require user approval to proceed OR rewind to planning\n     - Set `verificationApprovalRequired=true`\n\n- **TWO+ concerns raised:**\n  1. Do NOT attempt automatic fix\n  2. Set `verificationFailed=true`\n  3. Stop slice loop immediately\n  4. User must choose:\n     - Rewind to planning (Phase 5) via new workflow run with drift context\n     - Manual fix + re-verify\n     - Defer this slice to follow-up ticket\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `sliceVerified` (true/false)\n- `verificationFindings` (list of concerns)\n- `verificationFailed` (true/false)\n- `verificationApprovalRequired` (true/false)\n- `verificationRetried` (true/false)\n- `parallelVerificationRan` (true/false)\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record verification approach (primary only / parallel), concerns raised + retry outcome, and user decision (if approval required).",
+          "prompt": "Verify the slice implementation.\n\n**PRIMARY VERIFICATION (always):**\n- Run verification commands from slice (or WP if applicable).\n- Add/adjust tests if needed.\n- Ensure invariants hold.\n- If blocked: request user to run and share output.\n\n---\n\n**PARALLEL VERIFICATION (THOROUGH + high-risk only):**\n\nRun when `rigorMode=THOROUGH` AND slice touches high-risk invariants (auth/payments/security/data integrity/perf-critical).\n\nIf `delegationMode=delegate`:\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel verification.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Verification 1 — Adversarial Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 5 (maximum for implementation verification)\n- hypotheses: [\"This implementation is correct\", key assumptions about the changes]\n- evidence: files changed in this slice\n- context (file-reference-first):\n  - Read: files changed in this slice\n  - Read: CONTEXT.md (invariants section)\n  - Filtered userRules: edge cases, error handling, validation rules\n  - Feature brief: slice goal + invariants touched + verification plan\n- deliverable: implementation-challenges.md\n\n**Verification 2 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Changed functions in this slice]\n- inputs: [Test scenarios: normal + edge cases]\n- trace_depth: 3\n- context:\n  - Read: files changed in this slice\n  - Read: implementation_plan.md (this slice's verification scenarios)\n  - Filtered userRules: performance, state management, data flow rules\n  - Invariants touched by this slice\n  - Feature brief: architecture decision + risk register for this slice\n- deliverable: execution-simulation.md\n\n**Verification 3 — Plan Adherence:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md (this slice section)\n- requirements: [Slice done-definition + targets/forbidden]\n- constraints: [Filtered userRules: 
patterns, conventions, testing]\n- context:\n  - Read: files changed + implementation_plan.md (this slice)\n  - Feature brief: slice scope + done-definition + targets/forbidden\n- deliverable: plan-adherence.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (relevant to their lens)\n✅ Each includes invariants touched by this slice\n✅ Each includes feature brief (file refs or excerpt)\n✅ Each has specific verification lens\n\n**Synthesize (deterministic, bounded retry):**\n\n- **ALL THREE validate** → set `sliceVerified=true`, proceed to checkpoint\n\n- **ONE concern raised:**\n  1. Investigate the concern and attempt to fix within this slice iteration\n  2. Re-run ONLY the failing validator (max 1 retry per slice)\n  3. If passes after retry: set `sliceVerified=true`, proceed\n  4. If still fails after retry:\n     - Add concern to `verificationFindings`\n     - Require user approval to proceed OR rewind to planning\n     - Set `verificationApprovalRequired=true`\n\n- **TWO+ concerns raised:**\n  1. Do NOT attempt automatic fix\n  2. Set `verificationFailed=true`\n  3. Stop slice loop immediately\n  4. User must choose:\n     - Rewind to planning (Phase 5) via new workflow run with drift context\n     - Manual fix + re-verify\n     - Defer this slice to follow-up ticket\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `sliceVerified` (true/false)\n- `verificationFindings` (list of concerns)\n- `verificationFailed` (true/false)\n- `verificationApprovalRequired` (true/false)\n- `verificationRetried` (true/false)\n- `parallelVerificationRan` (true/false)\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record verification approach (primary only / parallel), concerns raised + retry outcome, and user decision (if approval required).",
           "requireConfirmation": {
             "or": [
               {
@@ -536,7 +536,7 @@
         {
           "id": "phase-7d1-record-work",
           "title": "CHECKPOINT Part 1: Record Work & Detect Drift",
-          "prompt": "Checkpoint after slice completion.\n\n**Record:**\n- What changed (high level)\n- Verification summary\n- Invariants proven\n- What remains (next slice)\n- Follow-up tickets\n- PR notes: if `prStrategy=MultiPR`, propose slice(s) for next PR\n\n**Drift detection (git-based, deterministic):**\n- Run `git status` (or `git diff --name-only`) to list files actually modified in this slice.\n- Compare against slice scope (or WP Targets if WPs were used as guidance).\n- Set `planDrift=true` if:\n  - Modified files outside planned scope\n  - Invariants/slices/verification changed beyond plan\n  - New deps/rollout requirements emerged\n- Set `rulesDrift=true` if user introduced new constraints during implementation.\n\n**Set:** `planDrift`, `rulesDrift`\n\n**Artifact maintenance:**\n- Update `implementation_plan.md` if drift occurred or slices evolved.\n- Update `CONTEXT.md` with:\n  - Decision Log entry (\u22648 bullets; for complex decisions reference plan artifacts)\n  - Unexpected Discoveries\n  - Relevant Files (top 10 in CONTEXT.md; full list in implementation_plan.md)\n\n**Write-or-paste.**",
+          "prompt": "Checkpoint after slice completion.\n\n**Record:**\n- What changed (high level)\n- Verification summary\n- Invariants proven\n- What remains (next slice)\n- Follow-up tickets\n- PR notes: if `prStrategy=MultiPR`, propose slice(s) for next PR\n\n**Drift detection (git-based, deterministic):**\n- Run `git status` (or `git diff --name-only`) to list files actually modified in this slice.\n- Compare against slice scope (or WP Targets if WPs were used as guidance).\n- Set `planDrift=true` if:\n  - Modified files outside planned scope\n  - Invariants/slices/verification changed beyond plan\n  - New deps/rollout requirements emerged\n- Set `rulesDrift=true` if user introduced new constraints during implementation.\n\n**Set:** `planDrift`, `rulesDrift`\n\n**Artifact maintenance:**\n- Update `implementation_plan.md` if drift occurred or slices evolved.\n- Update `CONTEXT.md` with:\n  - Decision Log entry (≤8 bullets; for complex decisions reference plan artifacts)\n  - Unexpected Discoveries\n  - Relevant Files (top 10 in CONTEXT.md; full list in implementation_plan.md)\n\n**Write-or-paste.**",
           "requireConfirmation": false,
           "runCondition": {
             "var": "verificationFailed",
@@ -576,7 +576,7 @@
               }
             ]
           },
-          "prompt": "Drift detected. Plan or implementation boundaries have changed since planning.\n\n**Detected drift:**\n- Plan drift: slice scope/files/verification changed beyond original plan\n- Rules drift: user introduced new constraints affecting implementation\n\n**Required decision (deterministic, single-attempt re-plan limit):**\n\n**Option 1: IN-PLACE RE-PLAN (soft, single attempt)**\n\nWhen to use: drift is containable (1-3 extra files, minor scope shift, clarified requirement).\n\nSteps:\n1. Update `implementation_plan.md` immediately to reflect actual scope/changes\n2. Update affected slices in `slices` array\n3. Run single-pass plan audit (self-audit if QUICK/STANDARD; delegate once if THOROUGH and subagents available)\n4. If audit passes (no new Major/Critical findings):\n   - Set `softReplanCompleted=true`\n   - Reset drift flags: `planDrift=false`, `rulesDrift=false`\n   - Document drift resolution in CONTEXT.md Decision Log\n   - Continue slice loop with updated plan\n5. If audit finds NEW drift or Major issues:\n   - Set `replanFailed=true`\n   - Escalate to Option 2 (user decision)\n\n**Single-attempt limit:** if drift recurs in a later slice after soft re-plan, you MUST escalate to Option 2.\n\n---\n\n**Option 2: HARD STOP + USER DECISION**\n\nWhen to use: High risk OR Major drift (scope doubled, new invariants, architectural change) OR soft re-plan failed/recurred.\n\nSteps:\n1. Stop slice loop immediately\n2. Document drift in CONTEXT.md with evidence (git diff, scope comparison)\n3. Update CONTEXT.md Machine State Checkpoint for resume\n4. 
User chooses:\n   - **Rewind to planning**: exit this workflow run; start new run with updated context; use last Planning checkpoint state to resume at Phase 5\n   - **Manual fix**: user fixes the issue outside workflow; resume at current slice\n   - **Defer slice**: skip this slice, add to follow-up tickets, continue with next slice\n\n---\n\n**Option 3: CONTINUE WITH DEVIATION (document + approve)**\n\nWhen to use: Low/Medium risk AND drift is expected/acceptable.\n\nSteps:\n1. Document why drift is safe/expected\n2. Confirm all invariants still hold\n3. Update CONTEXT.md Decision Log with drift resolution + user approval\n4. Reset drift flags: `planDrift=false`, `rulesDrift=false`\n5. Continue slice loop\n\n---\n\n**Default recommendation:**\n- High risk \u2192 Option 2 (hard stop)\n- Medium risk + containable drift \u2192 Option 1 (soft re-plan)\n- Low risk + expected drift \u2192 Option 3 (continue with approval)\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `softReplanCompleted` (if Option 1 succeeded)\n- `replanFailed` (if Option 1 audit failed)\n- `driftResolution` (which option was chosen)\n\nUser must approve which option to take.",
+          "prompt": "Drift detected. Plan or implementation boundaries have changed since planning.\n\n**Detected drift:**\n- Plan drift: slice scope/files/verification changed beyond original plan\n- Rules drift: user introduced new constraints affecting implementation\n\n**Required decision (deterministic, single-attempt re-plan limit):**\n\n**Option 1: IN-PLACE RE-PLAN (soft, single attempt)**\n\nWhen to use: drift is containable (1-3 extra files, minor scope shift, clarified requirement).\n\nSteps:\n1. Update `implementation_plan.md` immediately to reflect actual scope/changes\n2. Update affected slices in `slices` array\n3. Run single-pass plan audit (self-audit if QUICK/STANDARD; delegate once if THOROUGH and subagents available)\n4. If audit passes (no new Major/Critical findings):\n   - Set `softReplanCompleted=true`\n   - Reset drift flags: `planDrift=false`, `rulesDrift=false`\n   - Document drift resolution in CONTEXT.md Decision Log\n   - Continue slice loop with updated plan\n5. If audit finds NEW drift or Major issues:\n   - Set `replanFailed=true`\n   - Escalate to Option 2 (user decision)\n\n**Single-attempt limit:** if drift recurs in a later slice after soft re-plan, you MUST escalate to Option 2.\n\n---\n\n**Option 2: HARD STOP + USER DECISION**\n\nWhen to use: High risk OR Major drift (scope doubled, new invariants, architectural change) OR soft re-plan failed/recurred.\n\nSteps:\n1. Stop slice loop immediately\n2. Document drift in CONTEXT.md with evidence (git diff, scope comparison)\n3. Update CONTEXT.md Machine State Checkpoint for resume\n4. 
User chooses:\n   - **Rewind to planning**: exit this workflow run; start new run with updated context; use last Planning checkpoint state to resume at Phase 5\n   - **Manual fix**: user fixes the issue outside workflow; resume at current slice\n   - **Defer slice**: skip this slice, add to follow-up tickets, continue with next slice\n\n---\n\n**Option 3: CONTINUE WITH DEVIATION (document + approve)**\n\nWhen to use: Low/Medium risk AND drift is expected/acceptable.\n\nSteps:\n1. Document why drift is safe/expected\n2. Confirm all invariants still hold\n3. Update CONTEXT.md Decision Log with drift resolution + user approval\n4. Reset drift flags: `planDrift=false`, `rulesDrift=false`\n5. Continue slice loop\n\n---\n\n**Default recommendation:**\n- High risk → Option 2 (hard stop)\n- Medium risk + containable drift → Option 1 (soft re-plan)\n- Low risk + expected drift → Option 3 (continue with approval)\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `softReplanCompleted` (if Option 1 succeeded)\n- `replanFailed` (if Option 1 audit failed)\n- `driftResolution` (which option was chosen)\n\nUser must approve which option to take.",
           "requireConfirmation": true
         },
         {
@@ -594,7 +594,7 @@
               }
             ]
           },
-          "prompt": "**Hard gate:** prevent PR size drift.\n\nIf `prStrategy=MultiPR`, stop here and package a PR before next slice.\n\n**PR-ready output:**\n- Proposed PR title\n- 3\u20136 bullet summary (why, not what)\n- Test plan (what ran)\n- Rollout/risks\n- What remains (next slice)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record why this boundary is the right PR boundary, any user pushback, and discoveries affecting PR sizing.\n\n**Wait for user confirmation** to proceed.\n\n(Do not merge; do not push/create PR unless user requests.)",
+          "prompt": "**Hard gate:** prevent PR size drift.\n\nIf `prStrategy=MultiPR`, stop here and package a PR before next slice.\n\n**PR-ready output:**\n- Proposed PR title\n- 3–6 bullet summary (why, not what)\n- Test plan (what ran)\n- Rollout/risks\n- What remains (next slice)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record why this boundary is the right PR boundary, any user pushback, and discoveries affecting PR sizing.\n\n**Wait for user confirmation** to proceed.\n\n(Do not merge; do not push/create PR unless user requests.)",
           "requireConfirmation": true
         }
       ]
@@ -633,7 +633,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "**BOUNDARY: Execution \u2192 Handoff**\n\nYou've done gap discovery. Now verify integration with concrete evidence and set explicit pass/fail flags.\n\n---\n\n**REQUIRED VERIFICATIONS (enumerate commands actually run):**\n\n1) Full test suite\n- Command(s) run:\n- Result summary:\n\n2) Invariant validation\n- For each invariant: how is it proven? (test name or manual proof)\n- Any invariant without proof must be listed in `invariantViolations`\n\n3) Performance budgets (if applicable)\n- Command(s)/benchmark run:\n- Result vs budget:\n\n4) Backward compatibility (if applicable)\n- Command(s) run / checks performed:\n- Result summary:\n\n5) Build/compile check\n- Command(s) run:\n- Result summary:\n\n---\n\n**Output (required exact lines):**\n- integrationVerificationPassed = true|false\n- integrationVerificationFailed = true|false\n- regressionDetected = true|false\n\n---\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `integrationVerificationPassed`\n- `integrationVerificationFailed`\n- `integrationVerificationFindings` (list of issues)\n- `regressionDetected`\n- `invariantViolations` (list)\n\n**Rule:**\n- If `integrationVerificationFailed = true`, then `integrationVerificationPassed` must be false.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log - commands run + findings + any user decisions.",
+      "prompt": "**BOUNDARY: Execution → Handoff**\n\nYou've done gap discovery. Now verify integration with concrete evidence and set explicit pass/fail flags.\n\n---\n\n**REQUIRED VERIFICATIONS (enumerate commands actually run):**\n\n1) Full test suite\n- Command(s) run:\n- Result summary:\n\n2) Invariant validation\n- For each invariant: how is it proven? (test name or manual proof)\n- Any invariant without proof must be listed in `invariantViolations`\n\n3) Performance budgets (if applicable)\n- Command(s)/benchmark run:\n- Result vs budget:\n\n4) Backward compatibility (if applicable)\n- Command(s) run / checks performed:\n- Result summary:\n\n5) Build/compile check\n- Command(s) run:\n- Result summary:\n\n---\n\n**Output (required exact lines):**\n- integrationVerificationPassed = true|false\n- integrationVerificationFailed = true|false\n- regressionDetected = true|false\n\n---\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `integrationVerificationPassed`\n- `integrationVerificationFailed`\n- `integrationVerificationFindings` (list of issues)\n- `regressionDetected`\n- `invariantViolations` (list)\n\n**Rule:**\n- If `integrationVerificationFailed = true`, then `integrationVerificationPassed` must be false.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log - commands run + findings + any user decisions.",
       "requireConfirmation": {
         "or": [
           {
@@ -679,7 +679,7 @@
     {
       "id": "phase-9-final-validation-and-handoff",
       "title": "Phase 9: Final Validation + PR/MR Handoff (No Auto-Merge)",
-      "prompt": "Final validation and handoff.\n\n**Do:**\n- Verify acceptance criteria and invariants\n- Confirm test/build status + coverage gaps\n- Summarize slice completion + PR strategy outcome\n- Provide PR/MR description draft (concise): summary + test plan + rollout notes\n- Provide follow-up tickets list\n\n**Durable artifacts (non-small):**\n- Update `implementation_plan.md` if any slices changed or drift occurred.\n- Ensure `CONTEXT.md` current:\n  - Decision Log with final decisions + follow-ups (\u2264 8 bullets).\n  - Machine State Checkpoint (deterministic resume/rewind):\n\nExecute final captureCheckpoint() to record workflow completion state.\n\n**Checkpoint correctness checklist (required):**\n\u2705 Captured `state` object (not stringified)\n\u2705 Captured `stepInstanceId` object (not stringified)\n\u2705 Resume payload variants have instruction comments replaced with actual JSON from workflow_next response\n\u2705 Workflow identity recorded (version + timestamp)\n\u2705 Deleted oldest checkpoint if >3 exist\n\n**Important:** do not auto-merge, squash-merge, or delete branches.",
+      "prompt": "Final validation and handoff.\n\n**Do:**\n- Verify acceptance criteria and invariants\n- Confirm test/build status + coverage gaps\n- Summarize slice completion + PR strategy outcome\n- Provide PR/MR description draft (concise): summary + test plan + rollout notes\n- Provide follow-up tickets list\n\n**Durable artifacts (non-small):**\n- Update `implementation_plan.md` if any slices changed or drift occurred.\n- Ensure `CONTEXT.md` current:\n  - Decision Log with final decisions + follow-ups (≤ 8 bullets).\n  - Machine State Checkpoint (deterministic resume/rewind):\n\nExecute final captureCheckpoint() to record workflow completion state.\n\n**Checkpoint correctness checklist (required):**\n✅ Captured `state` object (not stringified)\n✅ Captured `stepInstanceId` object (not stringified)\n✅ Resume payload variants have instruction comments replaced with actual JSON from workflow_next response\n✅ Workflow identity recorded (version + timestamp)\n✅ Deleted oldest checkpoint if >3 exist\n\n**Important:** do not auto-merge, squash-merge, or delete branches.",
       "requireConfirmation": true
     }
   ]

package/workflows/coding-task-workflow-agentic.lean.v2.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "id": "coding-task-workflow-agentic",
-  "name": "Agentic Task Dev Workflow (Lean • Notes-First • WorkRail Executor)",
+  "name": "Agentic Task Dev Workflow (Lean)",
   "version": "1.1.0",
   "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
   "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",

package/workflows/coding-task-workflow-agentic.v2.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "id": "coding-task-workflow-agentic",
-  "name": "Agentic Task Dev Workflow (v2 \u2022 Notes-First \u2022 WorkRail Executor)",
+  "name": "Agentic Task Dev Workflow (v2)",
   "version": "2.0.0",
   "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
   "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
@@ -47,7 +47,7 @@
   "steps": [
     {
       "id": "phase-0-triage-and-mode",
-      "title": "Phase 0: Triage (Complexity \u2022 Risk \u2022 PR Strategy)",
+      "title": "Phase 0: Triage (Complexity • Risk • PR Strategy)",
       "prompt": "Analyze the task and choose the right rigor.\n\nClassify:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n- `maxParallelism`: 0 / 3 / 4\n\nDecision guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nParallelism guidance:\n- QUICK: no delegation by default\n- STANDARD: few delegation moments, but allow multiple parallel executors at each moment\n- THOROUGH: same pattern, but with one extra delegation moment and broader parallel validation\n\nAlso capture `userRules` from the active session instructions and explicit philosophy. Keep them as a focused list of concrete, actionable rules.\n\nSet these keys in the next `continue_workflow` call's `context` object: `taskComplexity`, `riskLevel`, `rigorMode`, `automationLevel`, `prStrategy`, `maxParallelism`, `userRules`.\n\nAsk the user to confirm only if the rigor or PR strategy materially affects delivery expectations.\n\nAlso set  in the context object: one sentence describing what you are trying to accomplish (e.g. \"implement OAuth refresh token rotation\", \"review PR #47 before merge\"). This populates the session title in the Workspace console immediately.",
       "requireConfirmation": true
     },
@@ -101,12 +101,12 @@
     },
     {
       "id": "phase-2-architecture-decision",
-      "title": "Phase 2: Architecture Decision (Generate \u2022 Compare \u2022 Challenge \u2022 Select)",
+      "title": "Phase 2: Architecture Decision (Generate • Compare • Challenge • Select)",
       "runCondition": {
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Make the architecture decision in one coherent phase instead of serializing every thinking mode into a separate step.\n\nPart A \u2014 Prepare a neutral fact packet:\n- problem statement\n- acceptance criteria\n- non-goals\n- invariants\n- constraints\n- `userRules`\n- relevant files / pattern examples\n- current risks and unknowns\n\nPart B \u2014 Generate candidate plans:\n- QUICK: self-generate at least 3 genuinely different approaches\n- STANDARD: if delegation is available, spawn TWO or THREE WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, pragmatic)\n- THOROUGH: if delegation is available, spawn THREE or FOUR WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, architecture-first, rollback-safe)\n\nPart C \u2014 Diversity gate before commitment:\n- assign each candidate plan a short `candidatePlanFamily` label\n- check whether the candidates are materially different in shape, not just wording\n- if all candidates cluster on the same pattern family, generate at least one more plan from a deliberately different perspective before selecting\n- set `candidateDiversityAdequate = true|false`\n\nPart D \u2014 Compare candidate plans:\n- invariant fit\n- philosophy alignment (`userRules` as active lens)\n- risk profile\n- implementation shape\n- likely reviewability / PR shape\n\nPart E \u2014 Challenge the best one or two:\n- STANDARD: optionally challenge the leading candidate with ONE WorkRail Executor running `routine-hypothesis-challenge`\n- THOROUGH: challenge the top 1-2 candidate plans using ONE or TWO WorkRail Executors running `routine-hypothesis-challenge`\n\nPart F \u2014 Decide:\nSet these keys in the next `continue_workflow` call's `context` object:\n- `approaches`\n- `alternativesConsideredCount`\n- `candidatePlanFamilies`\n- `candidateDiversityAdequate`\n- 
`hasRunnerUp`\n- `selectedApproach`\n- `runnerUpApproach`\n- `architectureRationale`\n- `keyRiskToMonitor`\n- `pivotTriggers`\n- `architectureConfidenceBand`\n\nRules:\n- the main agent owns the final decision\n- subagents generate candidate plans; they do not decide the winner\n- if the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost",
+      "prompt": "Make the architecture decision in one coherent phase instead of serializing every thinking mode into a separate step.\n\nPart A — Prepare a neutral fact packet:\n- problem statement\n- acceptance criteria\n- non-goals\n- invariants\n- constraints\n- `userRules`\n- relevant files / pattern examples\n- current risks and unknowns\n\nPart B — Generate candidate plans:\n- QUICK: self-generate at least 3 genuinely different approaches\n- STANDARD: if delegation is available, spawn TWO or THREE WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, pragmatic)\n- THOROUGH: if delegation is available, spawn THREE or FOUR WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, architecture-first, rollback-safe)\n\nPart C — Diversity gate before commitment:\n- assign each candidate plan a short `candidatePlanFamily` label\n- check whether the candidates are materially different in shape, not just wording\n- if all candidates cluster on the same pattern family, generate at least one more plan from a deliberately different perspective before selecting\n- set `candidateDiversityAdequate = true|false`\n\nPart D — Compare candidate plans:\n- invariant fit\n- philosophy alignment (`userRules` as active lens)\n- risk profile\n- implementation shape\n- likely reviewability / PR shape\n\nPart E — Challenge the best one or two:\n- STANDARD: optionally challenge the leading candidate with ONE WorkRail Executor running `routine-hypothesis-challenge`\n- THOROUGH: challenge the top 1-2 candidate plans using ONE or TWO WorkRail Executors running `routine-hypothesis-challenge`\n\nPart F — Decide:\nSet these keys in the next `continue_workflow` call's `context` object:\n- `approaches`\n- `alternativesConsideredCount`\n- `candidatePlanFamilies`\n- `candidateDiversityAdequate`\n- `hasRunnerUp`\n- 
`selectedApproach`\n- `runnerUpApproach`\n- `architectureRationale`\n- `keyRiskToMonitor`\n- `pivotTriggers`\n- `architectureConfidenceBand`\n\nRules:\n- the main agent owns the final decision\n- subagents generate candidate plans; they do not decide the winner\n- if the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost",
       "requireConfirmation": {
         "or": [
           {
@@ -131,13 +131,13 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Applied `userRules` and philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n   - [principle] \u2192 [satisfied / tension / violated + 1-line why]\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`\n- `planConfidenceBand`\n\nRules:\n- keep `implementation_plan.md` concrete enough for another engineer to implement without guessing\n- use work packages only when they create real clarity; do not over-fragment work\n- use the user's coding philosophy as the primary planning lens, and name tensions explicitly\n- set `unresolvedUnknownCount` to the number of still-open issues that would materially affect implementation quality\n- set `planConfidenceBand` to Low / Medium / High based on how ready the plan actually is",
+      "prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Applied `userRules` and philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n   - [principle] → [satisfied / tension / violated + 1-line why]\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`\n- `planConfidenceBand`\n\nRules:\n- keep `implementation_plan.md` concrete enough for another engineer to implement without guessing\n- use work packages only when they create real clarity; do not over-fragment work\n- use the user's coding philosophy as the primary planning lens, and name tensions explicitly\n- set `unresolvedUnknownCount` to the number of still-open issues that would materially affect implementation quality\n- set `planConfidenceBand` to Low / Medium / High based on how ready the plan actually is",
       "requireConfirmation": false
     },
     {
       "id": "phase-4-plan-iterations",
       "type": "loop",
-      "title": "Phase 4: Plan Audit Loop (Audit \u2192 Refocus \u2192 Decide)",
+      "title": "Phase 4: Plan Audit Loop (Audit → Refocus → Decide)",
       "runCondition": {
         "var": "taskComplexity",
         "not_equals": "Small"
@@ -167,7 +167,7 @@
         {
           "id": "phase-4c-loop-decision",
           "title": "Loop Exit Decision",
-          "prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `planFindings` is non-empty \u2192 continue\n- if `planFindings` is empty \u2192 stop, but enumerate what was checked to justify the clean pass\n- if max iterations reached \u2192 stop and document remaining concerns\n\nOutput exactly:\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```",
+          "prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `planFindings` is non-empty → continue\n- if `planFindings` is empty → stop, but enumerate what was checked to justify the clean pass\n- if max iterations reached → stop and document remaining concerns\n\nOutput exactly:\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```",
           "requireConfirmation": true,
           "outputContract": {
             "contractRef": "wr.contracts.loop_control"