@exaudeus/workrail 3.3.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/application/services/compiler/binding-registry.d.ts +3 -0
  2. package/dist/application/services/compiler/binding-registry.js +71 -0
  3. package/dist/application/services/compiler/resolve-bindings.d.ts +18 -0
  4. package/dist/application/services/compiler/resolve-bindings.js +162 -0
  5. package/dist/application/services/compiler/sentinel-scan.d.ts +9 -0
  6. package/dist/application/services/compiler/sentinel-scan.js +37 -0
  7. package/dist/application/services/validation-engine.js +104 -0
  8. package/dist/application/services/workflow-compiler.d.ts +10 -2
  9. package/dist/application/services/workflow-compiler.js +25 -6
  10. package/dist/application/services/workflow-validation-pipeline.js +8 -1
  11. package/dist/cli.js +2 -2
  12. package/dist/engine/engine-factory.js +1 -1
  13. package/dist/index.d.ts +2 -1
  14. package/dist/index.js +4 -2
  15. package/dist/manifest.json +149 -101
  16. package/dist/mcp/handler-factory.d.ts +1 -1
  17. package/dist/mcp/handler-factory.js +2 -2
  18. package/dist/mcp/handlers/v2-checkpoint.js +5 -5
  19. package/dist/mcp/handlers/v2-error-mapping.js +4 -4
  20. package/dist/mcp/handlers/v2-execution/continue-advance.js +2 -2
  21. package/dist/mcp/handlers/v2-execution/continue-rehydrate.d.ts +1 -0
  22. package/dist/mcp/handlers/v2-execution/continue-rehydrate.js +76 -60
  23. package/dist/mcp/handlers/v2-execution/index.js +86 -44
  24. package/dist/mcp/handlers/v2-execution-helpers.js +1 -1
  25. package/dist/mcp/handlers/v2-resume.js +10 -5
  26. package/dist/mcp/handlers/v2-token-ops.d.ts +1 -1
  27. package/dist/mcp/handlers/v2-token-ops.js +5 -5
  28. package/dist/mcp/handlers/v2-workspace-resolution.d.ts +1 -0
  29. package/dist/mcp/handlers/v2-workspace-resolution.js +12 -0
  30. package/dist/mcp/index.d.ts +4 -1
  31. package/dist/mcp/index.js +6 -2
  32. package/dist/mcp/output-schemas.d.ts +148 -8
  33. package/dist/mcp/output-schemas.js +22 -4
  34. package/dist/mcp/server.d.ts +6 -4
  35. package/dist/mcp/server.js +2 -57
  36. package/dist/mcp/tool-descriptions.js +9 -158
  37. package/dist/mcp/transports/http-entry.js +6 -25
  38. package/dist/mcp/transports/shutdown-hooks.d.ts +5 -0
  39. package/dist/mcp/transports/shutdown-hooks.js +38 -0
  40. package/dist/mcp/transports/stdio-entry.js +6 -28
  41. package/dist/mcp/v2/tool-registry.js +2 -1
  42. package/dist/mcp/v2/tools.d.ts +28 -11
  43. package/dist/mcp/v2/tools.js +28 -4
  44. package/dist/mcp/v2-response-formatter.js +28 -1
  45. package/dist/mcp/validation/suggestion-generator.d.ts +1 -1
  46. package/dist/mcp/validation/suggestion-generator.js +13 -3
  47. package/dist/mcp/workflow-protocol-contracts.d.ts +31 -0
  48. package/dist/mcp/workflow-protocol-contracts.js +207 -0
  49. package/dist/mcp-server.d.ts +3 -1
  50. package/dist/mcp-server.js +6 -2
  51. package/dist/types/workflow-definition.d.ts +7 -0
  52. package/dist/types/workflow-definition.js +1 -0
  53. package/dist/v2/durable-core/domain/binding-drift.d.ts +8 -0
  54. package/dist/v2/durable-core/domain/binding-drift.js +29 -0
  55. package/dist/v2/durable-core/domain/reason-model.js +2 -2
  56. package/dist/v2/durable-core/schemas/compiled-workflow/index.d.ts +12 -0
  57. package/dist/v2/durable-core/schemas/compiled-workflow/index.js +2 -0
  58. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +56 -56
  59. package/dist/v2/durable-core/schemas/session/events.d.ts +16 -16
  60. package/dist/v2/durable-core/schemas/session/gaps.d.ts +6 -6
  61. package/dist/v2/projections/resume-ranking.d.ts +1 -0
  62. package/dist/v2/projections/resume-ranking.js +1 -0
  63. package/dist/v2/read-only/v1-to-v2-shim.js +27 -10
  64. package/dist/v2/usecases/resume-session.d.ts +5 -1
  65. package/dist/v2/usecases/resume-session.js +4 -1
  66. package/package.json +1 -1
  67. package/spec/authoring-spec.json +1373 -0
  68. package/spec/workflow.schema.json +132 -2
  69. package/workflows/coding-task-workflow-agentic.json +15 -15
  70. package/workflows/coding-task-workflow-agentic.lean.v2.json +10 -10
  71. package/workflows/coding-task-workflow-agentic.v2.json +12 -12
  72. package/workflows/coding-task-workflow-with-loops.json +2 -2
  73. package/workflows/cross-platform-code-conversion.v2.json +199 -0
  74. package/workflows/document-creation-workflow.json +1 -1
  75. package/workflows/exploration-workflow.json +3 -3
  76. package/workflows/mr-review-workflow.agentic.v2.json +11 -11
  77. package/workflows/routines/parallel-work-partitioning.json +43 -0
  78. package/workflows/workflow-for-workflows.v2.json +186 -0
@@ -67,7 +67,7 @@
67
67
  },
68
68
  "metaGuidance": {
69
69
  "type": "array",
70
- "description": "Persistent best practices that apply throughout the workflow",
70
+ "description": "Persistent behavioral rules surfaced on start and resume. Not repeated on every step advance. For external document pointers, use 'references' instead.",
71
71
  "items": {
72
72
  "type": "string",
73
73
  "minLength": 1,
@@ -134,6 +134,22 @@
134
134
  "maxLength": 128
135
135
  },
136
136
  "uniqueItems": true
137
+ },
138
+ "extensionPoints": {
139
+ "type": "array",
140
+ "description": "Bounded cognitive slots that users can customize via .workrail/bindings.json. Each slot is referenced in step prompts via {{wr.bindings.slotId}} and resolved at compile time.",
141
+ "items": {
142
+ "$ref": "#/$defs/extensionPoint"
143
+ },
144
+ "uniqueItems": true
145
+ },
146
+ "references": {
147
+ "type": "array",
148
+ "description": "Workflow-declared references to external documents. Each reference is a pointer — content is never inlined. Declarations participate in the workflow hash; referenced file content does not.",
149
+ "items": {
150
+ "$ref": "#/$defs/workflowReference"
151
+ },
152
+ "uniqueItems": true
137
153
  }
138
154
  },
139
155
  "required": [
@@ -145,6 +161,85 @@
145
161
  ],
146
162
  "additionalProperties": false,
147
163
  "$defs": {
164
+ "extensionPoint": {
165
+ "type": "object",
166
+ "description": "A bounded cognitive slot that can be customized via .workrail/bindings.json",
167
+ "properties": {
168
+ "slotId": {
169
+ "type": "string",
170
+ "description": "Stable identifier used in {{wr.bindings.slotId}} tokens",
171
+ "pattern": "^[a-z][a-z0-9_]*$",
172
+ "minLength": 2,
173
+ "maxLength": 64
174
+ },
175
+ "purpose": {
176
+ "type": "string",
177
+ "description": "Human description of what this slot does",
178
+ "minLength": 1,
179
+ "maxLength": 256
180
+ },
181
+ "default": {
182
+ "type": "string",
183
+ "description": "Default routine/workflow ID used when no project override is declared",
184
+ "minLength": 1,
185
+ "maxLength": 128
186
+ },
187
+ "acceptedKinds": {
188
+ "type": "array",
189
+ "description": "Allowed implementation kinds (informational in v1)",
190
+ "items": {
191
+ "type": "string",
192
+ "enum": ["routine", "workflow"]
193
+ },
194
+ "uniqueItems": true
195
+ }
196
+ },
197
+ "required": ["slotId", "purpose", "default"],
198
+ "additionalProperties": false
199
+ },
200
+ "workflowReference": {
201
+ "type": "object",
202
+ "description": "A pointer to an external document relevant to the workflow. Content is never inlined — the agent reads the file itself if needed.",
203
+ "properties": {
204
+ "id": {
205
+ "type": "string",
206
+ "description": "Unique identifier within the workflow",
207
+ "pattern": "^[a-z][a-z0-9_-]*$",
208
+ "minLength": 2,
209
+ "maxLength": 64
210
+ },
211
+ "title": {
212
+ "type": "string",
213
+ "description": "Human-readable title for the reference",
214
+ "minLength": 1,
215
+ "maxLength": 128
216
+ },
217
+ "source": {
218
+ "type": "string",
219
+ "description": "File path relative to workspace root",
220
+ "minLength": 1,
221
+ "maxLength": 512
222
+ },
223
+ "purpose": {
224
+ "type": "string",
225
+ "description": "Why this reference matters to the workflow",
226
+ "minLength": 1,
227
+ "maxLength": 512
228
+ },
229
+ "authoritative": {
230
+ "type": "boolean",
231
+ "description": "Whether this document is authoritative (agent should follow it strictly)"
232
+ },
233
+ "resolveFrom": {
234
+ "type": "string",
235
+ "enum": ["workspace", "package"],
236
+ "description": "Resolution base for source path. 'workspace' (default) resolves against the user's project root. 'package' resolves against the workrail package root (for files shipped with the workflow).",
237
+ "default": "workspace"
238
+ }
239
+ },
240
+ "required": ["id", "title", "source", "purpose", "authoritative"],
241
+ "additionalProperties": false
242
+ },
148
243
  "stepId": {
149
244
  "type": "string",
150
245
  "pattern": "^[a-z0-9-]+$",
@@ -202,7 +297,13 @@
202
297
  "functionDefinitions": { "type": "array", "items": { "$ref": "#/$defs/functionDefinition" } },
203
298
  "functionCalls": { "type": "array", "items": { "$ref": "#/$defs/functionCall" } },
204
299
  "functionReferences": { "type": "array", "items": { "type": "string", "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*\\(\\)$" } },
205
- "templateCall": { "$ref": "#/$defs/templateCall" }
300
+ "templateCall": { "$ref": "#/$defs/templateCall" },
301
+ "promptFragments": {
302
+ "type": "array",
303
+ "description": "Conditional prompt fragments appended to the step's base prompt at render time. Each fragment is appended in declaration order when its 'when' condition matches the session context.",
304
+ "items": { "$ref": "#/$defs/promptFragment" },
305
+ "minItems": 1
306
+ }
206
307
  },
207
308
  "required": ["id", "title"],
208
309
  "additionalProperties": false
@@ -427,8 +528,37 @@
427
528
  "not": {
428
529
  "$ref": "#/$defs/condition",
429
530
  "description": "Logical NOT of a condition"
531
+ },
532
+ "in": {
533
+ "type": "array",
534
+ "description": "Check if variable value is in this array of allowed values"
535
+ }
536
+ },
537
+ "additionalProperties": false
538
+ },
539
+ "promptFragment": {
540
+ "type": "object",
541
+ "description": "A conditional prompt fragment appended to a step's base prompt at render time when its condition matches the session context.",
542
+ "properties": {
543
+ "id": {
544
+ "type": "string",
545
+ "description": "Unique identifier for this fragment within the step",
546
+ "minLength": 1,
547
+ "maxLength": 64,
548
+ "pattern": "^[a-z][a-z0-9_-]*$"
549
+ },
550
+ "when": {
551
+ "$ref": "#/$defs/condition",
552
+ "description": "Condition evaluated against session context at render time. If absent, fragment is always appended."
553
+ },
554
+ "text": {
555
+ "type": "string",
556
+ "description": "Prompt text appended when the condition matches. Must not contain {{wr.*}} tokens.",
557
+ "minLength": 1,
558
+ "maxLength": 4096
430
559
  }
431
560
  },
561
+ "required": ["id", "text"],
432
562
  "additionalProperties": false
433
563
  },
434
564
  "confirmationRule": {
@@ -55,7 +55,7 @@
55
55
  {
56
56
  "id": "phase-0-triage-and-mode",
57
57
  "title": "Phase 0: Triage (Complexity • Risk • Automation • Doc Depth • PR Strategy)",
58
- "prompt": "**ANALYZE** the task and classify with deterministic criteria.\n\n## 0) Rigor mode (deterministic)\nSelect **rigorMode**: QUICK / STANDARD / THOROUGH.\n\nScore each criterion 0–2 and sum. Use the table:\n- **Scope breadth** (files/areas touched): 0=1–2 files, 1=multi-file but single area, 2=multi-area\n- **Risk level**: 0=low, 1=moderate, 2=high (security/auth/data loss/release pipeline/perf critical)\n- **Uncertainty**: 0=clear requirements + known code path, 1=some ambiguity, 2=unknowns/missing acceptance criteria\n- **Repro difficulty**: 0=deterministic + local, 1=some async/edge cases, 2=flaky/CI-only/racy\n- **Externalities**: 0=internal-only, 1=some external deps, 2=publishing/infra/3rd-party integration\n\nDecision:\n- 0–2 → **QUICK**\n- 3–5 → **STANDARD**\n- 6–10 → **THOROUGH**\n\nAlso set:\n- QUICK: `auditDepth=light`, `maxQuestions=1`, `maxParallelism=0`\n- STANDARD: `auditDepth=normal`, `maxQuestions=3`, `maxParallelism=1`\n- THOROUGH: `auditDepth=deep`, `maxQuestions=5`, `maxParallelism=3`\n\n## 1) taskComplexity\nSmall / Medium / Large\n- Small: 1–2 files, low risk, clear change, minimal ambiguity\n- Medium: multi-file, moderate risk, some ambiguity, needs planning\n- Large: architectural impact, multiple systems, high risk/unknowns\n\n## 2) riskLevel\nLow / Medium / High\n- High if: auth/payments/security/data integrity/perf-sensitive/production incident/release pipeline\n\n## 3) automationLevel\nHigh / Medium / Low\n- High: proceed autonomously; ask only for real decisions\n- Medium: normal confirmations at gates\n- Low: extra confirmations and explicit checklists\n\n## 4) docDepth (durable artifacts; no auto-commit)\nNone / Light / Full\n- For non-small tasks: always maintain `CONTEXT.md` and `implementation_plan.md`.\n- None: plan + context only (no additional spec/design)\n- Light: add `spec.md` (short)\n- Full: add `spec.md` + `design.md` (architecture + risks)\n\n## 5) prStrategy\nSinglePR / MultiPR\n- MultiPR if Large or diff is 
broad (many files/domains)\n\n**Set context variables**: `rigorMode`, `auditDepth`, `maxQuestions`, `maxParallelism`, `taskComplexity`, `riskLevel`, `automationLevel`, `docDepth`, `prStrategy`.\n\n**VERIFY (minimal questions)**: ask the user to confirm or override `rigorMode` and `prStrategy` only if it impacts delivery expectations.\n\n**CONTEXT LOGGING**: Update CONTEXT.md Decision Log (follow format from metaGuidance) - record this triage decision and any user overrides.",
58
+ "prompt": "**ANALYZE** the task and classify with deterministic criteria.\n\n## 0) Rigor mode (deterministic)\nSelect **rigorMode**: QUICK / STANDARD / THOROUGH.\n\nScore each criterion 0–2 and sum. Use the table:\n- **Scope breadth** (files/areas touched): 0=1–2 files, 1=multi-file but single area, 2=multi-area\n- **Risk level**: 0=low, 1=moderate, 2=high (security/auth/data loss/release pipeline/perf critical)\n- **Uncertainty**: 0=clear requirements + known code path, 1=some ambiguity, 2=unknowns/missing acceptance criteria\n- **Repro difficulty**: 0=deterministic + local, 1=some async/edge cases, 2=flaky/CI-only/racy\n- **Externalities**: 0=internal-only, 1=some external deps, 2=publishing/infra/3rd-party integration\n\nDecision:\n- 0–2 → **QUICK**\n- 3–5 → **STANDARD**\n- 6–10 → **THOROUGH**\n\nAlso set:\n- QUICK: `auditDepth=light`, `maxQuestions=1`, `maxParallelism=0`\n- STANDARD: `auditDepth=normal`, `maxQuestions=3`, `maxParallelism=1`\n- THOROUGH: `auditDepth=deep`, `maxQuestions=5`, `maxParallelism=3`\n\n## 1) taskComplexity\nSmall / Medium / Large\n- Small: 1–2 files, low risk, clear change, minimal ambiguity\n- Medium: multi-file, moderate risk, some ambiguity, needs planning\n- Large: architectural impact, multiple systems, high risk/unknowns\n\n## 2) riskLevel\nLow / Medium / High\n- High if: auth/payments/security/data integrity/perf-sensitive/production incident/release pipeline\n\n## 3) automationLevel\nHigh / Medium / Low\n- High: proceed autonomously; ask only for real decisions\n- Medium: normal confirmations at gates\n- Low: extra confirmations and explicit checklists\n\n## 4) docDepth (durable artifacts; no auto-commit)\nNone / Light / Full\n- For non-small tasks: always maintain `CONTEXT.md` and `implementation_plan.md`.\n- None: plan + context only (no additional spec/design)\n- Light: add `spec.md` (short)\n- Full: add `spec.md` + `design.md` (architecture + risks)\n\n## 5) prStrategy\nSinglePR / MultiPR\n- MultiPR if Large or diff is 
broad (many files/domains)\n\nSet these keys in the next `continue_workflow` call's `context` object: `rigorMode`, `auditDepth`, `maxQuestions`, `maxParallelism`, `taskComplexity`, `riskLevel`, `automationLevel`, `docDepth`, `prStrategy`.\n\n**VERIFY (minimal questions)**: ask the user to confirm or override `rigorMode` and `prStrategy` only if it impacts delivery expectations.\n\n**CONTEXT LOGGING**: Update CONTEXT.md Decision Log (follow format from metaGuidance) - record this triage decision and any user overrides.",
59
59
  "requireConfirmation": true
60
60
  },
61
61
  {
@@ -82,7 +82,7 @@
82
82
  "var": "taskComplexity",
83
83
  "not_equals": "Small"
84
84
  },
85
- "prompt": "Create and initialize `CONTEXT.md` as the durable artifact for this workflow run.\n\n**Rules (write-or-paste, deterministic):**\n- If file-writing is possible in your environment: write/update `CONTEXT.md` now.\n- Otherwise: output the full pasteable content in chat.\n- Treat `CONTEXT.md` as canonical; do not paraphrase.\n- Do NOT commit documentation files unless the user explicitly asks.\n\n**Subagent capability detection (no repo file dependency):**\n- Determine if delegation is available from your runtime/tooling.\n- If you can delegate to the WorkRail Executor: set `delegationMode=delegate`.\n- Otherwise: set `delegationMode=solo`.\n- Add \"Environment Capabilities\" section to CONTEXT.md recording delegationMode.\n\n**CONTEXT.md is a living log**: it must be updated at each gate (triage, invariants, architecture choice, slice planning, plan refocus, each slice checkpoint, each PR packaging gate).\n\n**Size caps (keep resumable but bounded):**\n- Relevant files: max 10 (beyond that, link to plan artifacts)\n- Decision log entries: max 8 bullets each; use plan/spec/design for details\n- Keep last 3 Machine State Checkpoints only (delete older ones)\n\n**CONTEXT.md structure (must include):**\n\n1) **Task Summary** (1 paragraph)\n\n2) **Conversation Preferences**\n- Tone/verbosity preferences\n- Constraints like \"don't run X\" or \"ask before doing Y\"\n\n3) **Triage**\n- rigorMode, auditDepth, maxQuestions, maxParallelism\n- taskComplexity, riskLevel, automationLevel, docDepth, prStrategy\n\n3b) **Environment Capabilities**\n- delegationMode (solo/proxy/delegate)\n- Note: This value is cached for this workflow run\n\n4) **Inputs & Sources**\n- Ticket links/text pointers\n- User-provided file paths and external references\n\n5) **User Rules & Philosophies (`userRules`)**\n- Extract from: user instructions, README.md, docs/, ADRs, workflows/ patterns, 1–2 exemplar files near target module.\n- Keep this focused and actionable.\n- Set context variable 
`userRules` as a bulleted list.\n\n6) **Decision Log (append-only, capped at 8 bullets/entry)**\nFor each decision include:\n- Decision\n- Why\n- Alternatives considered\n- Impacted files\n- User feedback/pushback\n- Unexpected discoveries\n\n7) **Unexpected Discoveries / Deviations**\n- Anything surprising encountered (deps, scope expansion, missing invariants)\n- Any plan drift and how you addressed it\n\n8) **Relevant Files (max 10)**\n- Key files + why they matter\n- Beyond 10: reference plan artifacts\n\n9) **Artifacts Index**\n- `implementation_plan.md` (always for non-small)\n- `spec.md` / `design.md` if created\n\n10) **Progress**\n- Current slice name/index, what's done, what's next\n\n7) **Resumption Instructions**: Use captureCheckpoint() after each workflow_next call to maintain Machine State Checkpoint section.\n\n**Output:** the full content for `CONTEXT.md` (or confirm file written).",
85
+ "prompt": "Create and initialize `CONTEXT.md` as the durable artifact for this workflow run.\n\n**Rules (write-or-paste, deterministic):**\n- If file-writing is possible in your environment: write/update `CONTEXT.md` now.\n- Otherwise: output the full pasteable content in chat.\n- Treat `CONTEXT.md` as canonical; do not paraphrase.\n- Do NOT commit documentation files unless the user explicitly asks.\n\n**Subagent capability detection (no repo file dependency):**\n- Determine if delegation is available from your runtime/tooling.\n- If you can delegate to the WorkRail Executor: set `delegationMode=delegate`.\n- Otherwise: set `delegationMode=solo`.\n- Add \"Environment Capabilities\" section to CONTEXT.md recording delegationMode.\n\n**CONTEXT.md is a living log**: it must be updated at each gate (triage, invariants, architecture choice, slice planning, plan refocus, each slice checkpoint, each PR packaging gate).\n\n**Size caps (keep resumable but bounded):**\n- Relevant files: max 10 (beyond that, link to plan artifacts)\n- Decision log entries: max 8 bullets each; use plan/spec/design for details\n- Keep last 3 Machine State Checkpoints only (delete older ones)\n\n**CONTEXT.md structure (must include):**\n\n1) **Task Summary** (1 paragraph)\n\n2) **Conversation Preferences**\n- Tone/verbosity preferences\n- Constraints like \"don't run X\" or \"ask before doing Y\"\n\n3) **Triage**\n- rigorMode, auditDepth, maxQuestions, maxParallelism\n- taskComplexity, riskLevel, automationLevel, docDepth, prStrategy\n\n3b) **Environment Capabilities**\n- delegationMode (solo/proxy/delegate)\n- Note: This value is cached for this workflow run\n\n4) **Inputs & Sources**\n- Ticket links/text pointers\n- User-provided file paths and external references\n\n5) **User Rules & Philosophies (`userRules`)**\n- Extract from: user instructions, README.md, docs/, ADRs, workflows/ patterns, 1–2 exemplar files near target module.\n- Keep this focused and actionable.\n- Set `userRules` in 
the next `continue_workflow` call's `context` object as a bulleted list.\n\n6) **Decision Log (append-only, capped at 8 bullets/entry)**\nFor each decision include:\n- Decision\n- Why\n- Alternatives considered\n- Impacted files\n- User feedback/pushback\n- Unexpected discoveries\n\n7) **Unexpected Discoveries / Deviations**\n- Anything surprising encountered (deps, scope expansion, missing invariants)\n- Any plan drift and how you addressed it\n\n8) **Relevant Files (max 10)**\n- Key files + why they matter\n- Beyond 10: reference plan artifacts\n\n9) **Artifacts Index**\n- `implementation_plan.md` (always for non-small)\n- `spec.md` / `design.md` if created\n\n10) **Progress**\n- Current slice name/index, what's done, what's next\n\n7) **Resumption Instructions**: Use captureCheckpoint() after each workflow_next call to maintain Machine State Checkpoint section.\n\n**Output:** the full content for `CONTEXT.md` (or confirm file written).",
86
86
  "requireConfirmation": false
87
87
  },
88
88
  {
@@ -92,7 +92,7 @@
92
92
  "var": "taskComplexity",
93
93
  "not_equals": "Small"
94
94
  },
95
- "prompt": "Gather enough context to design and plan correctly.\n\n**Rules:**\n- Do this yourself (no delegation in this step).\n- Use tools to verify everything.\n- Prefer matching existing patterns over inventing new ones.\n- Prefer answering your own questions with tools; only keep true human-decision questions.\n\n**Deliverable (in chat, concise):**\n- Entry points and call chain sketch (file references)\n- Key modules/classes/functions involved\n- Existing patterns that apply (with 2–3 concrete examples)\n- Testing approach found in repo (where tests live; key helpers)\n- Risks/unknowns list\n\n**Question resolution pass (required):**\n- For uncertainties you encounter, attempt resolution via tools/code first.\n- Only add to `openQuestions` if it is a true business/product decision.\n- Enforce: `openQuestions.length <= maxQuestions`.\n\n**Set context variables:**\n- `contextSummary` (short)\n- `candidateFiles` (list of key file paths)\n- `openQuestions` (true human decisions only)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record relevant files, decisions made during context gathering, and any unexpected discoveries. If you discover a conflict between repo patterns and `userRules`, note it explicitly for planning."
95
+ "prompt": "Gather enough context to design and plan correctly.\n\n**Rules:**\n- Do this yourself (no delegation in this step).\n- Use tools to verify everything.\n- Prefer matching existing patterns over inventing new ones.\n- Prefer answering your own questions with tools; only keep true human-decision questions.\n\n**Deliverable (in chat, concise):**\n- Entry points and call chain sketch (file references)\n- Key modules/classes/functions involved\n- Existing patterns that apply (with 2–3 concrete examples)\n- Testing approach found in repo (where tests live; key helpers)\n- Risks/unknowns list\n\n**Question resolution pass (required):**\n- For uncertainties you encounter, attempt resolution via tools/code first.\n- Only add to `openQuestions` if it is a true business/product decision.\n- Enforce: `openQuestions.length <= maxQuestions`.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `contextSummary` (short)\n- `candidateFiles` (list of key file paths)\n- `openQuestions` (true human decisions only)\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record relevant files, decisions made during context gathering, and any unexpected discoveries. If you discover a conflict between repo patterns and `userRules`, note it explicitly for planning."
96
96
  },
97
97
  {
98
98
  "id": "phase-1b-context-audit-mode-adaptive",
@@ -123,7 +123,7 @@
123
123
  {
124
124
  "id": "phase-2-invariants-and-nongoals",
125
125
  "title": "Phase 2: Invariants (Contracts, Constraints, Non-Goals)",
126
- "prompt": "Create explicit invariants and non-goals.\n\n**Include (as applicable):**\n- API/behavior contracts that must not change\n- Data invariants (schema constraints, idempotency, ordering)\n- Performance budgets (latency, allocations, query counts)\n- Security/privacy constraints\n- Rollout invariants (flagging, migration safety, rollback)\n- Non-goals (explicitly out of scope)\n\n**Output:** a numbered list of invariants + non-goals.\n\n**Set context variables:** `invariants`, `nonGoals`.\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record invariants + non-goals, why they were chosen, any user pushback/clarifications, and impacted files/areas.\n\n**VERIFY:** ask the user to confirm only if any invariant is a product decision.",
126
+ "prompt": "Create explicit invariants and non-goals.\n\n**Include (as applicable):**\n- API/behavior contracts that must not change\n- Data invariants (schema constraints, idempotency, ordering)\n- Performance budgets (latency, allocations, query counts)\n- Security/privacy constraints\n- Rollout invariants (flagging, migration safety, rollback)\n- Non-goals (explicitly out of scope)\n\n**Output:** a numbered list of invariants + non-goals.\n\nSet these keys in the next `continue_workflow` call's `context` object: `invariants`, `nonGoals`.\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record invariants + non-goals, why they were chosen, any user pushback/clarifications, and impacted files/areas.\n\n**VERIFY:** ask the user to confirm only if any invariant is a product decision.",
127
127
  "requireConfirmation": {
128
128
  "or": [
129
129
  {
@@ -144,7 +144,7 @@
144
144
  "var": "taskComplexity",
145
145
  "not_equals": "Small"
146
146
  },
147
- "prompt": "Generate approaches by answering DIFFERENT questions—not variations of one idea.\n\nThis is DIVERGENT thinking. Do not evaluate or compare yet.\n\n**Answer each lens (minimum 3, add more for THOROUGH):**\n\n1. **Simplicity lens:** What's the simplest approach that could work?\n - Minimal moving parts, easiest to understand\n - What would you do if you had 1 hour?\n\n2. **Maintainability lens:** What approach optimizes for future changes?\n - Easiest to modify in 6 months by someone unfamiliar\n - What would make a new team member's life easiest?\n\n3. **Clean-slate lens:** If this area didn't exist, how would you design it?\n - Ignore existing structure—what's the \"right\" architecture?\n - What would you build if starting fresh today?\n\n4. **(STANDARD+) Constraint-flip lens:** What if a key constraint didn't exist?\n - Often reveals assumptions worth questioning\n - What if [performance/compatibility/scope] wasn't a concern?\n\n**For each approach:**\n- **Name**: Short memorable label\n- **Core idea**: 2-3 sentences describing the fundamental approach\n- **Key trade-off**: What does this optimize for? What does it sacrifice?\n- **Shape**: High-level structure (what changes, where)\n\n**Anti-anchoring check:**\nIf your approaches feel like variations of one idea, you haven't diverged enough. 
The lenses should produce genuinely different shapes.\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nSpawn 3 WorkRail Executors SIMULTANEOUSLY using `routine-ideation`:\n\n**Delegation 1 — Simplicity Lens:**\n- routine: routine-ideation\n- perspective: simplicity\n- quantity: 3-5 ideas\n- problem: [Task problem statement from Phase 0]\n- constraints: [From invariants]\n- deliverable: ideas-simplicity.md\n\n**Delegation 2 — Maintainability Lens:**\n- routine: routine-ideation\n- perspective: maintainability\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants]\n- deliverable: ideas-maintainability.md\n\n**Delegation 3 — Clean-Slate Lens:**\n- routine: routine-ideation\n- perspective: innovation\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants, relaxed]\n- deliverable: ideas-clean-slate.md\n\n**Main agent synthesis:**\n- Combine ideas from all 3 deliverables\n- Deduplicate similar ideas (keep best version)\n- Select best from each perspective for `approaches` array\n\n**Output:** `approaches` array with one entry per lens answered.\n\n**Set context variables:** `approaches`\n\n**CONTEXT LOGGING:** Add Approaches section to CONTEXT.md. Preserve ALL approaches—they may become Plan B/C later.",
147
+ "prompt": "Generate approaches by answering DIFFERENT questions—not variations of one idea.\n\nThis is DIVERGENT thinking. Do not evaluate or compare yet.\n\n**Answer each lens (minimum 3, add more for THOROUGH):**\n\n1. **Simplicity lens:** What's the simplest approach that could work?\n - Minimal moving parts, easiest to understand\n - What would you do if you had 1 hour?\n\n2. **Maintainability lens:** What approach optimizes for future changes?\n - Easiest to modify in 6 months by someone unfamiliar\n - What would make a new team member's life easiest?\n\n3. **Clean-slate lens:** If this area didn't exist, how would you design it?\n - Ignore existing structure—what's the \"right\" architecture?\n - What would you build if starting fresh today?\n\n4. **(STANDARD+) Constraint-flip lens:** What if a key constraint didn't exist?\n - Often reveals assumptions worth questioning\n - What if [performance/compatibility/scope] wasn't a concern?\n\n**For each approach:**\n- **Name**: Short memorable label\n- **Core idea**: 2-3 sentences describing the fundamental approach\n- **Key trade-off**: What does this optimize for? What does it sacrifice?\n- **Shape**: High-level structure (what changes, where)\n\n**Anti-anchoring check:**\nIf your approaches feel like variations of one idea, you haven't diverged enough. 
The lenses should produce genuinely different shapes.\n\n**If `rigorMode=THOROUGH` and subagents available:**\n\nSpawn 3 WorkRail Executors SIMULTANEOUSLY using `routine-ideation`:\n\n**Delegation 1 — Simplicity Lens:**\n- routine: routine-ideation\n- perspective: simplicity\n- quantity: 3-5 ideas\n- problem: [Task problem statement from Phase 0]\n- constraints: [From invariants]\n- deliverable: ideas-simplicity.md\n\n**Delegation 2 — Maintainability Lens:**\n- routine: routine-ideation\n- perspective: maintainability\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants]\n- deliverable: ideas-maintainability.md\n\n**Delegation 3 — Clean-Slate Lens:**\n- routine: routine-ideation\n- perspective: innovation\n- quantity: 3-5 ideas\n- problem: [Task problem statement]\n- constraints: [From invariants, relaxed]\n- deliverable: ideas-clean-slate.md\n\n**Main agent synthesis:**\n- Combine ideas from all 3 deliverables\n- Deduplicate similar ideas (keep best version)\n- Select best from each perspective for `approaches` array\n\n**Output:** `approaches` array with one entry per lens answered.\n\nSet these keys in the next `continue_workflow` call's `context` object: `approaches`\n\n**CONTEXT LOGGING:** Add Approaches section to CONTEXT.md. Preserve ALL approaches—they may become Plan B/C later.",
148
148
  "requireConfirmation": false
149
149
  },
150
150
  {
@@ -186,7 +186,7 @@
186
186
  "var": "taskComplexity",
187
187
  "not_equals": "Small"
188
188
  },
189
- "prompt": "Make the architecture decision and define early-warning triggers.\n\n**If `majorConcernsRaised = true`:**\nThe pre-mortem raised serious concerns about `leadingCandidate`. Before proceeding:\n1. Review `preMortemFindings` carefully\n2. Consider if `runnerUpApproach` addresses the concerns better\n3. Decide: proceed with `leadingCandidate` (accepting risk) OR switch to `runnerUpApproach`\n\n**PART 1: SELECTION**\n\nBased on comparison and pre-mortem:\n\n- **selectedApproach**: Confirm or change from `leadingCandidate`\n- **architectureRationale**: Why this wins (2-3 sentences referencing comparison)\n- **runnerUpApproach**: Confirm Plan B\n- **keyRiskToMonitor**: The pre-mortem concern to watch during implementation\n\n**PART 2: PIVOT TRIGGERS (STANDARD+)**\n\nDefine conditions that should trigger reconsideration:\n\n```\nPIVOT TRIGGERS (if any occur during implementation, stop and reassess):\n- Trigger 1: [specific, observable condition]\n- Trigger 2: [specific, observable condition]\n```\n\nGood triggers are CONCRETE and OBSERVABLE:\n- \"If we need to touch >2 files outside target module\"\n- \"If the API doesn't support X capability\"\n- \"If tests require mocking >3 dependencies\"\n\nBad triggers (too vague):\n- \"If it gets hard\"\n- \"If there are problems\"\n\n**Set context variables:**\n- `selectedApproach`\n- `architectureRationale`\n- `runnerUpApproach`\n- `architectureRisks`\n- `pivotTriggers` (STANDARD+)\n- `keyRiskToMonitor`\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selection + rationale, rejected alternatives and why, pivot triggers.\n\n**VERIFY (Large or High-risk):** User confirms approach selection.",
189
+ "prompt": "Make the architecture decision and define early-warning triggers.\n\n**If `majorConcernsRaised = true`:**\nThe pre-mortem raised serious concerns about `leadingCandidate`. Before proceeding:\n1. Review `preMortemFindings` carefully\n2. Consider if `runnerUpApproach` addresses the concerns better\n3. Decide: proceed with `leadingCandidate` (accepting risk) OR switch to `runnerUpApproach`\n\n**PART 1: SELECTION**\n\nBased on comparison and pre-mortem:\n\n- **selectedApproach**: Confirm or change from `leadingCandidate`\n- **architectureRationale**: Why this wins (2-3 sentences referencing comparison)\n- **runnerUpApproach**: Confirm Plan B\n- **keyRiskToMonitor**: The pre-mortem concern to watch during implementation\n\n**PART 2: PIVOT TRIGGERS (STANDARD+)**\n\nDefine conditions that should trigger reconsideration:\n\n```\nPIVOT TRIGGERS (if any occur during implementation, stop and reassess):\n- Trigger 1: [specific, observable condition]\n- Trigger 2: [specific, observable condition]\n```\n\nGood triggers are CONCRETE and OBSERVABLE:\n- \"If we need to touch >2 files outside target module\"\n- \"If the API doesn't support X capability\"\n- \"If tests require mocking >3 dependencies\"\n\nBad triggers (too vague):\n- \"If it gets hard\"\n- \"If there are problems\"\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedApproach`\n- `architectureRationale`\n- `runnerUpApproach`\n- `architectureRisks`\n- `pivotTriggers` (STANDARD+)\n- `keyRiskToMonitor`\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selection + rationale, rejected alternatives and why, pivot triggers.\n\n**VERIFY (Large or High-risk):** User confirms approach selection.",
190
190
  "requireConfirmation": {
191
191
  "or": [
192
192
  {
@@ -240,7 +240,7 @@
240
240
  "var": "taskComplexity",
241
241
  "not_equals": "Small"
242
242
  },
243
- "prompt": "Compare slice strategies and select the best fit.\n\nThis is EVALUATIVE thinking—compare and decide.\n\n**Compare strategies on:**\n- **Risk management**: Which best surfaces problems early given our invariants?\n- **PR reviewability**: Which produces the cleanest PR boundaries?\n- **Feedback speed**: Which gets us useful feedback fastest?\n- **Rollout constraints**: Which aligns with any flagging/migration requirements?\n- **Implementation flow**: Which has the smoothest dependencies between slices?\n\n**Select:**\n- **selectedSliceStrategy**: [name]\n- **rationale**: Why this wins (2-3 sentences referencing comparison)\n- **slices**: The ordered list from selected strategy\n\n**PR sizing gate:**\n- If `prStrategy = MultiPR`, map slices to PRs.\n- If `prStrategy = SinglePR` but slices suggest broad changes, recommend switching to MultiPR.\n\n**Set context variables:**\n- `selectedSliceStrategy`\n- `slices` (array from selected strategy)\n- `estimatedPRCount` (number)\n- `prStrategyRationale` (short)\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selected strategy + rationale, rejected strategies and why, slice boundaries and PR strategy.\n\n**VERIFY:** user confirms slice strategy and PR approach.",
243
+ "prompt": "Compare slice strategies and select the best fit.\n\nThis is EVALUATIVE thinking—compare and decide.\n\n**Compare strategies on:**\n- **Risk management**: Which best surfaces problems early given our invariants?\n- **PR reviewability**: Which produces the cleanest PR boundaries?\n- **Feedback speed**: Which gets us useful feedback fastest?\n- **Rollout constraints**: Which aligns with any flagging/migration requirements?\n- **Implementation flow**: Which has the smoothest dependencies between slices?\n\n**Select:**\n- **selectedSliceStrategy**: [name]\n- **rationale**: Why this wins (2-3 sentences referencing comparison)\n- **slices**: The ordered list from selected strategy\n\n**PR sizing gate:**\n- If `prStrategy = MultiPR`, map slices to PRs.\n- If `prStrategy = SinglePR` but slices suggest broad changes, recommend switching to MultiPR.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedSliceStrategy`\n- `slices` (array from selected strategy)\n- `estimatedPRCount` (number)\n- `prStrategyRationale` (short)\n\n**CONTEXT LOGGING (required):** Update CONTEXT.md Decision Log - record selected strategy + rationale, rejected strategies and why, slice boundaries and PR strategy.\n\n**VERIFY:** user confirms slice strategy and PR approach.",
244
244
  "requireConfirmation": true
245
245
  },
246
246
  {
@@ -311,7 +311,7 @@
311
311
  {
312
312
  "id": "phase-5a-draft-implementation-plan",
313
313
  "title": "Plan Artifact Draft/Update",
314
- "prompt": "Create or update the **Plan Artifact** (deterministic schema).\n\n**Write-or-paste rule:** attempt to write/update `implementation_plan.md`. If file writing fails, output full content in chat (canonical).\n\n**Plan Artifact headings (concise, complete):**\n\n1) Problem statement\n2) Acceptance criteria (bullets)\n3) Non-goals (bullets)\n4) **User rules/preferences applied:**\n - Relevant `userRules` + how plan respects them.\n - Deviations: rationale + mitigation + user decision (counts toward `maxQuestions`).\n5) Invariants (reference `invariants`)\n6) Proposed approach (1–2 paragraphs)\n7) Architecture decision (reference Phase 3/3b outputs):\n - Selected approach: reference `selectedApproach`\n - Rationale: reference `architectureRationale`\n - Runner-up (Plan B): reference `runnerUpApproach`\n - Key risk: reference `keyRiskToMonitor`\n - Full alternatives: see CONTEXT.md Approaches section\n8) **Vertical slices** (match `slices`: scope, done-definition, files, verification)\n\n **Work Packages inside each slice (mode-dependent):**\n - QUICK: skip work packages\n - STANDARD: optional; recommended when slice is high-risk or multi-layer\n - THOROUGH: required for non-trivial slices\n\n Each work package (WP):\n - ID: `S<sliceIndex>-WP<k>` (e.g., S1-WP1)\n - Goal: one coherent outcome\n - Targets (allowlist): dirs/files (+ allowed new files)\n - Forbidden (denylist): files/dirs not to touch\n - Budget: maxModified (5 STANDARD/8 THOROUGH), maxNew (2/3)\n - Done-definition: 2–5 bullets\n - Verification: 1–3 commands/tests\n - Dependencies: contracts/types from other WPs (if parallel)\n\n **Parallelism rule:** parallelize only if Targets don't overlap. 
Final WP must be \"Hook-up/Integration\" when parallel was used.\n\n9) Test plan (unit/integration/e2e; cite repo patterns)\n10) Risk register (risks + mitigation + rollback/flag)\n11) PR packaging (Single/Multi + rule)\n12) **Philosophy alignment per slice** (for each slice, include):\n - For each design principle touched by this slice: [principle] → [satisfied / tension / violated + 1-line why]\n - The audit step will independently verify these self-assessments. Be honest — violations caught early are cheaper than violations caught in review.\n\n**Set context variables:**\n- `planArtifact`\n- `implementationPlan`\n\n**VERIFY:** concrete enough for another engineer to implement without guessing.",
314
+ "prompt": "Create or update the **Plan Artifact** (deterministic schema).\n\n**Write-or-paste rule:** attempt to write/update `implementation_plan.md`. If file writing fails, output full content in chat (canonical).\n\n**Plan Artifact headings (concise, complete):**\n\n1) Problem statement\n2) Acceptance criteria (bullets)\n3) Non-goals (bullets)\n4) **User rules/preferences applied:**\n - Relevant `userRules` + how plan respects them.\n - Deviations: rationale + mitigation + user decision (counts toward `maxQuestions`).\n5) Invariants (reference `invariants`)\n6) Proposed approach (1–2 paragraphs)\n7) Architecture decision (reference Phase 3/3b outputs):\n - Selected approach: reference `selectedApproach`\n - Rationale: reference `architectureRationale`\n - Runner-up (Plan B): reference `runnerUpApproach`\n - Key risk: reference `keyRiskToMonitor`\n - Full alternatives: see CONTEXT.md Approaches section\n8) **Vertical slices** (match `slices`: scope, done-definition, files, verification)\n\n **Work Packages inside each slice (mode-dependent):**\n - QUICK: skip work packages\n - STANDARD: optional; recommended when slice is high-risk or multi-layer\n - THOROUGH: required for non-trivial slices\n\n Each work package (WP):\n - ID: `S<sliceIndex>-WP<k>` (e.g., S1-WP1)\n - Goal: one coherent outcome\n - Targets (allowlist): dirs/files (+ allowed new files)\n - Forbidden (denylist): files/dirs not to touch\n - Budget: maxModified (5 STANDARD/8 THOROUGH), maxNew (2/3)\n - Done-definition: 2–5 bullets\n - Verification: 1–3 commands/tests\n - Dependencies: contracts/types from other WPs (if parallel)\n\n **Parallelism rule:** parallelize only if Targets don't overlap. 
Final WP must be \"Hook-up/Integration\" when parallel was used.\n\n9) Test plan (unit/integration/e2e; cite repo patterns)\n10) Risk register (risks + mitigation + rollback/flag)\n11) PR packaging (Single/Multi + rule)\n12) **Philosophy alignment per slice** (for each slice, include):\n - For each design principle touched by this slice: [principle] → [satisfied / tension / violated + 1-line why]\n - The audit step will independently verify these self-assessments. Be honest — violations caught early are cheaper than violations caught in review.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `planArtifact`\n- `implementationPlan`\n\n**VERIFY:** concrete enough for another engineer to implement without guessing.",
315
315
  "requireConfirmation": false
316
316
  },
317
317
  {
@@ -323,7 +323,7 @@
323
323
  {
324
324
  "id": "phase-5c-refocus-and-ticket-extraction",
325
325
  "title": "Refocus: Amendments + Tickets + Drift Detection",
326
- "prompt": "Apply amendments and refocus.\n\n**Do:**\n- Update `planArtifact` + `implementationPlan` to incorporate `planAmendments`.\n- Extract out-of-scope work into `followUpTickets`.\n- Ensure plan follows `invariants` and stays slice-oriented.\n\n**RESOLVED FINDINGS LEDGER (required):**\n\nWhen applying amendments, maintain the `resolvedFindings` context variable:\n- For each finding resolved in this iteration, add an entry: { finding: \"...\", resolution: \"...\", iteration: N }\n- Cap at 10 entries (if exceeded, drop oldest entries first)\n- This ledger carries forward to the next audit pass for regression checking\n\n**Set:** `resolvedFindings` (array, append new resolutions)\n\n**Drift detection:**\n- If user introduced new constraints/preferences, update `userRules` and log in `CONTEXT.md`.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record amendments accepted/rejected and why, user pushback, and scope/rules/verification drift\n\n**Set:** `followUpTickets`\n\n**VERIFY:** plan is coherent and PR-sized by slice.",
326
+ "prompt": "Apply amendments and refocus.\n\n**Do:**\n- Update `planArtifact` + `implementationPlan` to incorporate `planAmendments`.\n- Extract out-of-scope work into `followUpTickets`.\n- Ensure plan follows `invariants` and stays slice-oriented.\n\n**RESOLVED FINDINGS LEDGER (required):**\n\nWhen applying amendments, maintain the `resolvedFindings` entry in the next `continue_workflow` call's `context` object:\n- For each finding resolved in this iteration, add an entry: { finding: \"...\", resolution: \"...\", iteration: N }\n- Cap at 10 entries (if exceeded, drop oldest entries first)\n- This ledger carries forward to the next audit pass for regression checking\n\n**Set:** `resolvedFindings` (array, append new resolutions)\n\n**Drift detection:**\n- If user introduced new constraints/preferences, update `userRules` and log in `CONTEXT.md`.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record amendments accepted/rejected and why, user pushback, and scope/rules/verification drift\n\n**Set:** `followUpTickets`\n\n**VERIFY:** plan is coherent and PR-sized by slice.",
327
327
  "requireConfirmation": {
328
328
  "or": [
329
329
  {
@@ -373,7 +373,7 @@
373
373
  "var": "taskComplexity",
374
374
  "not_equals": "Small"
375
375
  },
376
- "prompt": "Design test strategy before implementation begins.\n\n**Required outputs:**\n- List acceptance criteria with corresponding test coverage\n- Identify edge cases and failure modes that need tests\n- Map invariants to test verification (which tests prove which invariants)\n- Document test execution plan (unit/integration/e2e)\n\n**Rigor-adaptive depth:**\n- QUICK: Brief test checklist (≤5 items)\n- STANDARD: Test coverage matrix (criteria → tests)\n- THOROUGH: Comprehensive test plan with edge cases, failure injection, invariant proofs\n\n**Validation gate:** For high-risk invariants, require explicit test coverage. If gap exists, add to slice plan or acknowledge as risk.\n\n**Set context variables:** `testDesign`, `testCoverageGaps`\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - test strategy, coverage gaps, and how gaps are addressed.\n\n**Output:** Test design artifact (in chat or file if write-or-paste).",
376
+ "prompt": "Design test strategy before implementation begins.\n\n**Required outputs:**\n- List acceptance criteria with corresponding test coverage\n- Identify edge cases and failure modes that need tests\n- Map invariants to test verification (which tests prove which invariants)\n- Document test execution plan (unit/integration/e2e)\n\n**Rigor-adaptive depth:**\n- QUICK: Brief test checklist (≤5 items)\n- STANDARD: Test coverage matrix (criteria → tests)\n- THOROUGH: Comprehensive test plan with edge cases, failure injection, invariant proofs\n\n**Validation gate:** For high-risk invariants, require explicit test coverage. If gap exists, add to slice plan or acknowledge as risk.\n\nSet these keys in the next `continue_workflow` call's `context` object: `testDesign`, `testCoverageGaps`\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - test strategy, coverage gaps, and how gaps are addressed.\n\n**Output:** Test design artifact (in chat or file if write-or-paste).",
377
377
  "requireConfirmation": false
378
378
  },
379
379
  {
@@ -463,7 +463,7 @@
463
463
  {
464
464
  "id": "phase-pre-impl-validation",
465
465
  "title": "Pre-Implementation Validation",
466
- "prompt": "Validate before implementing slice `{{currentSlice.name}}`.\n\n**FLAG RESET (required):**\nSet these context variables to initial state:\n- planDrift = false\n- rulesDrift = false\n- verificationFailed = false\n- verificationApprovalRequired = false\n- verificationRetried = false\n- sliceVerified = false\n- softReplanCompleted = false\n- replanFailed = false\n- pivotTriggered = false\n- pivotSeverity = none\n- validationFailed = false\n\n---\n\n**PART 1: PIVOT TRIGGER CHECK**\n\nReview `pivotTriggers`:\n```\nPIVOT TRIGGERS:\n- [ ] Trigger 1: [condition] → Status: [Not triggered / Triggered]\n- [ ] Trigger 2: [condition] → Status: [Not triggered / Triggered]\n```\n\n**If ANY trigger fired:**\n1. Set `pivotTriggered = true`, `validationFailed = true`\n2. Assess severity:\n - **MINOR**: Return to `phase-select-architecture` (try runnerUp)\n - **MODERATE**: Return to `phase-ideation` (new constraint)\n - **MAJOR**: Return to `phase-invariants` (problem misunderstood)\n3. Set `pivotSeverity`, `pivotReturnPhase`\n4. 
STOP — do not continue to Part 2\n\n---\n\n**PART 2: PLAN STALENESS CHECK (STANDARD+)**\n\nQuick audit of slice plan vs current codebase:\n- Are target files still in expected state?\n- Have dependencies/contracts changed since planning?\n- Any new constraints from prior slices?\n\nStaleness: [Fresh / Minor drift / Major drift]\n\n**If Major drift:** Set `slicePlanStale = true`, `validationFailed = true`\n\n---\n\n**PART 3: SANITY CHECK**\n\nVerify implementation prerequisites:\n- **Existence**: Target files/symbols exist\n- **Signatures**: Key function/type signatures match assumptions\n- **Scope**: No hidden touchpoints beyond plan\n- **Verification**: Slice verification commands are runnable\n\n**If any check fails:** Set `validationFailed = true`\n\n---\n\n**OUTPUT:**\n- Pivot triggers: [All clear / Triggered: X]\n- Staleness: [Fresh / Minor / Major]\n- Sanity: [Pass / Fail: reason]\n- `validationFailed`: true/false\n\n**Set:** `pivotTriggered`, `pivotSeverity`, `pivotReturnPhase`, `slicePlanStale`, `validationFailed`",
466
+ "prompt": "Validate before implementing slice `{{currentSlice.name}}`.\n\n**FLAG RESET (required):**\nSet these keys in the next `continue_workflow` call's `context` object to their initial state:\n- planDrift = false\n- rulesDrift = false\n- verificationFailed = false\n- verificationApprovalRequired = false\n- verificationRetried = false\n- sliceVerified = false\n- softReplanCompleted = false\n- replanFailed = false\n- pivotTriggered = false\n- pivotSeverity = none\n- validationFailed = false\n\n---\n\n**PART 1: PIVOT TRIGGER CHECK**\n\nReview `pivotTriggers`:\n```\nPIVOT TRIGGERS:\n- [ ] Trigger 1: [condition] → Status: [Not triggered / Triggered]\n- [ ] Trigger 2: [condition] → Status: [Not triggered / Triggered]\n```\n\n**If ANY trigger fired:**\n1. Set `pivotTriggered = true`, `validationFailed = true`\n2. Assess severity:\n - **MINOR**: Return to `phase-select-architecture` (try runnerUp)\n - **MODERATE**: Return to `phase-ideation` (new constraint)\n - **MAJOR**: Return to `phase-invariants` (problem misunderstood)\n3. Set `pivotSeverity`, `pivotReturnPhase`\n4. 
STOP — do not continue to Part 2\n\n---\n\n**PART 2: PLAN STALENESS CHECK (STANDARD+)**\n\nQuick audit of slice plan vs current codebase:\n- Are target files still in expected state?\n- Have dependencies/contracts changed since planning?\n- Any new constraints from prior slices?\n\nStaleness: [Fresh / Minor drift / Major drift]\n\n**If Major drift:** Set `slicePlanStale = true`, `validationFailed = true`\n\n---\n\n**PART 3: SANITY CHECK**\n\nVerify implementation prerequisites:\n- **Existence**: Target files/symbols exist\n- **Signatures**: Key function/type signatures match assumptions\n- **Scope**: No hidden touchpoints beyond plan\n- **Verification**: Slice verification commands are runnable\n\n**If any check fails:** Set `validationFailed = true`\n\n---\n\n**OUTPUT:**\n- Pivot triggers: [All clear / Triggered: X]\n- Staleness: [Fresh / Minor / Major]\n- Sanity: [Pass / Fail: reason]\n- `validationFailed`: true/false\n\n**Set:** `pivotTriggered`, `pivotSeverity`, `pivotReturnPhase`, `slicePlanStale`, `validationFailed`",
467
467
  "requireConfirmation": {
468
468
  "or": [
469
469
  {
@@ -488,7 +488,7 @@
488
488
  "var": "validationFailed",
489
489
  "not_equals": true
490
490
  },
491
- "prompt": "Prepare to implement slice `{{currentSlice.name}}`.\n\n**Do:**\n- Re-state slice goal + verification\n- Identify exact files/components to change\n- Re-check invariants impacted\n- Match existing patterns (1–3 exemplars)\n- Apply `userRules` (call out if any rule affects this slice)\n\n**Work Package handling:**\n- If `currentSlice.workPackages` exist: use as implementation guidance\n- If no WPs: proceed with full slice scope as one unit\n\n**Git setup (first slice only):**\nIf sliceIndex = 0:\n- Check git availability: `git status`\n- Create feature branch: `feature/etienneb/acei-XXXX_<task-name>`\n- Set `featureBranch` context variable\n- Update CONTEXT.md with branch name\n\n**Output:**\n- Slice goal + verification (restated)\n- Files to change\n- Patterns to follow\n- userRules that apply",
491
+ "prompt": "Prepare to implement slice `{{currentSlice.name}}`.\n\n**Do:**\n- Re-state slice goal + verification\n- Identify exact files/components to change\n- Re-check invariants impacted\n- Match existing patterns (1–3 exemplars)\n- Apply `userRules` (call out if any rule affects this slice)\n\n**Work Package handling:**\n- If `currentSlice.workPackages` exist: use as implementation guidance\n- If no WPs: proceed with full slice scope as one unit\n\n**Git setup (first slice only):**\nIf sliceIndex = 0:\n- Check git availability: `git status`\n- Create feature branch: `feature/etienneb/acei-XXXX_<task-name>`\n- Set `featureBranch` in the next `continue_workflow` call's `context` object\n- Update CONTEXT.md with branch name\n\n**Output:**\n- Slice goal + verification (restated)\n- Files to change\n- Patterns to follow\n- userRules that apply",
492
492
  "requireConfirmation": false
493
493
  },
494
494
  {
@@ -500,7 +500,7 @@
500
500
  {
501
501
  "id": "phase-7c-verify",
502
502
  "title": "VERIFY: Slice {{sliceIndex}}",
503
- "prompt": "Verify the slice implementation.\n\n**PRIMARY VERIFICATION (always):**\n- Run verification commands from slice (or WP if applicable).\n- Add/adjust tests if needed.\n- Ensure invariants hold.\n- If blocked: request user to run and share output.\n\n---\n\n**PARALLEL VERIFICATION (THOROUGH + high-risk only):**\n\nRun when `rigorMode=THOROUGH` AND slice touches high-risk invariants (auth/payments/security/data integrity/perf-critical).\n\nIf `delegationMode=delegate`:\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel verification.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Verification 1 — Adversarial Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 5 (maximum for implementation verification)\n- hypotheses: [\"This implementation is correct\", key assumptions about the changes]\n- evidence: files changed in this slice\n- context (file-reference-first):\n - Read: files changed in this slice\n - Read: CONTEXT.md (invariants section)\n - Filtered userRules: edge cases, error handling, validation rules\n - Feature brief: slice goal + invariants touched + verification plan\n- deliverable: implementation-challenges.md\n\n**Verification 2 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Changed functions in this slice]\n- inputs: [Test scenarios: normal + edge cases]\n- trace_depth: 3\n- context:\n - Read: files changed in this slice\n - Read: implementation_plan.md (this slice's verification scenarios)\n - Filtered userRules: performance, state management, data flow rules\n - Invariants touched by this slice\n - Feature brief: architecture decision + risk register for this slice\n- deliverable: execution-simulation.md\n\n**Verification 3 — Plan Adherence:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md (this slice section)\n- requirements: [Slice done-definition + targets/forbidden]\n- constraints: [Filtered userRules: patterns, 
conventions, testing]\n- context:\n - Read: files changed + implementation_plan.md (this slice)\n - Feature brief: slice scope + done-definition + targets/forbidden\n- deliverable: plan-adherence.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (relevant to their lens)\n✅ Each includes invariants touched by this slice\n✅ Each includes feature brief (file refs or excerpt)\n✅ Each has specific verification lens\n\n**Synthesize (deterministic, bounded retry):**\n\n- **ALL THREE validate** → set `sliceVerified=true`, proceed to checkpoint\n\n- **ONE concern raised:**\n 1. Investigate the concern and attempt to fix within this slice iteration\n 2. Re-run ONLY the failing validator (max 1 retry per slice)\n 3. If passes after retry: set `sliceVerified=true`, proceed\n 4. If still fails after retry:\n - Add concern to `verificationFindings`\n - Require user approval to proceed OR rewind to planning\n - Set `verificationApprovalRequired=true`\n\n- **TWO+ concerns raised:**\n 1. Do NOT attempt automatic fix\n 2. Set `verificationFailed=true`\n 3. Stop slice loop immediately\n 4. User must choose:\n - Rewind to planning (Phase 5) via new workflow run with drift context\n - Manual fix + re-verify\n - Defer this slice to follow-up ticket\n\n**Set context variables:**\n- `sliceVerified` (true/false)\n- `verificationFindings` (list of concerns)\n- `verificationFailed` (true/false)\n- `verificationApprovalRequired` (true/false)\n- `verificationRetried` (true/false)\n- `parallelVerificationRan` (true/false)\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record verification approach (primary only / parallel), concerns raised + retry outcome, and user decision (if approval required).",
503
+ "prompt": "Verify the slice implementation.\n\n**PRIMARY VERIFICATION (always):**\n- Run verification commands from slice (or WP if applicable).\n- Add/adjust tests if needed.\n- Ensure invariants hold.\n- If blocked: request user to run and share output.\n\n---\n\n**PARALLEL VERIFICATION (THOROUGH + high-risk only):**\n\nRun when `rigorMode=THOROUGH` AND slice touches high-risk invariants (auth/payments/security/data integrity/perf-critical).\n\nIf `delegationMode=delegate`:\n\nYou have permission to spawn THREE subagents SIMULTANEOUSLY for parallel verification.\n\nDelegate to WorkRail Executor THREE TIMES with scoped context:\n\n**Verification 1 — Adversarial Challenge:**\n- routine: routine-hypothesis-challenge\n- rigor: 5 (maximum for implementation verification)\n- hypotheses: [\"This implementation is correct\", key assumptions about the changes]\n- evidence: files changed in this slice\n- context (file-reference-first):\n - Read: files changed in this slice\n - Read: CONTEXT.md (invariants section)\n - Filtered userRules: edge cases, error handling, validation rules\n - Feature brief: slice goal + invariants touched + verification plan\n- deliverable: implementation-challenges.md\n\n**Verification 2 — Execution Simulation:**\n- routine: routine-execution-simulation\n- entry_point: [Changed functions in this slice]\n- inputs: [Test scenarios: normal + edge cases]\n- trace_depth: 3\n- context:\n - Read: files changed in this slice\n - Read: implementation_plan.md (this slice's verification scenarios)\n - Filtered userRules: performance, state management, data flow rules\n - Invariants touched by this slice\n - Feature brief: architecture decision + risk register for this slice\n- deliverable: execution-simulation.md\n\n**Verification 3 — Plan Adherence:**\n- routine: routine-plan-analysis\n- plan: implementation_plan.md (this slice section)\n- requirements: [Slice done-definition + targets/forbidden]\n- constraints: [Filtered userRules: patterns, 
conventions, testing]\n- context:\n - Read: files changed + implementation_plan.md (this slice)\n - Feature brief: slice scope + done-definition + targets/forbidden\n- deliverable: plan-adherence.md\n\n**Self-check before delegating (required):**\n✅ Each delegation includes filtered userRules (relevant to their lens)\n✅ Each includes invariants touched by this slice\n✅ Each includes feature brief (file refs or excerpt)\n✅ Each has specific verification lens\n\n**Synthesize (deterministic, bounded retry):**\n\n- **ALL THREE validate** → set `sliceVerified=true`, proceed to checkpoint\n\n- **ONE concern raised:**\n 1. Investigate the concern and attempt to fix within this slice iteration\n 2. Re-run ONLY the failing validator (max 1 retry per slice)\n 3. If passes after retry: set `sliceVerified=true`, proceed\n 4. If still fails after retry:\n - Add concern to `verificationFindings`\n - Require user approval to proceed OR rewind to planning\n - Set `verificationApprovalRequired=true`\n\n- **TWO+ concerns raised:**\n 1. Do NOT attempt automatic fix\n 2. Set `verificationFailed=true`\n 3. Stop slice loop immediately\n 4. User must choose:\n - Rewind to planning (Phase 5) via new workflow run with drift context\n - Manual fix + re-verify\n - Defer this slice to follow-up ticket\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `sliceVerified` (true/false)\n- `verificationFindings` (list of concerns)\n- `verificationFailed` (true/false)\n- `verificationApprovalRequired` (true/false)\n- `verificationRetried` (true/false)\n- `parallelVerificationRan` (true/false)\n\n---\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log (follow format from metaGuidance) - record verification approach (primary only / parallel), concerns raised + retry outcome, and user decision (if approval required).",
504
504
  "requireConfirmation": {
505
505
  "or": [
506
506
  {
@@ -557,7 +557,7 @@
557
557
  }
558
558
  ]
559
559
  },
560
- "prompt": "Drift detected. Plan or implementation boundaries have changed since planning.\n\n**Detected drift:**\n- Plan drift: slice scope/files/verification changed beyond original plan\n- Rules drift: user introduced new constraints affecting implementation\n\n**Required decision (deterministic, single-attempt re-plan limit):**\n\n**Option 1: IN-PLACE RE-PLAN (soft, single attempt)**\n\nWhen to use: drift is containable (1-3 extra files, minor scope shift, clarified requirement).\n\nSteps:\n1. Update `implementation_plan.md` immediately to reflect actual scope/changes\n2. Update affected slices in `slices` array\n3. Run single-pass plan audit (self-audit if QUICK/STANDARD; delegate once if THOROUGH and subagents available)\n4. If audit passes (no new Major/Critical findings):\n - Set `softReplanCompleted=true`\n - Reset drift flags: `planDrift=false`, `rulesDrift=false`\n - Document drift resolution in CONTEXT.md Decision Log\n - Continue slice loop with updated plan\n5. If audit finds NEW drift or Major issues:\n - Set `replanFailed=true`\n - Escalate to Option 2 (user decision)\n\n**Single-attempt limit:** if drift recurs in a later slice after soft re-plan, you MUST escalate to Option 2.\n\n---\n\n**Option 2: HARD STOP + USER DECISION**\n\nWhen to use: High risk OR Major drift (scope doubled, new invariants, architectural change) OR soft re-plan failed/recurred.\n\nSteps:\n1. Stop slice loop immediately\n2. Document drift in CONTEXT.md with evidence (git diff, scope comparison)\n3. Update CONTEXT.md Machine State Checkpoint for resume\n4. 
User chooses:\n - **Rewind to planning**: exit this workflow run; start new run with updated context; use last Planning checkpoint state to resume at Phase 5\n - **Manual fix**: user fixes the issue outside workflow; resume at current slice\n - **Defer slice**: skip this slice, add to follow-up tickets, continue with next slice\n\n---\n\n**Option 3: CONTINUE WITH DEVIATION (document + approve)**\n\nWhen to use: Low/Medium risk AND drift is expected/acceptable.\n\nSteps:\n1. Document why drift is safe/expected\n2. Confirm all invariants still hold\n3. Update CONTEXT.md Decision Log with drift resolution + user approval\n4. Reset drift flags: `planDrift=false`, `rulesDrift=false`\n5. Continue slice loop\n\n---\n\n**Default recommendation:**\n- High risk → Option 2 (hard stop)\n- Medium risk + containable drift → Option 1 (soft re-plan)\n- Low risk + expected drift → Option 3 (continue with approval)\n\n**Set context variables:**\n- `softReplanCompleted` (if Option 1 succeeded)\n- `replanFailed` (if Option 1 audit failed)\n- `driftResolution` (which option was chosen)\n\nUser must approve which option to take.",
560
+ "prompt": "Drift detected. Plan or implementation boundaries have changed since planning.\n\n**Detected drift:**\n- Plan drift: slice scope/files/verification changed beyond original plan\n- Rules drift: user introduced new constraints affecting implementation\n\n**Required decision (deterministic, single-attempt re-plan limit):**\n\n**Option 1: IN-PLACE RE-PLAN (soft, single attempt)**\n\nWhen to use: drift is containable (1-3 extra files, minor scope shift, clarified requirement).\n\nSteps:\n1. Update `implementation_plan.md` immediately to reflect actual scope/changes\n2. Update affected slices in `slices` array\n3. Run single-pass plan audit (self-audit if QUICK/STANDARD; delegate once if THOROUGH and subagents available)\n4. If audit passes (no new Major/Critical findings):\n - Set `softReplanCompleted=true`\n - Reset drift flags: `planDrift=false`, `rulesDrift=false`\n - Document drift resolution in CONTEXT.md Decision Log\n - Continue slice loop with updated plan\n5. If audit finds NEW drift or Major issues:\n - Set `replanFailed=true`\n - Escalate to Option 2 (user decision)\n\n**Single-attempt limit:** if drift recurs in a later slice after soft re-plan, you MUST escalate to Option 2.\n\n---\n\n**Option 2: HARD STOP + USER DECISION**\n\nWhen to use: High risk OR Major drift (scope doubled, new invariants, architectural change) OR soft re-plan failed/recurred.\n\nSteps:\n1. Stop slice loop immediately\n2. Document drift in CONTEXT.md with evidence (git diff, scope comparison)\n3. Update CONTEXT.md Machine State Checkpoint for resume\n4. 
User chooses:\n - **Rewind to planning**: exit this workflow run; start new run with updated context; use last Planning checkpoint state to resume at Phase 5\n - **Manual fix**: user fixes the issue outside workflow; resume at current slice\n - **Defer slice**: skip this slice, add to follow-up tickets, continue with next slice\n\n---\n\n**Option 3: CONTINUE WITH DEVIATION (document + approve)**\n\nWhen to use: Low/Medium risk AND drift is expected/acceptable.\n\nSteps:\n1. Document why drift is safe/expected\n2. Confirm all invariants still hold\n3. Update CONTEXT.md Decision Log with drift resolution + user approval\n4. Reset drift flags: `planDrift=false`, `rulesDrift=false`\n5. Continue slice loop\n\n---\n\n**Default recommendation:**\n- High risk → Option 2 (hard stop)\n- Medium risk + containable drift → Option 1 (soft re-plan)\n- Low risk + expected drift → Option 3 (continue with approval)\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `softReplanCompleted` (if Option 1 succeeded)\n- `replanFailed` (if Option 1 audit failed)\n- `driftResolution` (which option was chosen)\n\nUser must approve which option to take.",
561
561
  "requireConfirmation": true
562
562
  },
563
563
  {
@@ -614,7 +614,7 @@
614
614
  "var": "taskComplexity",
615
615
  "not_equals": "Small"
616
616
  },
617
- "prompt": "**BOUNDARY: Execution → Handoff**\n\nYou've done gap discovery. Now verify integration with concrete evidence and set explicit pass/fail flags.\n\n---\n\n**REQUIRED VERIFICATIONS (enumerate commands actually run):**\n\n1) Full test suite\n- Command(s) run:\n- Result summary:\n\n2) Invariant validation\n- For each invariant: how is it proven? (test name or manual proof)\n- Any invariant without proof must be listed in `invariantViolations`\n\n3) Performance budgets (if applicable)\n- Command(s)/benchmark run:\n- Result vs budget:\n\n4) Backward compatibility (if applicable)\n- Command(s) run / checks performed:\n- Result summary:\n\n5) Build/compile check\n- Command(s) run:\n- Result summary:\n\n---\n\n**Output (required exact lines):**\n- integrationVerificationPassed = true|false\n- integrationVerificationFailed = true|false\n- regressionDetected = true|false\n\n---\n\n**Set context variables:**\n- `integrationVerificationPassed`\n- `integrationVerificationFailed`\n- `integrationVerificationFindings` (list of issues)\n- `regressionDetected`\n- `invariantViolations` (list)\n\n**Rule:**\n- If `integrationVerificationFailed = true`, then `integrationVerificationPassed` must be false.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log - commands run + findings + any user decisions.",
617
+ "prompt": "**BOUNDARY: Execution → Handoff**\n\nYou've done gap discovery. Now verify integration with concrete evidence and set explicit pass/fail flags.\n\n---\n\n**REQUIRED VERIFICATIONS (enumerate commands actually run):**\n\n1) Full test suite\n- Command(s) run:\n- Result summary:\n\n2) Invariant validation\n- For each invariant: how is it proven? (test name or manual proof)\n- Any invariant without proof must be listed in `invariantViolations`\n\n3) Performance budgets (if applicable)\n- Command(s)/benchmark run:\n- Result vs budget:\n\n4) Backward compatibility (if applicable)\n- Command(s) run / checks performed:\n- Result summary:\n\n5) Build/compile check\n- Command(s) run:\n- Result summary:\n\n---\n\n**Output (required exact lines):**\n- integrationVerificationPassed = true|false\n- integrationVerificationFailed = true|false\n- regressionDetected = true|false\n\n---\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `integrationVerificationPassed`\n- `integrationVerificationFailed`\n- `integrationVerificationFindings` (list of issues)\n- `regressionDetected`\n- `invariantViolations` (list)\n\n**Rule:**\n- If `integrationVerificationFailed = true`, then `integrationVerificationPassed` must be false.\n\n**CONTEXT LOGGING:** Update CONTEXT.md Decision Log - commands run + findings + any user decisions.",
618
618
  "requireConfirmation": {
619
619
  "or": [
620
620
  {
@@ -31,7 +31,7 @@
31
31
  {
32
32
  "id": "phase-0-understand-and-classify",
33
33
  "title": "Phase 0: Understand & Classify",
34
- "prompt": "Build understanding and classify the task in one pass.\n\nStep 1 — Early exit check:\nBefore any exploration, verify that acceptance criteria or expected behavior exist. If they are completely absent and cannot be inferred, ask the user and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Explore:\nUse tools to build the minimum complete understanding needed to design correctly. Read independent files in parallel when possible.\n\nGather:\n- key entry points and call chain sketch\n- relevant files, modules, and functions\n- existing repo patterns with concrete file references\n- testing strategy already present in the repo\n- risks and unknowns\n- explicit invariants and non-goals\n\nStep 3 — Discover the dev's philosophy and preferences:\nDiscover what the dev cares about using this fallback chain (try each, use all that are available):\n1. Memory MCP (if available): call `mcp_memory_conventions`, `mcp_memory_prefer`, `mcp_memory_recall` to retrieve learned preferences and coding philosophy\n2. Active session rules / Firebender rules: read any rules, commands, or philosophy documents already in context\n3. Repo patterns: infer preferences from how the codebase already works — error handling style, mutability patterns, test approach, naming conventions, architecture patterns\n4. 
Ask the dev: only if the above sources are contradictory or clearly insufficient for this task\n\nDo NOT distill into a summary — record WHERE the philosophy lives (which rules, which Memory entries, which repo files exemplify it) so later phases can reference the source directly.\n\nIf stated rules conflict with actual repo patterns (e.g., rules say 'prefer immutability' but the module uses MutableStateFlow), note the conflict in `philosophyConflicts` — this is valuable signal for design decisions.\n\nStep 4 — Classify (informed by exploration):\nNow that you have real context, classify:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n\nDecision guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nStep 5 — Optional deeper context (post-classification):\nIf `rigorMode` is STANDARD or THOROUGH and understanding still feels incomplete or the call chain is too fuzzy, and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. 
Synthesize both outputs before finishing this step.\n\nSet context variables:\n- `taskComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `prStrategy`\n- `contextSummary`\n- `candidateFiles`\n- `invariants`\n- `nonGoals`\n- `openQuestions`\n- `philosophySources` — pointers to where the dev's philosophy lives (rules, Memory entries, repo files), not a summary\n- `philosophyConflicts` — conflicts between stated rules and actual repo patterns (if any)\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before",
34
+ "prompt": "Build understanding and classify the task in one pass.\n\nStep 1 — Early exit check:\nBefore any exploration, verify that acceptance criteria or expected behavior exist. If they are completely absent and cannot be inferred, ask the user and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Explore:\nUse tools to build the minimum complete understanding needed to design correctly. Read independent files in parallel when possible.\n\nGather:\n- key entry points and call chain sketch\n- relevant files, modules, and functions\n- existing repo patterns with concrete file references\n- testing strategy already present in the repo\n- risks and unknowns\n- explicit invariants and non-goals\n\nStep 3 — Discover the dev's philosophy and preferences:\nDiscover what the dev cares about using this fallback chain (try each, use all that are available):\n1. Memory MCP (if available): call `mcp_memory_conventions`, `mcp_memory_prefer`, `mcp_memory_recall` to retrieve learned preferences and coding philosophy\n2. Active session rules / Firebender rules: read any rules, commands, or philosophy documents already in context\n3. Repo patterns: infer preferences from how the codebase already works — error handling style, mutability patterns, test approach, naming conventions, architecture patterns\n4. 
Ask the dev: only if the above sources are contradictory or clearly insufficient for this task\n\nDo NOT distill into a summary — record WHERE the philosophy lives (which rules, which Memory entries, which repo files exemplify it) so later phases can reference the source directly.\n\nIf stated rules conflict with actual repo patterns (e.g., rules say 'prefer immutability' but the module uses MutableStateFlow), note the conflict in `philosophyConflicts` — this is valuable signal for design decisions.\n\nStep 4 — Classify (informed by exploration):\nNow that you have real context, classify:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n\nDecision guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nStep 5 — Optional deeper context (post-classification):\nIf `rigorMode` is STANDARD or THOROUGH and understanding still feels incomplete or the call chain is too fuzzy, and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. 
Synthesize both outputs before finishing this step.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `taskComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `prStrategy`\n- `contextSummary`\n- `candidateFiles`\n- `invariants`\n- `nonGoals`\n- `openQuestions`\n- `philosophySources` — pointers to where the dev's philosophy lives (rules, Memory entries, repo files), not a summary\n- `philosophyConflicts` — conflicts between stated rules and actual repo patterns (if any)\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before",
35
35
  "requireConfirmation": {
36
36
  "or": [
37
37
  { "var": "taskComplexity", "equals": "Large" },
@@ -46,7 +46,7 @@
46
46
  "var": "taskComplexity",
47
47
  "not_equals": "Small"
48
48
  },
49
- "prompt": "Before any design work, state your current hypothesis in 3-5 sentences.\n\nBased on what you learned in Phase 0, write:\n1. Your current best guess for the approach\n2. Your main concern about that guess\n3. What would most likely make that guess wrong\n\nThis is your reference point. After design generation, you will compare the result against this hypothesis and say what changed your mind or what held firm.\n\nSet context variable:\n- `initialHypothesis`",
49
+ "prompt": "Before any design work, state your current hypothesis in 3-5 sentences.\n\nBased on what you learned in Phase 0, write:\n1. Your current best guess for the approach\n2. Your main concern about that guess\n3. What would most likely make that guess wrong\n\nThis is your reference point. After design generation, you will compare the result against this hypothesis and say what changed your mind or what held firm.\n\nSet this key in the next `continue_workflow` call's `context` object:\n- `initialHypothesis`",
50
50
  "requireConfirmation": false
51
51
  },
52
52
  {
@@ -84,7 +84,7 @@
84
84
  "var": "taskComplexity",
85
85
  "not_equals": "Small"
86
86
  },
87
- "prompt": "Read `design-candidates.md`, compare against your initial hypothesis, and make the final architecture decision.\n\nInput contract: both QUICK and deep design paths produce `design-candidates.md` with candidates, tradeoffs, and a recommendation. Use that artifact as your primary input.\n\nPart A — Compare to hypothesis:\nRevisit `initialHypothesis`. Now that you have design candidates:\n- Where did the design work confirm your hypothesis?\n- Where did it challenge or change your thinking?\n- What did you learn that you hadn't considered?\nState explicitly what changed your mind and what held firm.\n\nPart B — Challenge the leading option:\n- What's the strongest argument against the recommended approach?\n- What assumption, if wrong, would invalidate it?\n- STANDARD/THOROUGH: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the leading option's failure modes\n- THOROUGH: optionally also spawn ONE WorkRail Executor running `routine-execution-simulation` to trace the 3 most likely failure scenarios\n\nPart C — Select:\nMake the final architecture decision. The design output is evidence, not a decision — you own the choice.\n\nIf the simplest solution satisfies acceptance criteria, prefer it. Complexity must justify itself. If the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost.\n\nSet context variables:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)",
87
+ "prompt": "Read `design-candidates.md`, compare against your initial hypothesis, and make the final architecture decision.\n\nInput contract: both QUICK and deep design paths produce `design-candidates.md` with candidates, tradeoffs, and a recommendation. Use that artifact as your primary input.\n\nPart A — Compare to hypothesis:\nRevisit `initialHypothesis`. Now that you have design candidates:\n- Where did the design work confirm your hypothesis?\n- Where did it challenge or change your thinking?\n- What did you learn that you hadn't considered?\nState explicitly what changed your mind and what held firm.\n\nPart B — Challenge the leading option:\n- What's the strongest argument against the recommended approach?\n- What assumption, if wrong, would invalidate it?\n- STANDARD/THOROUGH: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the leading option's failure modes\n- THOROUGH: optionally also spawn ONE WorkRail Executor running `routine-execution-simulation` to trace the 3 most likely failure scenarios\n\nPart C — Select:\nMake the final architecture decision. The design output is evidence, not a decision — you own the choice.\n\nIf the simplest solution satisfies acceptance criteria, prefer it. Complexity must justify itself. 
If the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)",
88
88
  "requireConfirmation": {
89
89
  "or": [
90
90
  { "var": "automationLevel", "equals": "Low" },
@@ -114,7 +114,7 @@
114
114
  {
115
115
  "id": "phase-2a-pre-assess-design-review",
116
116
  "title": "Pre-Assess Design Review",
117
- "prompt": "Before the detailed design review, state your current assessment in 2-4 sentences.\n\nSay:\n- what you think the strongest part of the selected design is right now\n- what you think the weakest part is right now\n- which tradeoff or failure mode worries you most\n\nThis is your reference point for interpreting the review findings.\n\nSet context variable:\n- `designReviewAssessment`",
117
+ "prompt": "Before the detailed design review, state your current assessment in 2-4 sentences.\n\nSay:\n- what you think the strongest part of the selected design is right now\n- what you think the weakest part is right now\n- which tradeoff or failure mode worries you most\n\nThis is your reference point for interpreting the review findings.\n\nSet this key in the next `continue_workflow` call's `context` object:\n- `designReviewAssessment`",
118
118
  "requireConfirmation": false
119
119
  },
120
120
  {
@@ -131,7 +131,7 @@
131
131
  {
132
132
  "id": "phase-2c-synthesize-design-review",
133
133
  "title": "Synthesize Design Review Findings",
134
- "prompt": "Read `design-review-findings.md` and synthesize the review into workflow-owned decisions.\n\nPart A — Compare against your pre-assessment:\nRevisit `designReviewAssessment`.\n- What did the review confirm?\n- What did it surface that you missed?\n- What changed your mind and what held firm?\n\nPart B — Optional mode-adaptive challenge around the review findings:\n- QUICK: self-synthesize only\n- STANDARD: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the most serious review finding\n- THOROUGH: optionally spawn TWO WorkRail Executors — `routine-hypothesis-challenge` on the most serious finding + `routine-execution-simulation` on the most dangerous failure mode\n\nPart C — Decide:\nInterpret the findings yourself. Do not adopt the review artifact or any subagent framing wholesale.\n\nIf issues are found, fix the design (update `selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`) before continuing.\n\nSet context variables:\n- `designFindings`\n- `designRevised`",
134
+ "prompt": "Read `design-review-findings.md` and synthesize the review into workflow-owned decisions.\n\nPart A — Compare against your pre-assessment:\nRevisit `designReviewAssessment`.\n- What did the review confirm?\n- What did it surface that you missed?\n- What changed your mind and what held firm?\n\nPart B — Optional mode-adaptive challenge around the review findings:\n- QUICK: self-synthesize only\n- STANDARD: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the most serious review finding\n- THOROUGH: optionally spawn TWO WorkRail Executors — `routine-hypothesis-challenge` on the most serious finding + `routine-execution-simulation` on the most dangerous failure mode\n\nPart C — Decide:\nInterpret the findings yourself. Do not adopt the review artifact or any subagent framing wholesale.\n\nIf issues are found, fix the design (update `selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`) before continuing.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `designFindings`\n- `designRevised`",
135
135
  "requireConfirmation": false
136
136
  },
137
137
  {
@@ -152,7 +152,7 @@
152
152
  "var": "taskComplexity",
153
153
  "not_equals": "Small"
154
154
  },
155
- "prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Philosophy-driven constraints (from the user's active rules)\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nSet context variables:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`\n- `planConfidenceBand`\n\nRules:\n- keep `implementation_plan.md` concrete enough for another engineer to implement without guessing\n- use work packages only when they create real clarity; do not over-fragment work\n- use the user's coding philosophy as the primary planning lens, and name tensions explicitly\n- set `unresolvedUnknownCount` to the number of still-open issues that would materially affect implementation quality\n- set `planConfidenceBand` to Low / Medium / High based on how ready the plan actually is",
155
+ "prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Philosophy-driven constraints (from the user's active rules)\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`\n- `planConfidenceBand`\n\nRules:\n- keep `implementation_plan.md` concrete enough for another engineer to implement without guessing\n- use work packages only when they create real clarity; do not over-fragment work\n- use the user's coding philosophy as the primary planning lens, and name tensions explicitly\n- set `unresolvedUnknownCount` to the number of still-open issues that would materially affect implementation quality\n- set `planConfidenceBand` to Low / Medium / High based on how ready the plan actually is",
156
156
  "requireConfirmation": false
157
157
  },
158
158
  {
@@ -176,7 +176,7 @@
176
176
  {
177
177
  "id": "phase-4a-audit-and-refocus",
178
178
  "title": "Audit Plan and Apply Fixes",
179
- "prompt": "Audit the plan and fix what you find in one pass.\n\nPart A -- Audit:\n- completeness / missing work\n- weak assumptions and risks\n- invariant coverage\n- slice boundary quality\n- philosophy alignment against the user's active rules\n- regression check against `resolvedFindings` (if present): if a previously resolved issue reappeared, treat it as Critical\n\nPhilosophy rules:\n- flag findings by principle name\n- Red / Orange findings go into `planFindings`\n- Yellow tensions are informational only and do NOT block loop exit\n\nBefore delegating, state your hypothesis: what do you think the plan's biggest weakness is right now? What are you most and least confident about?\n\nMode-adaptive delegation:\n- QUICK: self-audit only\n- STANDARD: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-plan-analysis`, `routine-hypothesis-challenge`, and `routine-philosophy-alignment`; include `routine-execution-simulation` only when runtime or state-flow risk is material\n- THOROUGH: if delegation is available, spawn FOUR WorkRail Executors SIMULTANEOUSLY running `routine-plan-analysis`, `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment`\n\nInterrogate subagent output (if used):\n- Do NOT treat auditor findings as your findings. 
They are raw input from junior analysts.\n- Where 2+ auditors flag the same issue -> likely real, but verify it yourself\n- Where one auditor flags a unique concern -> investigate; is it genuine or did they miss context?\n- Where auditors conflict -> reason through it yourself rather than splitting the difference\n- State what changed your assessment of the plan and what didn't\n\nPart B -- Refocus (apply fixes immediately):\n- update `implementation_plan.md` to incorporate amendments\n- update `slices` if the plan shape changed\n- extract out-of-scope work into `followUpTickets`\n- track resolved findings (cap at 10, drop oldest)\n\nSet context variables:\n- `planFindings`\n- `planConfidence`\n- `resolvedFindings`\n- `followUpTickets`\n\nRules:\n- the main agent is synthesizer and final decision-maker\n- do not delegate sequentially when audit routines are independent\n- do not silently accept plan drift; reflect changes in the plan artifact immediately",
179
+ "prompt": "Audit the plan and fix what you find in one pass.\n\nPart A -- Audit:\n- completeness / missing work\n- weak assumptions and risks\n- invariant coverage\n- slice boundary quality\n- philosophy alignment against the user's active rules\n- regression check against `resolvedFindings` (if present): if a previously resolved issue reappeared, treat it as Critical\n\nPhilosophy rules:\n- flag findings by principle name\n- Red / Orange findings go into `planFindings`\n- Yellow tensions are informational only and do NOT block loop exit\n\nBefore delegating, state your hypothesis: what do you think the plan's biggest weakness is right now? What are you most and least confident about?\n\nMode-adaptive delegation:\n- QUICK: self-audit only\n- STANDARD: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-plan-analysis`, `routine-hypothesis-challenge`, and `routine-philosophy-alignment`; include `routine-execution-simulation` only when runtime or state-flow risk is material\n- THOROUGH: if delegation is available, spawn FOUR WorkRail Executors SIMULTANEOUSLY running `routine-plan-analysis`, `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment`\n\nInterrogate subagent output (if used):\n- Do NOT treat auditor findings as your findings. 
They are raw input from junior analysts.\n- Where 2+ auditors flag the same issue -> likely real, but verify it yourself\n- Where one auditor flags a unique concern -> investigate; is it genuine or did they miss context?\n- Where auditors conflict -> reason through it yourself rather than splitting the difference\n- State what changed your assessment of the plan and what didn't\n\nPart B -- Refocus (apply fixes immediately):\n- update `implementation_plan.md` to incorporate amendments\n- update `slices` if the plan shape changed\n- extract out-of-scope work into `followUpTickets`\n- track resolved findings (cap at 10, drop oldest)\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `planFindings`\n- `planConfidence`\n- `resolvedFindings`\n- `followUpTickets`\n\nRules:\n- the main agent is synthesizer and final decision-maker\n- do not delegate sequentially when audit routines are independent\n- do not silently accept plan drift; reflect changes in the plan artifact immediately",
180
180
  "requireConfirmation": false
181
181
  },
182
182
  {
@@ -219,7 +219,7 @@
219
219
  {
220
220
  "id": "phase-6a-implement-slice",
221
221
  "title": "Implement Slice",
222
- "prompt": "Implement slice `{{currentSlice.name}}`.\n\nBefore writing code, do a quick inline check:\n- if pivot triggers have fired or plan assumptions are clearly stale, stop and return to planning instead of coding through it\n- if target files or symbols no longer match the plan, stop and re-plan\n\nImplementation rules:\n- the main agent owns implementation\n- delegate only targeted cognitive routines via the WorkRail Executor (challenge, simulation, philosophy review), not the whole slice\n- read independent files in parallel when possible\n- implement incrementally and keep the slice within its intended boundary\n- apply the user's coding philosophy as the active implementation lens\n- run tests and build after implementation to confirm the slice works\n\nTrack whether this slice required:\n- a new special-case (`specialCaseIntroduced`)\n- an unplanned abstraction (`unplannedAbstractionIntroduced`)\n- unexpected file changes outside planned scope (`unexpectedScopeChange`)\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nIf `prStrategy = MultiPR`, stop with a concise PR package for user review.\n\nSet context variables:\n- `specialCaseIntroduced`\n- `unplannedAbstractionIntroduced`\n- `unexpectedScopeChange`\n- `verifyNeeded`",
222
+ "prompt": "Implement slice `{{currentSlice.name}}`.\n\nBefore writing code, do a quick inline check:\n- if pivot triggers have fired or plan assumptions are clearly stale, stop and return to planning instead of coding through it\n- if target files or symbols no longer match the plan, stop and re-plan\n\nImplementation rules:\n- the main agent owns implementation\n- delegate only targeted cognitive routines via the WorkRail Executor (challenge, simulation, philosophy review), not the whole slice\n- read independent files in parallel when possible\n- implement incrementally and keep the slice within its intended boundary\n- apply the user's coding philosophy as the active implementation lens\n- run tests and build after implementation to confirm the slice works\n\nTrack whether this slice required:\n- a new special-case (`specialCaseIntroduced`)\n- an unplanned abstraction (`unplannedAbstractionIntroduced`)\n- unexpected file changes outside planned scope (`unexpectedScopeChange`)\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nIf `prStrategy = MultiPR`, stop with a concise PR package for user review.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `specialCaseIntroduced`\n- `unplannedAbstractionIntroduced`\n- `unexpectedScopeChange`\n- `verifyNeeded`",
223
223
  "requireConfirmation": {
224
224
  "var": "prStrategy",
225
225
  "equals": "MultiPR"
@@ -232,7 +232,7 @@
232
232
  "var": "verifyNeeded",
233
233
  "equals": true
234
234
  },
235
- "prompt": "Evaluate what was just implemented with fresh eyes.\n\nReview:\n- does the implementation match the plan intent, not just the letter?\n- are there hidden assumptions or edge cases the implementation glossed over?\n- do invariants still hold?\n- are there philosophy-alignment regressions?\n- if multiple slices have passed since last verification, review all unverified slices together\n\nBefore delegating (if applicable), state: what is your honest assessment of the slice you just implemented? Where are you least confident?\n\nMode-adaptive delegation:\n- QUICK: self-verify only\n- STANDARD: if a fresh-eye trigger fired (`specialCaseIntroduced`, `unplannedAbstractionIntroduced`, or `unexpectedScopeChange`), optionally spawn ONE or TWO WorkRail Executors running `routine-hypothesis-challenge` and `routine-philosophy-alignment`\n- THOROUGH: if any fresh-eye trigger fired, spawn up to THREE WorkRail Executors running `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment`\n\nAfter receiving subagent output (if used), interrogate: did they find something you genuinely missed, or are they flagging things you already considered and accepted? State what changed your assessment.\n\nSet context variables:\n- `verificationFindings`\n- `verificationFailed`\n\nRule:\n- if serious concerns are found, stop and return to planning or ask the user\n- do not rubber-stamp; this step exists specifically to catch what the implementation step missed",
235
+ "prompt": "Evaluate what was just implemented with fresh eyes.\n\nReview:\n- does the implementation match the plan intent, not just the letter?\n- are there hidden assumptions or edge cases the implementation glossed over?\n- do invariants still hold?\n- are there philosophy-alignment regressions?\n- if multiple slices have passed since last verification, review all unverified slices together\n\nBefore delegating (if applicable), state: what is your honest assessment of the slice you just implemented? Where are you least confident?\n\nMode-adaptive delegation:\n- QUICK: self-verify only\n- STANDARD: if a fresh-eye trigger fired (`specialCaseIntroduced`, `unplannedAbstractionIntroduced`, or `unexpectedScopeChange`), optionally spawn ONE or TWO WorkRail Executors running `routine-hypothesis-challenge` and `routine-philosophy-alignment`\n- THOROUGH: if any fresh-eye trigger fired, spawn up to THREE WorkRail Executors running `routine-hypothesis-challenge`, `routine-execution-simulation`, and `routine-philosophy-alignment`\n\nAfter receiving subagent output (if used), interrogate: did they find something you genuinely missed, or are they flagging things you already considered and accepted? State what changed your assessment.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `verificationFindings`\n- `verificationFailed`\n\nRule:\n- if serious concerns are found, stop and return to planning or ask the user\n- do not rubber-stamp; this step exists specifically to catch what the implementation step missed",
236
236
  "requireConfirmation": {
237
237
  "var": "verificationFailed",
238
238
  "equals": true
@@ -272,7 +272,7 @@
272
272
  {
273
273
  "id": "phase-7b-fix-and-summarize",
274
274
  "title": "Fix Issues and Summarize Verification",
275
- "prompt": "Read `final-verification-findings.md` and turn it into workflow-owned decisions and fixes.\n\nRequired:\n- interpret the findings yourself rather than rubber-stamping them\n- identify any invariant violations or regressions that must be fixed now\n- if issues are found, fix them immediately\n- re-run affected tests\n- update `implementation_plan.md` if the fix changed boundaries or approach\n\nSet context variables:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
275
+ "prompt": "Read `final-verification-findings.md` and turn it into workflow-owned decisions and fixes.\n\nRequired:\n- interpret the findings yourself rather than rubber-stamping them\n- identify any invariant violations or regressions that must be fixed now\n- if issues are found, fix them immediately\n- re-run affected tests\n- update `implementation_plan.md` if the fix changed boundaries or approach\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
276
276
  "requireConfirmation": false
277
277
  },
278
278
  {