@exaudeus/workrail 3.9.0 → 3.9.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -157,6 +157,7 @@ The agent will find the workflow, start at step 1, and proceed systematically.
157
157
  - **Lockfile is enforced**: `package-lock.json` is canonical and CI will fail if `npm ci` would modify it. Commit lockfile changes intentionally.
158
158
  - **Release authority**: releases are produced by **semantic-release** in GitHub Actions (don’t bump versions/tags locally).
159
159
  - **Major releases are approval-gated**: breaking changes become **minor by default** and only become **major** when `WORKRAIL_ALLOW_MAJOR_RELEASE=true`.
160
+ - **Release type comes from the commit on `main`**: for squash merges, the PR title / squash commit title controls whether the release is patch, minor, major, or untagged. See `docs/reference/releases.md`.
160
161
  - **Preview a release (dry-run)**:
161
162
  - **Locally**: `npx semantic-release --dry-run --no-ci`
162
163
  - **Locally (major allowed)**: `WORKRAIL_ALLOW_MAJOR_RELEASE=true npx semantic-release --dry-run --no-ci`
@@ -174,7 +175,7 @@ The agent will find the workflow, start at step 1, and proceed systematically.
174
175
  | `coding-task-workflow-agentic` | Feature development with notes-first durability and audit loops |
175
176
  | `bug-investigation-agentic` | Systematic debugging with evidence-based analysis |
176
177
  | `mr-review-workflow-agentic` | Code review with parallel reviewer families |
177
- | `exploration-workflow` | Understanding an unfamiliar codebase |
178
+ | `wr.discovery` | Upstream exploration, framing, and design synthesis |
178
179
  | `document-creation-workflow` | Technical documentation with structure |
179
180
 
180
181
  Workflows adapt to complexity - simple tasks get fast-tracked, complex tasks get full rigor.
@@ -11,6 +11,24 @@ const FEATURE_DEFINITIONS = [
11
11
  ],
12
12
  ],
13
13
  },
14
+ {
15
+ id: 'wr.features.capabilities',
16
+ constraints: [
17
+ 'Do not assume optional capabilities are available just because this workflow can use them.',
18
+ 'If delegation, web access, or another optional capability matters here, verify it through a real probe or attempt before relying on it.',
19
+ 'If a capability is unavailable, degrade explicitly instead of silently acting as if it existed.',
20
+ ],
21
+ procedure: [
22
+ 'When an optional capability would materially affect the quality of this step, decide whether using it is worth it here.',
23
+ 'If you take a capability-dependent path, record the observation or attempt that justified it.',
24
+ 'If you skip delegation or another optional capability, keep going yourself and record why the fallback path is sufficient.',
25
+ ],
26
+ verify: [
27
+ 'No capability-dependent path was taken without an explicit observation, probe, or attempted use.',
28
+ 'If a capability was unavailable or skipped, the fallback path and its limitation were made explicit.',
29
+ 'The main agent still owns synthesis and final judgment.',
30
+ ],
31
+ },
14
32
  {
15
33
  id: 'wr.features.subagent_guidance',
16
34
  constraints: [
@@ -14,8 +14,8 @@
14
14
  "bytes": 740
15
15
  },
16
16
  "application/services/compiler/feature-registry.js": {
17
- "sha256": "350bc4b849426ed5c8ba64801b13ad47130ec7084d1ee693eed065bf158f10ab",
18
- "bytes": 2239
17
+ "sha256": "ee7efd8b3594e2f02a2ecc6fe024ed9fb96ba0f8b5f1683d0c63c6196eab20e2",
18
+ "bytes": 3436
19
19
  },
20
20
  "application/services/compiler/prompt-blocks.d.ts": {
21
21
  "sha256": "2f40fd7599bd351584505152e9916fdea60891833d22c774c8ac9955f01ccf66",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "3.9.0",
3
+ "version": "3.9.2",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -20,7 +20,7 @@
20
20
  },
21
21
  "description": {
22
22
  "type": "string",
23
- "description": "What this workflow accomplishes",
23
+ "description": "What this workflow accomplishes. Prefer clear user-facing language for bundled/user-facing workflows; schema does not enforce prose style.",
24
24
  "minLength": 1,
25
25
  "maxLength": 512
26
26
  },
@@ -42,7 +42,7 @@
42
42
  },
43
43
  "clarificationPrompts": {
44
44
  "type": "array",
45
- "description": "Questions to ask upfront to resolve ambiguities",
45
+ "description": "Questions to ask upfront to resolve ambiguities. Prefer direct, user-grounded wording over abstract framework narration.",
46
46
  "items": {
47
47
  "type": "string",
48
48
  "minLength": 1,
@@ -67,7 +67,7 @@
67
67
  },
68
68
  "metaGuidance": {
69
69
  "type": "array",
70
- "description": "Persistent behavioral rules surfaced on start and resume. Not repeated on every step advance. For external document pointers, use 'references' instead.",
70
+ "description": "Persistent behavioral rules surfaced on start and resume. Not repeated on every step advance. Use this to define quality bars and anti-failure guidance without rigidly scripting every thought. For external document pointers, use 'references' instead.",
71
71
  "items": {
72
72
  "type": "string",
73
73
  "minLength": 1,
@@ -127,7 +127,7 @@
127
127
  },
128
128
  "features": {
129
129
  "type": "array",
130
- "description": "Compiler features to apply to this workflow (e.g. wr.features.memory_context). Features inject content into promptBlocks at compile time.",
130
+ "description": "Compiler features to apply to this workflow (e.g. wr.features.memory_context). Features inject content into promptBlocks at compile time and work best when the workflow uses structured promptBlocks instead of relying entirely on raw prompt prose.",
131
131
  "items": {
132
132
  "type": "string",
133
133
  "minLength": 1,
@@ -137,7 +137,7 @@
137
137
  },
138
138
  "extensionPoints": {
139
139
  "type": "array",
140
- "description": "Bounded cognitive slots that users can customize via .workrail/bindings.json. Each slot is referenced in step prompts via {{wr.bindings.slotId}} and resolved at compile time.",
140
+ "description": "Bounded cognitive slots that users can customize via .workrail/bindings.json. Each slot is referenced in step prompts via {{wr.bindings.slotId}} and resolved at compile time. Use extension points for bounded cognitive units, not for core orchestration or final synthesis ownership.",
141
141
  "items": {
142
142
  "$ref": "#/$defs/extensionPoint"
143
143
  },
@@ -163,7 +163,7 @@
163
163
  "$defs": {
164
164
  "extensionPoint": {
165
165
  "type": "object",
166
- "description": "A bounded cognitive slot that can be customized via .workrail/bindings.json",
166
+ "description": "A bounded cognitive slot that can be customized via .workrail/bindings.json. Good uses include candidate generation, review, and validation passes; poor uses include replacing the parent workflow's core orchestration contract.",
167
167
  "properties": {
168
168
  "slotId": {
169
169
  "type": "string",
@@ -283,7 +283,7 @@
283
283
  "properties": {
284
284
  "id": { "$ref": "#/$defs/stepId", "description": "Unique identifier for the step" },
285
285
  "title": { "type": "string", "minLength": 1, "maxLength": 128 },
286
- "prompt": { "type": "string", "minLength": 1, "maxLength": 8192 },
286
+ "prompt": { "type": "string", "minLength": 1, "maxLength": 8192, "description": "Traditional single-string prompt. Use when structure is simple; prefer promptBlocks when you need stronger quality bars without over-scripting cognition." },
287
287
  "promptBlocks": { "$ref": "#/$defs/promptBlocks" },
288
288
  "agentRole": { "type": "string", "minLength": 10, "maxLength": 1024 },
289
289
  "guidance": { "type": "array", "items": { "type": "string" } },
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "mr-review-workflow-agentic",
3
- "name": "MR Review Workflow (v2 • Notes-First • Parallel Reviewer Families)",
4
- "version": "2.1.0",
5
- "description": "A v2-first MR review workflow that uses a shared fact packet, parallel reviewer families, an explicit coverage ledger, and contradiction-driven synthesis to produce high-signal review output without duplicating context gathering.",
3
+ "name": "MR Review Workflow (Lean v2 • Notes-First • Evidence-Driven Reviewer Families)",
4
+ "version": "2.2.0",
5
+ "description": "Lean v2 MR review workflow. Merges intake, missing-input gating, context gathering, and re-triage into one structured front phase, then drives review through a shared fact packet, parallel reviewer families, contradiction-driven synthesis, and evidence-first final validation.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
8
8
  "recommendedRiskPolicy": "conservative"
@@ -11,97 +11,69 @@
11
11
  "wr.features.subagent_guidance"
12
12
  ],
13
13
  "preconditions": [
14
- "User has the MR/PR context and a code diff accessible as pasted text or a file path.",
15
- "The agent has access to tools for reading the diff, changed files, and surrounding code.",
16
- "A human reviewer or author will consume the final review output."
17
- ],
18
- "clarificationPrompts": [
19
- "What is the MR title, purpose, and any related ticket or acceptance criteria?",
20
- "Where is the diff located, or can you paste it?",
21
- "Are there specific focus areas for this review (performance, security, API design, data integrity, etc.)?"
14
+ "User provides an MR/PR objective, diff, patch, branch, or equivalent review target.",
15
+ "The agent can inspect the changed files, surrounding code, and any deterministic validation artifacts needed to review accurately.",
16
+ "A human reviewer or author will consume the final recommendation and findings."
22
17
  ],
23
18
  "metaGuidance": [
24
19
  "DEFAULT BEHAVIOR: self-execute with tools. Only ask for missing external artifacts, permissions, or business context you cannot resolve yourself.",
25
- "V2 DURABILITY: use output.notesMarkdown and explicit `continue_workflow` context keys as durable workflow state. Do NOT rely on the live review document as required workflow memory.",
26
- "ARTIFACT STRATEGY: `reviewDocPath` is a human-facing artifact only. Keep it updated for readability, but keep execution truth in notes/`continue_workflow` context.",
27
- "MAIN AGENT OWNS REVIEW: the main agent owns truth, synthesis, severity calibration, final recommendation, and document finalization.",
28
- "SUBAGENT MODEL: use the WorkRail Executor only. Do not refer to Builder, Researcher, or other named subagent identities.",
29
- "PARALLELISM: parallelize independent cognition; serialize synthesis, canonical review findings, recommendation decisions, and final document writes.",
30
- "REVIEW MODEL: first build shared context, then freeze a neutral fact packet, then let multiple reviewer families interpret the same packet in parallel.",
31
- "REVIEWER FAMILIES: use specialist reviewer families such as correctness/invariants, patterns/architecture, runtime/production-risk, test/docs/rollout, false-positive skeptic, and missed-issue hunter.",
32
- "COVERAGE LEDGER: explicitly track what review domains are `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`. Do not finalize with unresolved important coverage gaps unless you name them explicitly.",
33
- "SYNTHESIS: when reviewer families disagree, treat the disagreement as first-class work. Resolve it explicitly; do not handwave contradictory outputs.",
34
- "TRIGGERS: WorkRail can only react to explicit fields. Use structural fields like `contextUnknownCount`, `criticalSurfaceTouched`, `coverageUncertainCount`, and `majorFindingsCount`.",
35
- "TRIGGERS (cont): also track `falsePositiveRiskCount`, `blindSpotCount`, `contradictionCount`, and `needsSimulation` to route fresh-eye review and synthesis work.",
36
- "BOUNDARY: do not post comments, approve, reject, or merge unless the user explicitly asks. Produce a high-quality review artifact and recommendation only."
20
+ "V2 DURABILITY: use output.notesMarkdown and explicit `continue_workflow` context keys as durable workflow state. Do NOT rely on the review document as required workflow memory.",
21
+ "ARTIFACT STRATEGY: `reviewDocPath` is an optional human-facing artifact only. Create or update it only when it materially improves handoff or readability. Workflow truth lives in notes and explicit context fields.",
22
+ "OWNERSHIP & DELEGATION: the main agent owns truth, synthesis, severity calibration, recommendation, and final handoff. Delegate only bounded reviewer or validation work through the WorkRail Executor.",
23
+ "SUBAGENT SYNTHESIS: treat reviewer-family and validator output as evidence, not conclusions. State your current hypothesis before delegation, then say what was confirmed, what was new, what you reject, and what changed your mind.",
24
+ "PARALLELISM: parallelize independent cognition; serialize canonical synthesis, coverage-ledger updates, recommendation decisions, and document writes.",
25
+ "REVIEW MODEL: first build shared understanding, then freeze a neutral fact packet, then let reviewer families challenge it in parallel, then reconcile contradictions explicitly.",
26
+ "COVERAGE LEDGER: explicitly track review domains as `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`. Do not finalize with unresolved material gaps unless you name them clearly.",
27
+ "TRIGGERS: WorkRail can only react to explicit fields. Use structural fields such as `contextUnknownCount`, `criticalSurfaceTouched`, `coverageUncertainCount`, `contradictionCount`, `falsePositiveRiskCount`, `blindSpotCount`, and `needsSimulation`.",
28
+ "BOUNDARY: do not post comments, approve, reject, or merge unless the user explicitly asks. Produce findings, recommendation, and handoff material only."
37
29
  ],
38
30
  "steps": [
39
31
  {
40
- "id": "phase-0-triage-and-mode",
41
- "title": "Phase 0: Triage (MR Context • Risk • Mode)",
42
- "prompt": "Understand the MR and choose the right rigor.\n\nCapture:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched` (true/false)\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `maxParallelism`: 0 / 3 / 5\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nAlso choose `reviewDocPath` for the human-facing live artifact. Default suggestion: `mr-review.md` at the project root.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `reviewDocPath`\n\nAsk for confirmation only if the selected mode materially changes expectations or if the diff/source context is still missing.",
43
- "requireConfirmation": true
44
- },
45
- {
46
- "id": "phase-0b-minimum-inputs-gate",
47
- "title": "Phase 0b: Minimum Inputs Gate",
48
- "prompt": "If critical inputs are missing, ask only for the minimum needed to review effectively.\n\nPossible asks:\n- missing diff path or pasted diff\n- missing MR purpose or intended behavior change\n- missing ticket or requirements context when the diff alone is not enough\n- missing repo access or file paths needed to inspect surrounding code\n\nDo NOT ask for information you can discover with tools.",
32
+ "id": "phase-0-understand-and-classify",
33
+ "title": "Phase 0: Understand & Classify",
34
+ "prompt": "Build understanding and classify the review in one pass.\n\nStep 1 — Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Explore:\nUse tools to build the minimum complete understanding needed to review accurately. Read independent files in parallel when possible.\n\nGather:\n- MR title and purpose, if discoverable\n- ticket or acceptance-criteria context when available\n- changed files overview and changed-file count\n- module roots, call chain highlights, public contracts, impacted consumers, and repo patterns that matter\n- explicit unknowns, likely blind spots, and whether author intent remains unclear\n- whether any critical surface is touched\n\nStep 3 — Classify after exploration:\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nStep 4 — Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and understanding still feels incomplete, and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 5 — Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- if the review target is missing entirely, ask only for that missing artifact\n- classify AFTER exploring, not before",
49
35
  "requireConfirmation": {
50
36
  "or": [
51
- { "var": "reviewMode", "equals": "STANDARD" },
52
- { "var": "reviewMode", "equals": "THOROUGH" }
37
+ { "var": "reviewMode", "equals": "THOROUGH" },
38
+ { "var": "riskLevel", "equals": "High" }
53
39
  ]
54
40
  }
55
41
  },
56
42
  {
57
- "id": "phase-1-context-understanding",
58
- "title": "Phase 1: Shared Context Understanding",
43
+ "id": "phase-1-state-hypothesis",
44
+ "title": "Phase 1: State Review Hypothesis",
45
+ "prompt": "Before selecting reviewer families, state your current review hypothesis in 3-5 sentences.\n\nBased on what you learned in Phase 0, write:\n1. your current best guess about the review recommendation direction\n2. the issue category or risk you are most concerned about\n3. what would most likely make that current view wrong\n\nThis is your reference point. After reviewer-family and validation work, you will compare the result against this hypothesis and say what changed your mind or what held firm.\n\nSet this key in the next `continue_workflow` call's `context` object:\n- `recommendationHypothesis`",
46
+ "requireConfirmation": false
47
+ },
48
+ {
49
+ "id": "phase-2-freeze-fact-packet-and-select-reviewers",
50
+ "title": "Phase 2: Freeze Fact Packet and Select Reviewer Families",
59
51
  "promptBlocks": {
60
- "goal": "Build the minimum complete understanding needed to review accurately.",
52
+ "goal": "Freeze a shared factual basis for review and decide how much reviewer-family parallelism is warranted.",
61
53
  "constraints": [
62
54
  [
63
55
  { "kind": "ref", "refId": "wr.refs.notes_first_durability" }
64
56
  ],
65
- "Do the main context work yourself using tools.",
66
- "Keep `reviewDocPath` updated for human readability, but keep execution truth in notes/`continue_workflow` context."
57
+ "The fact packet is the primary truth for downstream reviewer families.",
58
+ "Keep `recommendationHypothesis` as a secondary hypothesis to challenge, not a frame to defend."
67
59
  ],
68
60
  "procedure": [
69
- "Produce a concise MR summary and intended behavior change, changed files overview, module or subsystem neighborhood, bounded call graph / public contracts / impacted consumers where relevant, repo patterns that matter for this review, and explicit unknowns / likely blind spots.",
70
- "Set these keys in the next `continue_workflow` call's `context` object: `contextSummary`, `candidateFiles`, `moduleRoots`, `contextUnknownCount`, `coverageGapCount`, `authorIntentUnclear`, `retriageNeeded`.",
71
- "Compute `contextUnknownCount` as unresolved technical unknowns that materially affect review quality.",
72
- "Compute `coverageGapCount` as likely review angles or code areas still insufficiently understood.",
73
- "Set `retriageNeeded = true` if the real risk or surface area is larger than Phase 0 suggested.",
74
- "Mode-adaptive context audit: QUICK = self-check only; STANDARD = if `contextUnknownCount > 0` and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH, then synthesize both outputs; THOROUGH = if delegation is available, spawn the same two parallel context auditors and synthesize both outputs."
61
+ "Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, and explicit open unknowns.",
62
+ "Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
63
+ "Perform a preliminary self-review from the fact packet before choosing reviewer families.",
64
+ "Reviewer family options: `correctness_invariants`, `patterns_architecture`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
65
+ "Selection guidance: QUICK = no bundle by default unless ambiguity still feels material; STANDARD = 3 families by default; THOROUGH = 5 families by default.",
66
+ "Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial.",
67
+ "Set `coverageUncertainCount` as the number of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`.",
68
+ "Initialize `contradictionCount`, `blindSpotCount`, and `falsePositiveRiskCount` to `0` if no reviewer-family bundle will run."
75
69
  ],
76
70
  "verify": [
77
- "All material technical unknowns are counted in `contextUnknownCount`.",
78
- "Likely review blind spots are reflected in `coverageGapCount`.",
79
- "Any delegated context outputs are synthesized before proceeding."
71
+ "The fact packet is concrete enough that downstream reviewer families can use it without regathering broad context.",
72
+ "The workflow has a clear reason for whether `needsReviewerBundle` is true or false."
80
73
  ]
81
74
  },
82
75
  "requireConfirmation": false
83
76
  },
84
- {
85
- "id": "phase-1b-retriage-after-context",
86
- "title": "Phase 1b: Re-Triage After Context",
87
- "runCondition": {
88
- "var": "retriageNeeded",
89
- "equals": true
90
- },
91
- "prompt": "Reassess the review mode now that the real code context is known.\n\nReview:\n- `contextUnknownCount`\n- `coverageGapCount`\n- actual systems/components involved\n- whether `criticalSurfaceTouched` is still accurate\n- whether runtime or production simulation now looks necessary\n\nDo:\n- confirm or adjust `reviewMode`\n- confirm or adjust `riskLevel`\n- confirm or adjust `maxParallelism`\n- set `needsSimulation` to true or false\n- set `retriageChanged`\n\nEscalation rules:\n- QUICK may escalate to STANDARD if `criticalSurfaceTouched = true` or `contextUnknownCount > 0`\n- STANDARD may escalate to THOROUGH if `criticalSurfaceTouched = true` and risk is High, or if multiple unresolved context gaps remain\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `needsSimulation`\n- `retriageChanged`",
92
- "requireConfirmation": {
93
- "or": [
94
- { "var": "retriageChanged", "equals": true },
95
- { "var": "riskLevel", "equals": "High" }
96
- ]
97
- }
98
- },
99
- {
100
- "id": "phase-2-fact-packet-and-family-selection",
101
- "title": "Phase 2: Freeze Fact Packet and Select Reviewer Families",
102
- "prompt": "Freeze the shared factual basis that all downstream reviewer families must use, then choose the reviewer families from that same phase.\n\nCreate a neutral `reviewFactPacket` containing:\n- MR purpose and expected behavior change\n- changed files and module roots\n- key contracts, invariants, and affected consumers\n- call graph highlights or execution touchpoints\n- relevant repo patterns and exemplars\n- tests/docs expectations\n- explicit open unknowns\n\nInitialize `coverageLedger` with these domains, each marked as `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`:\n- correctness_logic\n- contracts_invariants\n- patterns_architecture\n- runtime_production_risk\n- tests_docs_rollout\n- security_performance\n\nThen perform a preliminary review from the shared fact packet and choose reviewer families.\n\nReviewer family options:\n- `correctness_invariants`\n- `patterns_architecture`\n- `runtime_production_risk`\n- `test_docs_rollout`\n- `false_positive_skeptic`\n- `missed_issue_hunter`\n\nSelection guidance:\n- QUICK: no family bundle by default; add `false_positive_skeptic` only if a supposedly easy review still feels risky or ambiguous\n- STANDARD: run 3 families by default\n- THOROUGH: run 5 families by default\n- always include `correctness_invariants` unless clearly not applicable\n- always include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable\n- include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`\n- include `missed_issue_hunter` in THOROUGH mode\n- include `false_positive_skeptic` whenever Major/Critical findings are likely, the change is controversial, or severity inflation risk is non-trivial\n\nAnti-anchoring rule:\n- reviewer families must treat `reviewFactPacket` as primary truth\n- `recommendationHypothesis` is optional secondary context only; it must not become the frame every family simply validates\n\nCoverage ledger rules:\n- use `contradicted` when evidence materially conflicts across reviewer families and the disagreement is unresolved\n- use `needs_followup` when the domain is relevant and additional targeted work is still required\n- use `uncertain` only for bounded ambiguity where no direct contradiction exists yet\n- compute `coverageUncertainCount` as the count of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`\n\nDefault reviewer-bundle rule:\n- QUICK: `needsReviewerBundle = false` unless a trigger or risk signal clearly justifies it\n- STANDARD / THOROUGH: `needsReviewerBundle = true` by default unless the review is materially simpler than expected\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFactPacket`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `preliminaryFindings`\n- `recommendationHypothesis`\n- `reviewFamiliesSelected`\n- `needsReviewerBundle`",
103
- "requireConfirmation": false
104
- },
105
77
  {
106
78
  "id": "phase-3-reviewer-family-bundle",
107
79
  "title": "Phase 3: Parallel Reviewer Family Bundle",
@@ -110,7 +82,7 @@
110
82
  "equals": true
111
83
  },
112
84
  "promptBlocks": {
113
- "goal": "Run the selected reviewer families in parallel from the same shared fact packet.",
85
+ "goal": "Run the selected reviewer families in parallel from the same fact packet, then synthesize their output as evidence rather than conclusions.",
114
86
  "constraints": [
115
87
  [
116
88
  { "kind": "ref", "refId": "wr.refs.notes_first_durability" }
@@ -120,12 +92,14 @@
120
92
  ],
121
93
  "Each reviewer family must use `reviewFactPacket` as primary truth.",
122
94
  "Use `recommendationHypothesis` only as secondary comparison context.",
123
- "If a family disagrees with the preliminary direction, it must say so explicitly."
95
+ "Reviewer-family outputs are raw evidence, not canonical review state."
124
96
  ],
125
97
  "procedure": [
98
+ "Before delegating, restate the current `recommendationHypothesis` and say which reviewer family is most likely to challenge it.",
126
99
  "Each reviewer family must return: key findings, severity estimates, confidence level, top risks, recommendation, and what others may have missed.",
127
- "Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important category of issue the others may miss.",
100
+ "Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue category the others may miss.",
128
101
  "Mode-adaptive parallelism: STANDARD = spawn THREE WorkRail Executors SIMULTANEOUSLY for the selected families; THOROUGH = spawn FIVE WorkRail Executors SIMULTANEOUSLY for the selected families.",
102
+ "After receiving outputs, explicitly synthesize: what reviewer families confirmed, what was genuinely new, what appeared weak or overreached, and what changed your mind or did not.",
129
103
  "Set these keys in the next `continue_workflow` call's `context` object: `familyFindingsSummary`, `familyRecommendationSpread`, `contradictionCount`, `blindSpotCount`, `falsePositiveRiskCount`, `needsSimulation`.",
130
104
  "Compute `contradictionCount` as material disagreements across reviewer families about issue validity, severity, or final recommendation.",
131
105
  "Increase `blindSpotCount` if the missed-issue hunter or any other family identifies uncovered review space.",
@@ -133,8 +107,8 @@
133
107
  ],
134
108
  "verify": [
135
109
  "The same fact packet was used as primary truth across reviewer families.",
136
- "Contradictions, blind spots, and false-positive risks are all reflected structurally in the `continue_workflow` context object.",
137
- "Parallel reviewer outputs are not treated as self-finalizing; the main agent still owns synthesis."
110
+ "Reviewer-family output is not treated as self-finalizing.",
111
+ "Contradictions, blind spots, and false-positive risks are reflected structurally in context."
138
112
  ]
139
113
  },
140
114
  "requireConfirmation": false
@@ -157,17 +131,19 @@
157
131
  "id": "phase-4a-targeted-follow-up",
158
132
  "title": "Targeted Follow-Up Bundle",
159
133
  "promptBlocks": {
160
- "goal": "If contradictions or important coverage gaps remain, run only the smallest targeted follow-up work needed.",
134
+ "goal": "If contradictions, blind spots, or important coverage gaps remain, run only the smallest targeted follow-up needed.",
161
135
  "constraints": [
162
136
  [
163
137
  { "kind": "ref", "refId": "wr.refs.parallelize_cognition_serialize_synthesis" }
164
138
  ],
165
- "Prefer one compact targeted bundle over multiple new delegation moments.",
166
- "Do not regather broad context unless a contradiction proves the original fact packet is insufficient."
139
+ "Prefer one compact targeted bundle over repeated broad delegation moments.",
140
+ "Do not regather broad context unless a contradiction proves the original fact packet is insufficient.",
141
+ "Targeted follow-up output is evidence only and must still be synthesized by the main agent."
167
142
  ],
168
143
  "procedure": [
144
+ "Before delegating, state the current canonical recommendation direction, the strongest unresolved concern, and what result would change your mind.",
169
145
  "If `contradictionCount > 0`, run targeted challenge or validation aimed at the specific disagreement.",
170
- "If `coverageUncertainCount > 0` or `blindSpotCount > 0`, run the smallest reviewer family or context follow-up needed to close the gap.",
146
+ "If `coverageUncertainCount > 0` or `blindSpotCount > 0`, run the smallest reviewer-family or context follow-up needed to close the gap.",
171
147
  "If `needsSimulation = true`, include `routine-execution-simulation`.",
172
148
  "If `falsePositiveRiskCount > 0`, include `routine-hypothesis-challenge`.",
173
149
  "If no trigger fires, do not delegate this step."
@@ -182,13 +158,13 @@
182
158
  {
183
159
  "id": "phase-4b-canonical-synthesis",
184
160
  "title": "Canonical Synthesis and Coverage Update",
185
- "prompt": "Synthesize all reviewer-family outputs and any targeted follow-up into one canonical review state.\n\nSynthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nCoverage ledger rules:\n- move a domain from `uncertain` to `checked` only when the evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when the required targeted follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nRecommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and consensus is strong enough for the current mode\n- set `recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions 
are unresolved, or important coverage gaps still weaken the recommendation\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nUpdate `reviewDocPath` so the human artifact matches the canonical review state.",
161
+ "prompt": "Synthesize all reviewer-family outputs and targeted follow-up into one canonical review state.\n\nPart A — Compare against your hypothesis:\n- revisit `recommendationHypothesis`\n- what did the evidence confirm?\n- what did it challenge?\n- what changed your mind, what held firm, and what do you explicitly reject?\n\nPart B — Synthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nPart C — Coverage ledger rules:\n- move a domain from `uncertain` to `checked` only when evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when required follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nPart D — Recommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and the evidence is strong enough for the current mode\n- set 
`recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions are unresolved, or important coverage gaps still weaken the recommendation\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nIf `reviewDocPath` exists, keep it aligned for human readability only. Notes/context remain workflow truth.",
186
162
  "requireConfirmation": false
187
163
  },
188
164
  {
189
165
  "id": "phase-4c-loop-decision",
190
166
  "title": "Synthesis Loop Decision",
191
- "prompt": "Decide whether the synthesis loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` continue\n- else if `coverageUncertainCount > 0` and the uncertainty materially affects the recommendation continue\n- else if `falsePositiveRiskCount > 0` continue\n- else if `recommendationDriftDetected = true` continue\n- else stop\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
167
+ "prompt": "Decide whether the synthesis loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` -> continue\n- else if `coverageUncertainCount > 0` and the uncertainty materially affects the recommendation -> continue\n- else if `falsePositiveRiskCount > 0` -> continue\n- else if `recommendationDriftDetected = true` -> continue\n- else -> stop\n\nOutput exactly this shape, setting `decision` to `continue` or `stop` per the rules above:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
192
168
  "requireConfirmation": true,
193
169
  "outputContract": {
194
170
  "contractRef": "wr.contracts.loop_control"
@@ -200,18 +176,21 @@
200
176
  "id": "phase-5-final-validation",
201
177
  "title": "Phase 5: Final Validation",
202
178
  "promptBlocks": {
203
- "goal": "Before final handoff, decide whether additional validation is still required.",
179
+ "goal": "Stress-test the current recommendation before final handoff.",
204
180
  "constraints": [
205
181
  [
206
182
  { "kind": "ref", "refId": "wr.refs.adversarial_challenge_rules" }
207
183
  ],
208
184
  [
209
185
  { "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }
210
- ]
186
+ ],
187
+ "Validation output is evidence to synthesize, not an automatic reopen signal."
211
188
  ],
212
189
  "procedure": [
213
190
  "Run final validation if any of these are true: `criticalSurfaceTouched = true`, `needsSimulation = true`, `falsePositiveRiskCount > 0`, `coverageUncertainCount > 0`, `docCompletenessConcernCount > 0`, or `recommendationConfidenceBand != High`.",
191
+ "Before delegating, state your current recommendation, where you are least confident, and what finding would most likely change your mind now.",
214
192
  "Mode-adaptive validation: QUICK = self-validate and optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` if a serious uncertainty remains; STANDARD = if validation is required and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and either `routine-execution-simulation` or `routine-plan-analysis`; THOROUGH = if validation is required and delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation` when needed, and `routine-plan-analysis`.",
193
+ "After receiving validator output, explicitly synthesize what was confirmed, what was new, what appears weak, and whether your recommendation changed.",
215
194
  "Compute `docCompletenessConcernCount` by counting one concern for each material packaging gap: missing rationale for any Critical or Major finding, missing ready-to-post MR comment for any Critical or Major finding, recommendation mismatch with canonical findings, still-uncertain / contradicted / needs-followup coverage domains not summarized clearly, or any missing required final section needed for actionability.",
216
195
  "Set these keys in the next `continue_workflow` call's `context` object: `validatorConsensusLevel`, `validationSummary`, `recommendationConfidenceBand`, `docCompletenessConcernCount`."
217
196
  ],
@@ -231,8 +210,8 @@
231
210
  {
232
211
  "id": "phase-6-final-handoff",
233
212
  "title": "Phase 6: Final Handoff",
234
- "prompt": "Provide the final MR review handoff.\n\nInclude:\n- MR title and purpose\n- review mode used\n- final recommendation and confidence band\n- counts of Critical / Major / Minor / Nit findings\n- top findings with rationale\n- strongest areas of uncertainty, if any\n- summary of the coverage ledger, especially any still-uncertain domains\n- path to the full human-facing review artifact (`reviewDocPath`)\n- ready-to-post MR comments summary\n- any validation outcomes worth the human reviewer seeing\n\nRules:\n- the final recommendation assists a human reviewer; it does not replace them\n- keep `reviewDocPath` updated, but do not treat it as workflow state\n- do not post comments, approve, reject, or merge unless the user explicitly asks",
213
+ "prompt": "Provide the final MR review handoff.\n\nInclude:\n- MR title and purpose\n- review mode used\n- final recommendation and confidence band\n- counts of Critical / Major / Minor / Nit findings\n- top findings with rationale\n- strongest remaining areas of uncertainty, if any\n- summary of the coverage ledger, especially any still-uncertain domains\n- ready-to-post MR comments summary\n- any validation outcomes a human reviewer should see\n- path to the full human-facing review artifact (`reviewDocPath`) only if one was created\n\nRules:\n- the final recommendation assists a human reviewer; it does not replace them\n- if `reviewDocPath` exists, treat it as a human-facing companion artifact only\n- do not post comments, approve, reject, or merge unless the user explicitly asks",
235
214
  "requireConfirmation": true
236
215
  }
237
216
  ]
238
- }
217
+ }