@exaudeus/workrail 1.16.0 → 1.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/application/services/compiler/feature-registry.js +18 -0
  2. package/dist/application/services/compiler/prompt-blocks.js +2 -1
  3. package/dist/application/services/compiler/ref-registry.js +36 -0
  4. package/dist/application/services/validation-engine.d.ts +1 -0
  5. package/dist/application/services/validation-engine.js +14 -0
  6. package/dist/application/services/workflow-validation-pipeline.d.ts +96 -0
  7. package/dist/application/services/workflow-validation-pipeline.js +94 -0
  8. package/dist/application/use-cases/raw-workflow-file-scanner.d.ts +18 -0
  9. package/dist/application/use-cases/raw-workflow-file-scanner.js +91 -0
  10. package/dist/application/use-cases/validate-workflow-file.d.ts +17 -0
  11. package/dist/application/use-cases/validate-workflow-file.js +96 -0
  12. package/dist/application/use-cases/validate-workflow-json.d.ts +2 -1
  13. package/dist/application/use-cases/validate-workflow-json.js +67 -13
  14. package/dist/application/use-cases/validate-workflow-registry.d.ts +72 -0
  15. package/dist/application/use-cases/validate-workflow-registry.js +215 -0
  16. package/dist/application/validation.d.ts +4 -0
  17. package/dist/application/validation.js +16 -0
  18. package/dist/cli/commands/validate.js +15 -0
  19. package/dist/cli.js +10 -1
  20. package/dist/infrastructure/storage/caching-workflow-storage.d.ts +1 -0
  21. package/dist/infrastructure/storage/caching-workflow-storage.js +3 -0
  22. package/dist/infrastructure/storage/enhanced-multi-source-workflow-storage.d.ts +2 -1
  23. package/dist/infrastructure/storage/enhanced-multi-source-workflow-storage.js +8 -21
  24. package/dist/infrastructure/storage/file-workflow-storage.d.ts +0 -1
  25. package/dist/infrastructure/storage/file-workflow-storage.js +15 -36
  26. package/dist/infrastructure/storage/schema-validating-workflow-storage.d.ts +1 -0
  27. package/dist/infrastructure/storage/schema-validating-workflow-storage.js +16 -6
  28. package/dist/infrastructure/storage/workflow-resolution.d.ts +62 -0
  29. package/dist/infrastructure/storage/workflow-resolution.js +150 -0
  30. package/dist/manifest.json +102 -62
  31. package/dist/mcp/handlers/v2-execution/replay.d.ts +1 -1
  32. package/dist/mcp/handlers/v2-execution/replay.js +37 -21
  33. package/dist/mcp/handlers/v2-execution/start.js +35 -13
  34. package/dist/mcp/handlers/v2-execution-helpers.d.ts +9 -11
  35. package/dist/mcp/handlers/v2-execution-helpers.js +6 -18
  36. package/dist/mcp/output-schemas.d.ts +20 -20
  37. package/dist/types/storage.d.ts +1 -0
  38. package/dist/v2/durable-core/domain/prompt-renderer.js +13 -7
  39. package/dist/v2/durable-core/domain/start-construction.d.ts +22 -0
  40. package/dist/v2/durable-core/domain/start-construction.js +31 -0
  41. package/dist/v2/durable-core/schemas/compiled-workflow/index.d.ts +8 -8
  42. package/dist/v2/read-only/v1-to-v2-shim.d.ts +5 -0
  43. package/dist/v2/read-only/v1-to-v2-shim.js +18 -0
  44. package/package.json +3 -2
  45. package/workflows/bug-investigation.agentic.v2.json +134 -0
  46. package/workflows/mr-review-workflow.agentic.v2.json +238 -0
@@ -12,12 +12,12 @@ export declare const CompiledWorkflowSnapshotV1Schema: z.ZodDiscriminatedUnion<"
12
12
  prompt: z.ZodString;
13
13
  }, "strip", z.ZodTypeAny, {
14
14
  stepId: string;
15
- prompt: string;
16
15
  title: string;
16
+ prompt: string;
17
17
  }, {
18
18
  stepId: string;
19
- prompt: string;
20
19
  title: string;
20
+ prompt: string;
21
21
  }>;
22
22
  }, "strip", z.ZodTypeAny, {
23
23
  name: string;
@@ -28,8 +28,8 @@ export declare const CompiledWorkflowSnapshotV1Schema: z.ZodDiscriminatedUnion<"
28
28
  sourceKind: "v1_preview";
29
29
  preview: {
30
30
  stepId: string;
31
- prompt: string;
32
31
  title: string;
32
+ prompt: string;
33
33
  };
34
34
  }, {
35
35
  name: string;
@@ -40,8 +40,8 @@ export declare const CompiledWorkflowSnapshotV1Schema: z.ZodDiscriminatedUnion<"
40
40
  sourceKind: "v1_preview";
41
41
  preview: {
42
42
  stepId: string;
43
- prompt: string;
44
43
  title: string;
44
+ prompt: string;
45
45
  };
46
46
  }>, z.ZodObject<{
47
47
  schemaVersion: z.ZodLiteral<1>;
@@ -82,12 +82,12 @@ export declare const CompiledWorkflowSnapshotSchema: z.ZodDiscriminatedUnion<"so
82
82
  prompt: z.ZodString;
83
83
  }, "strip", z.ZodTypeAny, {
84
84
  stepId: string;
85
- prompt: string;
86
85
  title: string;
86
+ prompt: string;
87
87
  }, {
88
88
  stepId: string;
89
- prompt: string;
90
89
  title: string;
90
+ prompt: string;
91
91
  }>;
92
92
  }, "strip", z.ZodTypeAny, {
93
93
  name: string;
@@ -98,8 +98,8 @@ export declare const CompiledWorkflowSnapshotSchema: z.ZodDiscriminatedUnion<"so
98
98
  sourceKind: "v1_preview";
99
99
  preview: {
100
100
  stepId: string;
101
- prompt: string;
102
101
  title: string;
102
+ prompt: string;
103
103
  };
104
104
  }, {
105
105
  name: string;
@@ -110,8 +110,8 @@ export declare const CompiledWorkflowSnapshotSchema: z.ZodDiscriminatedUnion<"so
110
110
  sourceKind: "v1_preview";
111
111
  preview: {
112
112
  stepId: string;
113
- prompt: string;
114
113
  title: string;
114
+ prompt: string;
115
115
  };
116
116
  }>, z.ZodObject<{
117
117
  schemaVersion: z.ZodLiteral<1>;
@@ -1,8 +1,13 @@
1
1
  import type { Workflow } from '../../types/workflow.js';
2
2
  import type { CompiledWorkflowSnapshotV1 } from '../durable-core/schemas/compiled-workflow/index.js';
3
+ import { type Result } from 'neverthrow';
4
+ import type { DomainError } from '../../domain/execution/error.js';
3
5
  export declare function compileV1WorkflowToV2PreviewSnapshot(workflow: Workflow): Extract<CompiledWorkflowSnapshotV1, {
4
6
  sourceKind: 'v1_preview';
5
7
  }>;
6
8
  export declare function compileV1WorkflowToPinnedSnapshot(workflow: Workflow): Extract<CompiledWorkflowSnapshotV1, {
7
9
  sourceKind: 'v1_pinned';
8
10
  }>;
11
+ export declare function normalizeV1WorkflowToPinnedSnapshot(workflow: Workflow): Result<Extract<CompiledWorkflowSnapshotV1, {
12
+ sourceKind: 'v1_pinned';
13
+ }>, DomainError>;
@@ -2,7 +2,9 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.compileV1WorkflowToV2PreviewSnapshot = compileV1WorkflowToV2PreviewSnapshot;
4
4
  exports.compileV1WorkflowToPinnedSnapshot = compileV1WorkflowToPinnedSnapshot;
5
+ exports.normalizeV1WorkflowToPinnedSnapshot = normalizeV1WorkflowToPinnedSnapshot;
5
6
  const workflow_compiler_js_1 = require("../../application/services/workflow-compiler.js");
7
+ const neverthrow_1 = require("neverthrow");
6
8
  function compileV1WorkflowToV2PreviewSnapshot(workflow) {
7
9
  const firstStep = workflow.definition.steps[0];
8
10
  if (!firstStep) {
@@ -63,3 +65,19 @@ function compileV1WorkflowToPinnedSnapshot(workflow) {
63
65
  definition: resolvedDefinition,
64
66
  };
65
67
  }
68
+ function normalizeV1WorkflowToPinnedSnapshot(workflow) {
69
+ const resolved = (0, workflow_compiler_js_1.resolveDefinitionSteps)(workflow.definition.steps, workflow.definition.features ?? []);
70
+ if (resolved.isErr()) {
71
+ return (0, neverthrow_1.err)(resolved.error);
72
+ }
73
+ const resolvedDefinition = { ...workflow.definition, steps: resolved.value };
74
+ return (0, neverthrow_1.ok)({
75
+ schemaVersion: 1,
76
+ sourceKind: 'v1_pinned',
77
+ workflowId: workflow.definition.id,
78
+ name: workflow.definition.name,
79
+ description: workflow.definition.description,
80
+ version: workflow.definition.version,
81
+ definition: resolvedDefinition,
82
+ });
83
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "1.16.0",
3
+ "version": "1.16.1",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -31,8 +31,9 @@
31
31
  "dev": "npm run build && node dist/mcp-server.js",
32
32
  "watch": "tsc --watch",
33
33
  "validate:workflows": "bash scripts/validate-workflows.sh",
34
+ "validate:registry": "node scripts/validate-workflows-registry.ts",
34
35
  "validate:workflow-discovery": "node scripts/validate-workflow-discovery.js",
35
- "precommit": "npm run validate:workflows",
36
+ "precommit": "npm run validate:registry",
36
37
  "preinstall": "node -e \"const v=parseInt(process.versions.node.split('.')[0],10); if(v<20){console.error('WorkRail requires Node.js >=20. Current: '+process.versions.node+'\\nPlease upgrade: https://nodejs.org/'); process.exit(1);}\"",
37
38
  "web:dev": "npm run build && WORKRAIL_ENABLE_SESSION_TOOLS=true node dist/mcp-server.js",
38
39
  "web:ci": "WORKRAIL_ENABLE_SESSION_TOOLS=true node dist/mcp-server.js",
@@ -0,0 +1,134 @@
1
+ {
2
+ "id": "bug-investigation-agentic",
3
+ "name": "Bug Investigation (v2 • Notes-First • WorkRail Executor)",
4
+ "version": "2.0.0",
5
+ "description": "A v2-first bug investigation workflow focused on moving from theory to proof with notes-first durability, explicit trigger fields, de-anchored fresh-eye review, and investigation-only handoff boundaries.",
6
+ "recommendedPreferences": {
7
+ "recommendedAutonomy": "guided",
8
+ "recommendedRiskPolicy": "conservative"
9
+ },
10
+ "preconditions": [
11
+ "User has a specific bug report, failing test, or unexpected behavior to investigate.",
12
+ "Agent has codebase access and can run tests, commands, or other deterministic evidence-gathering steps.",
13
+ "Some reproduction information or observable symptom is available, even if incomplete."
14
+ ],
15
+ "clarificationPrompts": [
16
+ "What are the repro steps, observed symptoms, and expected behavior?",
17
+ "How reproducible is this issue? (always, intermittent, unknown)",
18
+ "What environment, logs, failing tests, or constraints are already known?"
19
+ ],
20
+ "metaGuidance": [
21
+ "DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for missing external facts, permissions, or business decisions you cannot resolve yourself.",
22
+ "V2 DURABILITY: use output.notesMarkdown and explicit context variables as the durable investigation state. Do NOT rely on BUG_investigation.md, BUG_hypotheses.md, or other markdown artifacts as required workflow memory.",
23
+ "ARTIFACT STRATEGY: markdown artifacts are optional human-facing outputs only. If created, they should be derived from notes/context state, not serve as the source of truth.",
24
+ "MAIN AGENT OWNS INVESTIGATION: the main agent owns truth, synthesis, evidence gathering, diagnosis, and final handoff.",
25
+ "SUBAGENT MODEL: use the WorkRail Executor only. Do not refer to Builder/Researcher identities.",
26
+ "AUDITOR MODEL: prefer delegation for context auditing, alternate hypothesis generation, adversarial challenge, and execution simulation. Do not hand off investigation ownership.",
27
+ "PARALLELISM: parallelize independent cognition; serialize synthesis, instrumentation changes, evidence interpretation, and final diagnosis.",
28
+ "PROOF OVER THEORY: code reading creates theories, not proof. A diagnosis is only strong when evidence separates it from the strongest alternatives.",
29
+ "ANTI-ANCHORING: in STANDARD and THOROUGH, at least one hypothesis-generation pass must be de-anchored from the main agent's preferred explanation.",
30
+ "DIAGNOSIS MODEL: allow `single_cause`, `multi_factor`, `root_plus_downstream`, `working_as_designed`, and `inconclusive_but_narrowed` outcomes. Do not force every investigation into a single-cause winner.",
31
+ "TRIGGERS: WorkRail can only react to explicit outputs. Use structural fields like `contextUnknownCount`, `hypothesesConsideredCount`, `hasStrongAlternative`, `contradictionCount`, `unresolvedEvidenceGapCount`, and `diagnosisConfidenceBand`.",
32
+ "BOUNDARY: this workflow investigates and proves root cause. It may describe high-level fix direction and likely files, but must not create implementation plans, patch sequencing, PR plans, or code-writing momentum."
33
+ ],
34
+ "steps": [
35
+ {
36
+ "id": "phase-0-triage-and-intake",
37
+ "title": "Phase 0: Triage (Bug Intake • Risk • Mode)",
38
+ "prompt": "Understand the bug report and choose the right rigor.\n\nCapture:\n- `bugSummary`: concise statement of the issue\n- `reproSummary`: repro steps, symptoms, expected behavior, environment notes\n- `investigationComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `maxParallelism`: 0 / 2 / 3\n\nDecision guidance:\n- QUICK: clear repro, narrow surface area, low ambiguity\n- STANDARD: moderate ambiguity, moderate system breadth, or meaningful risk\n- THOROUGH: high ambiguity, high-risk production impact, broad surface area, or multiple plausible causes\n\nSet context variables:\n- `bugSummary`\n- `reproSummary`\n- `investigationComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `maxParallelism`\n- `reproducibilityConfidence` (High / Medium / Low)\n\nAsk for confirmation only if the chosen rigor materially affects expectations or if critical repro details are still missing.",
39
+ "requireConfirmation": true
40
+ },
41
+ {
42
+ "id": "phase-0b-minimum-inputs-gate",
43
+ "title": "Phase 0b: Minimum Inputs Gate",
44
+ "prompt": "If critical inputs are missing, ask only for the minimum needed to investigate.\n\nPossible asks:\n- missing repro steps or failing test command\n- missing expected behavior\n- missing environment constraints or permissions\n- missing logs or stack traces when the codebase alone cannot answer the gap\n\nDo NOT ask for information you can discover with tools.",
45
+ "requireConfirmation": {
46
+ "or": [
47
+ { "var": "automationLevel", "equals": "Low" },
48
+ { "var": "automationLevel", "equals": "Medium" }
49
+ ]
50
+ }
51
+ },
52
+ {
53
+ "id": "phase-1-context-and-execution-understanding",
54
+ "title": "Phase 1: Context + Execution Understanding",
55
+ "prompt": "Build the minimum complete understanding needed to investigate correctly.\n\nDo the main investigation yourself using tools.\n\nDeliverable:\n- relevant files, modules, entry points, and call chain sketch\n- execution summary from trigger to failure or surprising behavior\n- suspicious points and explicit unknowns\n- whether the issue might plausibly be `working_as_designed`\n\nSet context variables:\n- `contextSummary`\n- `executionSummary`\n- `candidateFiles`\n- `contextUnknownCount`\n- `executionPathCount`\n- `suspiciousPointCount`\n- `workingAsDesignedStillPlausible`\n- `retriageNeeded`\n\nComputation rules:\n- `contextUnknownCount` = number of unresolved technical unknowns that materially affect hypothesis quality or evidence design\n- `executionPathCount` = number of materially distinct execution paths currently in play\n- `suspiciousPointCount` = count of concrete suspicious locations or transitions worth explaining\n- set `retriageNeeded = true` if the real surface area or risk is clearly larger than Phase 0 assumed\n\nMode-adaptive context audit:\n- QUICK: self-check only\n- STANDARD: if `contextUnknownCount > 0` and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH, then synthesize both outputs\n- THOROUGH: if delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH, then synthesize both outputs",
56
+ "requireConfirmation": false
57
+ },
58
+ {
59
+ "id": "phase-1b-retriage-after-context",
60
+ "title": "Phase 1b: Re-Triage After Context",
61
+ "runCondition": {
62
+ "var": "retriageNeeded",
63
+ "equals": true
64
+ },
65
+ "prompt": "Reassess investigation scope after real context is known.\n\nReview:\n- `contextUnknownCount`\n- `executionPathCount`\n- `suspiciousPointCount`\n- actual systems/components involved\n- whether risk or ambiguity is larger than originally assessed\n\nDo:\n- confirm or adjust `investigationComplexity`\n- confirm or adjust `riskLevel`\n- confirm or adjust `rigorMode`\n- confirm or adjust `maxParallelism`\n\nSet context variables:\n- `investigationComplexity`\n- `riskLevel`\n- `rigorMode`\n- `maxParallelism`\n- `retriageChanged`\n\nRule:\n- upgrade rigor when the real investigation surface is broader or riskier than expected",
66
+ "requireConfirmation": {
67
+ "or": [
68
+ { "var": "retriageChanged", "equals": true },
69
+ { "var": "automationLevel", "equals": "Low" }
70
+ ]
71
+ }
72
+ },
73
+ {
74
+ "id": "phase-2-hypothesis-generation-and-shortlist",
75
+ "title": "Phase 2: Hypothesis Generation + Shortlist",
76
+ "prompt": "Generate and rank plausible explanations without over-committing early.\n\nRequired candidate types:\n- normal code/logic explanation(s)\n- state/data explanation(s) where applicable\n- integration/environment explanation(s) where applicable\n- `working_as_designed` when still plausible\n- multi-factor explanation if interacting causes plausibly explain the symptoms better than any single cause\n\nMode-adaptive generation:\n- QUICK: self-generate at least 3 materially distinct hypotheses\n- STANDARD: if delegation is available, spawn TWO or THREE WorkRail Executors SIMULTANEOUSLY for hypothesis generation from different lenses; at least one pass must be de-anchored from your current leading theory\n- THOROUGH: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY for logic, data/state, and integration/environment lenses; at least one pass must be de-anchored from your current leading theory\n\nDe-anchored packet rule:\n- include the bug report, repro summary, symptoms, candidate files, and key references\n- do NOT frame the work package around your preferred explanation as the only story\n- if you already have a favorite theory, pass it separately as `currentLeadingTheory`\n\nAfter generation:\n- merge duplicates\n- rank hypotheses\n- explicitly decide whether a strong alternative remains\n- force one more generation pass if all candidates cluster in the same pattern family\n\nSet context variables:\n- `candidateHypotheses`\n- `hypothesesConsideredCount`\n- `hasStrongAlternative`\n- `leadingHypothesisConfidenceBand`\n- `currentLeadingTheory`\n\nComputation rules:\n- `hypothesesConsideredCount` = number of materially distinct viable explanations after merging duplicates\n- minimums: QUICK >= 3; STANDARD/THOROUGH >= 4 when `workingAsDesignedStillPlausible = true`, otherwise >= 3\n- `hasStrongAlternative = true` when a non-leading hypothesis still explains most symptoms with medium-or-better evidence fit\n\nRules:\n- if `hypothesesConsideredCount` is below the mode minimum, this phase cannot close\n- if `hasStrongAlternative = true`, challenge before the evidence loop can close",
77
+ "requireConfirmation": false
78
+ },
79
+ {
80
+ "id": "phase-3-evidence-strategy-and-instrumentation",
81
+ "title": "Phase 3: Evidence Strategy + Instrumentation",
82
+ "prompt": "Design the minimum evidence plan needed to separate the strongest remaining explanations.\n\nDo:\n- define what evidence would prove or disprove each top explanation\n- identify the minimum instrumentation, tests, commands, or observations needed\n- optionally use simulation to find the most informative observation points\n- make instrumentation changes or test additions only as needed for evidence collection\n\nMode-adaptive simulation:\n- QUICK: self-design the evidence plan\n- STANDARD: optionally spawn ONE WorkRail Executor running `routine-execution-simulation` when runtime/state-flow uncertainty is still meaningful\n- THOROUGH: if delegation is available, spawn ONE WorkRail Executor running `routine-execution-simulation` to improve evidence leverage before finalizing the plan\n\nSet context variables:\n- `evidenceStrategy`\n- `instrumentationCoverageScore`\n- `unresolvedEvidenceGapCount`\n- `evidenceRounds`\n\nComputation rules:\n- `instrumentationCoverageScore` = rough 1-10 score for how well the evidence plan can separate the top explanations\n- `unresolvedEvidenceGapCount` = number of still-missing observations needed to separate top remaining explanations\n- initialize `evidenceRounds = 0`\n\nRule:\n- this workflow may add instrumentation or tests to gather proof, but it must not drift into implementation planning or fix sequencing",
83
+ "requireConfirmation": false
84
+ },
85
+ {
86
+ "id": "phase-4-evidence-loop",
87
+ "type": "loop",
88
+ "title": "Phase 4: Evidence Collection Loop",
89
+ "loop": {
90
+ "type": "while",
91
+ "conditionSource": {
92
+ "kind": "artifact_contract",
93
+ "contractRef": "wr.contracts.loop_control",
94
+ "loopId": "bug_evidence_loop"
95
+ },
96
+ "maxIterations": 5
97
+ },
98
+ "body": [
99
+ {
100
+ "id": "phase-4a-collect-and-synthesize-evidence",
101
+ "title": "Collect and Synthesize Evidence",
102
+ "prompt": "Run the planned experiments, repros, tests, or commands and synthesize what they imply.\n\nAlways:\n- collect evidence against the leading theory and the strongest alternative(s)\n- update which explanations remain viable\n- identify contradictions explicitly\n- keep the investigation-only boundary intact\n\nSet context variables:\n- `evidenceRounds`\n- `evidenceStrengthByHypothesis`\n- `contradictionCount`\n- `unresolvedEvidenceGapCount`\n- `diagnosisType`\n- `residualUncertainty`\n\nComputation rules:\n- increment `evidenceRounds` once per completed pass\n- `contradictionCount` = number of material observations that directly conflict with the current leading diagnosis\n- `diagnosisType` must be one of: `single_cause`, `multi_factor`, `root_plus_downstream`, `working_as_designed`, `inconclusive_but_narrowed`\n\nStop / reopen guidance:\n- continue if `unresolvedEvidenceGapCount > 0`\n- continue if `contradictionCount > 0`\n- reopen the shortlist if the challenger breaks the leader or new evidence materially contradicts the current top two\n- allow bounded inconclusive status only when further evidence is not realistically available and the field has been meaningfully narrowed",
103
+ "requireConfirmation": false
104
+ },
105
+ {
106
+ "id": "phase-4b-loop-decision",
107
+ "title": "Evidence Loop Decision",
108
+ "prompt": "Decide whether the evidence loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` → continue\n- else if `unresolvedEvidenceGapCount > 0` → continue\n- else if `hasStrongAlternative = true` and the alternative is not meaningfully weaker → continue\n- else if `diagnosisType = inconclusive_but_narrowed` and further evidence is not realistically available → stop with bounded uncertainty\n- else → stop\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"loopId\": \"bug_evidence_loop\",\n \"decision\": \"continue\"\n }]\n}\n```",
109
+ "requireConfirmation": true,
110
+ "outputContract": {
111
+ "contractRef": "wr.contracts.loop_control"
112
+ }
113
+ }
114
+ ]
115
+ },
116
+ {
117
+ "id": "phase-5-diagnosis-validation",
118
+ "title": "Phase 5: Diagnosis Validation Bundle",
119
+ "prompt": "Stress-test the current diagnosis before handoff.\n\nSet `diagnosisConfidenceBand` using these rules:\n- High = all symptoms explained, no material contradictions, no unresolved evidence gaps\n- Medium = likely diagnosis, but one bounded uncertainty remains\n- Low = multiple viable explanations remain or contradictions are unresolved\n\nMode-adaptive validation:\n- QUICK: self-challenge; if `diagnosisConfidenceBand != High` or contradictions remain, optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge`\n- STANDARD: if delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and `routine-execution-simulation`\n- THOROUGH: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation`, and an additional `routine-hypothesis-challenge` pass focused on breaking the current diagnosis from a different angle\n\nParallel-output synthesis rules:\n- if 2+ validators raise serious concerns, reopen evidence or shortlist work\n- if exactly one validator raises a concern, investigate it before escalating\n- if no validator can materially break the diagnosis and `contradictionCount = 0`, proceed to handoff\n\nSet context variables:\n- `diagnosisConfidenceBand`\n- `validationFindingsCountBySeverity`\n- `validationSummary`\n\nBoundary rule:\n- allowed: high-level fix direction, likely files involved, verification recommendations\n- not allowed: implementation plan, patch sequencing, PR plan, or code-writing momentum",
120
+ "requireConfirmation": {
121
+ "or": [
122
+ { "var": "diagnosisConfidenceBand", "equals": "Low" },
123
+ { "var": "contradictionCount", "not_equals": 0 }
124
+ ]
125
+ }
126
+ },
127
+ {
128
+ "id": "phase-6-final-handoff",
129
+ "title": "Phase 6: Final Handoff",
130
+ "prompt": "Provide the final investigation handoff.\n\nInclude:\n- concise bug summary and repro summary\n- diagnosis type (`single_cause`, `multi_factor`, `root_plus_downstream`, `working_as_designed`, or `inconclusive_but_narrowed`)\n- proof summary and why the conclusion is justified\n- strongest alternative(s) that were ruled out\n- residual uncertainty, if any\n- high-level fix direction only\n- likely files or areas involved\n- verification recommendations for whoever implements the fix\n\nOptional artifact:\n- create `bug_handoff.md` only if it materially improves human handoff; it must be derived from the notes/context state, not treated as required workflow memory\n\nRule:\n- do not implement the fix, create a PR plan, or hand off a patch sequence",
131
+ "requireConfirmation": true
132
+ }
133
+ ]
134
+ }
@@ -0,0 +1,238 @@
1
+ {
2
+ "id": "mr-review-workflow-agentic",
3
+ "name": "MR Review Workflow (v2 • Notes-First • Parallel Reviewer Families)",
4
+ "version": "2.1.0",
5
+ "description": "A v2-first MR review workflow that uses a shared fact packet, parallel reviewer families, an explicit coverage ledger, and contradiction-driven synthesis to produce high-signal review output without duplicating context gathering.",
6
+ "recommendedPreferences": {
7
+ "recommendedAutonomy": "guided",
8
+ "recommendedRiskPolicy": "conservative"
9
+ },
10
+ "features": [
11
+ "wr.features.subagent_guidance"
12
+ ],
13
+ "preconditions": [
14
+ "User has the MR/PR context and a code diff accessible as pasted text or a file path.",
15
+ "The agent has access to tools for reading the diff, changed files, and surrounding code.",
16
+ "A human reviewer or author will consume the final review output."
17
+ ],
18
+ "clarificationPrompts": [
19
+ "What is the MR title, purpose, and any related ticket or acceptance criteria?",
20
+ "Where is the diff located, or can you paste it?",
21
+ "Are there specific focus areas for this review (performance, security, API design, data integrity, etc.)?"
22
+ ],
23
+ "metaGuidance": [
24
+ "DEFAULT BEHAVIOR: self-execute with tools. Only ask for missing external artifacts, permissions, or business context you cannot resolve yourself.",
25
+ "V2 DURABILITY: use output.notesMarkdown and explicit context variables as durable workflow state. Do NOT rely on the live review document as required workflow memory.",
26
+ "ARTIFACT STRATEGY: `reviewDocPath` is a human-facing artifact only. Keep it updated for readability, but keep execution truth in notes/context variables.",
27
+ "MAIN AGENT OWNS REVIEW: the main agent owns truth, synthesis, severity calibration, final recommendation, and document finalization.",
28
+ "SUBAGENT MODEL: use the WorkRail Executor only. Do not refer to Builder, Researcher, or other named subagent identities.",
29
+ "PARALLELISM: parallelize independent cognition; serialize synthesis, canonical review findings, recommendation decisions, and final document writes.",
30
+ "REVIEW MODEL: first build shared context, then freeze a neutral fact packet, then let multiple reviewer families interpret the same packet in parallel.",
31
+ "REVIEWER FAMILIES: use specialist reviewer families such as correctness/invariants, patterns/architecture, runtime/production-risk, test/docs/rollout, false-positive skeptic, and missed-issue hunter.",
32
+ "COVERAGE LEDGER: explicitly track what review domains are `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`. Do not finalize with unresolved important coverage gaps unless you name them explicitly.",
33
+ "SYNTHESIS: when reviewer families disagree, treat the disagreement as first-class work. Resolve it explicitly; do not handwave contradictory outputs.",
34
+ "TRIGGERS: WorkRail can only react to explicit fields. Use structural fields like `contextUnknownCount`, `criticalSurfaceTouched`, `coverageUncertainCount`, and `majorFindingsCount`.",
35
+ "TRIGGERS (cont): also track `falsePositiveRiskCount`, `blindSpotCount`, `contradictionCount`, and `needsSimulation` to route fresh-eye review and synthesis work.",
36
+ "BOUNDARY: do not post comments, approve, reject, or merge unless the user explicitly asks. Produce a high-quality review artifact and recommendation only."
37
+ ],
38
+ "steps": [
39
+ {
40
+ "id": "phase-0-triage-and-mode",
41
+ "title": "Phase 0: Triage (MR Context • Risk • Mode)",
42
+ "prompt": "Understand the MR and choose the right rigor.\n\nCapture:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched` (true/false)\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `maxParallelism`: 0 (QUICK) / 3 (STANDARD) / 5 (THOROUGH)\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nAlso choose `reviewDocPath` for the human-facing live artifact. Default suggestion: `mr-review.md` at the project root.\n\nSet context variables:\n- `mrTitle`\n- `mrPurpose`\n- `ticketContext`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `reviewDocPath`\n\nAsk for confirmation only if the selected mode materially changes expectations or if the diff/source context is still missing.",
43
+ "requireConfirmation": true
44
+ },
45
+ {
46
+ "id": "phase-0b-minimum-inputs-gate",
47
+ "title": "Phase 0b: Minimum Inputs Gate",
48
+ "prompt": "If critical inputs are missing, ask only for the minimum needed to review effectively.\n\nPossible asks:\n- missing diff path or pasted diff\n- missing MR purpose or intended behavior change\n- missing ticket or requirements context when the diff alone is not enough\n- missing repo access or file paths needed to inspect surrounding code\n\nDo NOT ask for information you can discover with tools.",
49
+ "requireConfirmation": {
50
+ "or": [
51
+ { "var": "reviewMode", "equals": "STANDARD" },
52
+ { "var": "reviewMode", "equals": "THOROUGH" }
53
+ ]
54
+ }
55
+ },
56
+ {
57
+ "id": "phase-1-context-understanding",
58
+ "title": "Phase 1: Shared Context Understanding",
59
+ "promptBlocks": {
60
+ "goal": "Build the minimum complete understanding needed to review accurately.",
61
+ "constraints": [
62
+ [
63
+ { "kind": "ref", "refId": "wr.refs.notes_first_durability" }
64
+ ],
65
+ "Do the main context work yourself using tools.",
66
+ "Keep `reviewDocPath` updated for human readability, but keep execution truth in notes/context variables."
67
+ ],
68
+ "procedure": [
69
+ "Produce a concise MR summary and intended behavior change, changed files overview, module or subsystem neighborhood, bounded call graph / public contracts / impacted consumers where relevant, repo patterns that matter for this review, and explicit unknowns / likely blind spots.",
70
+ "Set context variables: `contextSummary`, `candidateFiles`, `moduleRoots`, `contextUnknownCount`, `coverageGapCount`, `authorIntentUnclear`, `retriageNeeded`.",
71
+ "Compute `contextUnknownCount` as unresolved technical unknowns that materially affect review quality.",
72
+ "Compute `coverageGapCount` as likely review angles or code areas still insufficiently understood.",
73
+ "Set `retriageNeeded = true` if the real risk or surface area is larger than Phase 0 suggested.",
74
+ "Mode-adaptive context audit: QUICK = self-check only; STANDARD = if `contextUnknownCount > 0` and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH, then synthesize both outputs; THOROUGH = if delegation is available, spawn the same two parallel context auditors and synthesize both outputs."
75
+ ],
76
+ "verify": [
77
+ "All material technical unknowns are counted in `contextUnknownCount`.",
78
+ "Likely review blind spots are reflected in `coverageGapCount`.",
79
+ "Any delegated context outputs are synthesized before proceeding."
80
+ ]
81
+ },
82
+ "requireConfirmation": false
83
+ },
84
+ {
85
+ "id": "phase-1b-retriage-after-context",
86
+ "title": "Phase 1b: Re-Triage After Context",
87
+ "runCondition": {
88
+ "var": "retriageNeeded",
89
+ "equals": true
90
+ },
91
+ "prompt": "Reassess the review mode now that the real code context is known.\n\nReview:\n- `contextUnknownCount`\n- `coverageGapCount`\n- actual systems/components involved\n- whether `criticalSurfaceTouched` is still accurate\n- whether runtime or production simulation now looks necessary\n\nDo:\n- confirm or adjust `reviewMode`\n- confirm or adjust `riskLevel`\n- confirm or adjust `maxParallelism`\n- set `needsSimulation` to true or false\n- set `retriageChanged`\n\nEscalation rules:\n- QUICK may escalate to STANDARD if `criticalSurfaceTouched = true` or `contextUnknownCount > 0`\n- STANDARD may escalate to THOROUGH if `criticalSurfaceTouched = true` and risk is High, or if multiple unresolved context gaps remain\n\nSet context variables:\n- `reviewMode`\n- `riskLevel`\n- `maxParallelism`\n- `needsSimulation`\n- `retriageChanged`",
92
+ "requireConfirmation": {
93
+ "or": [
94
+ { "var": "retriageChanged", "equals": true },
95
+ { "var": "riskLevel", "equals": "High" }
96
+ ]
97
+ }
98
+ },
99
+ {
100
+ "id": "phase-2-fact-packet-and-family-selection",
101
+ "title": "Phase 2: Freeze Fact Packet and Select Reviewer Families",
102
+ "prompt": "Freeze the shared factual basis that all downstream reviewer families must use, then choose the reviewer families from that same phase.\n\nCreate a neutral `reviewFactPacket` containing:\n- MR purpose and expected behavior change\n- changed files and module roots\n- key contracts, invariants, and affected consumers\n- call graph highlights or execution touchpoints\n- relevant repo patterns and exemplars\n- tests/docs expectations\n- explicit open unknowns\n\nInitialize `coverageLedger` with these domains, each marked as `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`:\n- correctness_logic\n- contracts_invariants\n- patterns_architecture\n- runtime_production_risk\n- tests_docs_rollout\n- security_performance\n\nThen perform a preliminary review from the shared fact packet and choose reviewer families.\n\nReviewer family options:\n- `correctness_invariants`\n- `patterns_architecture`\n- `runtime_production_risk`\n- `test_docs_rollout`\n- `false_positive_skeptic`\n- `missed_issue_hunter`\n\nSelection guidance:\n- QUICK: no family bundle by default; add `false_positive_skeptic` only if a supposedly easy review still feels risky or ambiguous\n- STANDARD: run 3 families by default\n- THOROUGH: run 5 families by default\n- always include `correctness_invariants` unless clearly not applicable\n- always include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable\n- include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`\n- include `missed_issue_hunter` in THOROUGH mode\n- include `false_positive_skeptic` whenever Major/Critical findings are likely, the change is controversial, or severity inflation risk is non-trivial\n\nAnti-anchoring rule:\n- reviewer families must treat `reviewFactPacket` as primary truth\n- `recommendationHypothesis` is optional secondary context only; it must not become the frame every family simply validates\n\nCoverage ledger rules:\n- use 
`contradicted` when evidence materially conflicts across reviewer families and the disagreement is unresolved\n- use `needs_followup` when the domain is relevant and additional targeted work is still required\n- use `uncertain` only for bounded ambiguity where no direct contradiction exists yet\n- compute `coverageUncertainCount` as the count of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`\n\nDefault reviewer-bundle rule:\n- QUICK: `needsReviewerBundle = false` unless a trigger or risk signal clearly justifies it\n- STANDARD / THOROUGH: `needsReviewerBundle = true` by default unless the review is materially simpler than expected\n\nSet context variables:\n- `reviewFactPacket`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `preliminaryFindings`\n- `recommendationHypothesis`\n- `reviewFamiliesSelected`\n- `needsReviewerBundle`",
103
+ "requireConfirmation": false
104
+ },
105
+ {
106
+ "id": "phase-3-reviewer-family-bundle",
107
+ "title": "Phase 3: Parallel Reviewer Family Bundle",
108
+ "runCondition": {
109
+ "var": "needsReviewerBundle",
110
+ "equals": true
111
+ },
112
+ "promptBlocks": {
113
+ "goal": "Run the selected reviewer families in parallel from the same shared fact packet.",
114
+ "constraints": [
115
+ [
116
+ { "kind": "ref", "refId": "wr.refs.notes_first_durability" }
117
+ ],
118
+ [
119
+ { "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }
120
+ ],
121
+ "Each reviewer family must use `reviewFactPacket` as primary truth.",
122
+ "Use `recommendationHypothesis` only as secondary comparison context.",
123
+ "If a family disagrees with the preliminary direction, it must say so explicitly."
124
+ ],
125
+ "procedure": [
126
+ "Each reviewer family must return: key findings, severity estimates, confidence level, top risks, recommendation, and what others may have missed.",
127
+ "Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important category of issue the others may miss.",
128
+ "Mode-adaptive parallelism: STANDARD = spawn THREE WorkRail Executors SIMULTANEOUSLY for the selected families; THOROUGH = spawn FIVE WorkRail Executors SIMULTANEOUSLY for the selected families.",
129
+ "Set context variables: `familyFindingsSummary`, `familyRecommendationSpread`, `contradictionCount`, `blindSpotCount`, `falsePositiveRiskCount`, `needsSimulation`.",
130
+ "Compute `contradictionCount` as material disagreements across reviewer families about issue validity, severity, or final recommendation.",
131
+ "Increase `blindSpotCount` if the missed-issue hunter or any other family identifies uncovered review space.",
132
+ "Increase `falsePositiveRiskCount` when the skeptic materially weakens one or more high-severity findings."
133
+ ],
134
+ "verify": [
135
+ "The same fact packet was used as primary truth across reviewer families.",
136
+ "Contradictions, blind spots, and false-positive risks are all reflected structurally in context variables.",
137
+ "Parallel reviewer outputs are not treated as self-finalizing; the main agent still owns synthesis."
138
+ ]
139
+ },
140
+ "requireConfirmation": false
141
+ },
142
+ {
143
+ "id": "phase-4-contradiction-and-synthesis-loop",
144
+ "type": "loop",
145
+ "title": "Phase 4: Contradiction Resolution and Synthesis Loop",
146
+ "loop": {
147
+ "type": "while",
148
+ "conditionSource": {
149
+ "kind": "artifact_contract",
150
+ "contractRef": "wr.contracts.loop_control",
151
+ "loopId": "mr_review_synthesis_loop"
152
+ },
153
+ "maxIterations": 4
154
+ },
155
+ "body": [
156
+ {
157
+ "id": "phase-4a-targeted-follow-up",
158
+ "title": "Targeted Follow-Up Bundle",
159
+ "promptBlocks": {
160
+ "goal": "If contradictions or important coverage gaps remain, run only the smallest targeted follow-up work needed.",
161
+ "constraints": [
162
+ [
163
+ { "kind": "ref", "refId": "wr.refs.parallelize_cognition_serialize_synthesis" }
164
+ ],
165
+ "Prefer one compact targeted bundle over multiple new delegation moments.",
166
+ "Do not regather broad context unless a contradiction proves the original fact packet is insufficient."
167
+ ],
168
+ "procedure": [
169
+ "If `contradictionCount > 0`, run targeted challenge or validation aimed at the specific disagreement.",
170
+ "If `coverageUncertainCount > 0` or `blindSpotCount > 0`, run the smallest reviewer family or context follow-up needed to close the gap.",
171
+ "If `needsSimulation = true`, include `routine-execution-simulation`.",
172
+ "If `falsePositiveRiskCount > 0`, include `routine-hypothesis-challenge`.",
173
+ "If no trigger fires, do not delegate this step."
174
+ ],
175
+ "verify": [
176
+ "Only the smallest targeted bundle needed was run.",
177
+ "No broad context regather happened without an explicit contradiction-driven reason."
178
+ ]
179
+ },
180
+ "requireConfirmation": false
181
+ },
182
+ {
183
+ "id": "phase-4b-canonical-synthesis",
184
+ "title": "Canonical Synthesis and Coverage Update",
185
+ "prompt": "Synthesize all reviewer-family outputs and any targeted follow-up into one canonical review state.\n\nSynthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nCoverage ledger rules:\n- move a domain from `uncertain` to `checked` only when the evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when the required targeted follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nRecommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and consensus is strong enough for the current mode\n- set `recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions 
are unresolved, or important coverage gaps still weaken the recommendation\n\nSet context variables:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nUpdate `reviewDocPath` so the human artifact matches the canonical review state.",
186
+ "requireConfirmation": false
187
+ },
188
+ {
189
+ "id": "phase-4c-loop-decision",
190
+ "title": "Synthesis Loop Decision",
191
+ "prompt": "Decide whether the synthesis loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` → continue\n- else if `coverageUncertainCount > 0` and the uncertainty materially affects the recommendation → continue\n- else if `falsePositiveRiskCount > 0` → continue\n- else if `recommendationDriftDetected = true` → continue\n- else → stop\n\nOutput exactly this artifact shape, with `decision` set to either `continue` or `stop` according to the rules above:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"loopId\": \"mr_review_synthesis_loop\",\n \"decision\": \"continue\"\n }]\n}\n```",
192
+ "requireConfirmation": true,
193
+ "outputContract": {
194
+ "contractRef": "wr.contracts.loop_control"
195
+ }
196
+ }
197
+ ]
198
+ },
199
+ {
200
+ "id": "phase-5-final-validation",
201
+ "title": "Phase 5: Final Validation",
202
+ "promptBlocks": {
203
+ "goal": "Before final handoff, decide whether additional validation is still required.",
204
+ "constraints": [
205
+ [
206
+ { "kind": "ref", "refId": "wr.refs.adversarial_challenge_rules" }
207
+ ],
208
+ [
209
+ { "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }
210
+ ]
211
+ ],
212
+ "procedure": [
213
+ "Run final validation if any of these are true: `criticalSurfaceTouched = true`, `needsSimulation = true`, `falsePositiveRiskCount > 0`, `coverageUncertainCount > 0`, `docCompletenessConcernCount > 0`, or `recommendationConfidenceBand != High`.",
214
+ "Mode-adaptive validation: QUICK = self-validate and optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` if a serious uncertainty remains; STANDARD = if validation is required and delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and either `routine-execution-simulation` or `routine-plan-analysis`; THOROUGH = if validation is required and delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation` when needed, and `routine-plan-analysis`.",
215
+ "Compute `docCompletenessConcernCount` by counting one concern for each material packaging gap: missing rationale for any Critical or Major finding, missing ready-to-post MR comment for any Critical or Major finding, recommendation mismatch with canonical findings, still-uncertain / contradicted / needs-followup coverage domains not summarized clearly, or any missing required final section needed for actionability.",
216
+ "Set context variables: `validatorConsensusLevel`, `validationSummary`, `recommendationConfidenceBand`, `docCompletenessConcernCount`."
217
+ ],
218
+ "verify": [
219
+ "If 2+ validators still raise serious concerns, confidence is downgraded and synthesis is reopened.",
220
+ "If exactly one validator raises a concern, it is investigated before deciding whether to reopen anything.",
221
+ "If no validator can materially break the current recommendation and findings are internally consistent, proceed to handoff."
222
+ ]
223
+ },
224
+ "requireConfirmation": {
225
+ "or": [
226
+ { "var": "validatorConsensusLevel", "equals": "Low" },
227
+ { "var": "recommendationConfidenceBand", "equals": "Low" }
228
+ ]
229
+ }
230
+ },
231
+ {
232
+ "id": "phase-6-final-handoff",
233
+ "title": "Phase 6: Final Handoff",
234
+ "prompt": "Provide the final MR review handoff.\n\nInclude:\n- MR title and purpose\n- review mode used\n- final recommendation and confidence band\n- counts of Critical / Major / Minor / Nit findings\n- top findings with rationale\n- strongest areas of uncertainty, if any\n- summary of the coverage ledger, especially any still-uncertain domains\n- path to the full human-facing review artifact (`reviewDocPath`)\n- ready-to-post MR comments summary\n- any validation outcomes worth the human reviewer seeing\n\nRules:\n- the final recommendation assists a human reviewer; it does not replace them\n- keep `reviewDocPath` updated, but do not treat it as workflow state\n- do not post comments, approve, reject, or merge unless the user explicitly asks",
235
+ "requireConfirmation": true
236
+ }
237
+ ]
238
+ }