@exaudeus/workrail 3.11.0 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/spec/authoring-spec.json
CHANGED
|
@@ -578,8 +578,8 @@
|
|
|
578
578
|
"prompt.composition",
|
|
579
579
|
"workflow.definition"
|
|
580
580
|
],
|
|
581
|
-
"rule": "Use extension points when a workflow wants stable
|
|
582
|
-
"why": "Extension points make customization explicit, inspectable, and project-overridable.",
|
|
581
|
+
"rule": "Use extension points when a workflow wants stable project-overridable delegation seams rather than hardcoding bound routine or workflow names inline.",
|
|
582
|
+
"why": "Extension points make delegated customization explicit, inspectable, and project-overridable without conflating rebinding with routine injection.",
|
|
583
583
|
"enforcement": [
|
|
584
584
|
"advisory"
|
|
585
585
|
],
|
|
@@ -602,11 +602,15 @@
|
|
|
602
602
|
],
|
|
603
603
|
"checks": [
|
|
604
604
|
"Declare extension points at the workflow level when bindings are part of the contract.",
|
|
605
|
-
"Avoid hidden or undocumented binding slots in prompts."
|
|
605
|
+
"Avoid hidden or undocumented binding slots in prompts.",
|
|
606
|
+
"Prefer `templateCall` when the real goal is reusable inline routine structure, visible injected steps, or parent-step confirmation behavior.",
|
|
607
|
+
"Use extension points only when the seam is intentionally delegated and may need project-level rebinding."
|
|
606
608
|
],
|
|
607
609
|
"antiPatterns": [
|
|
608
610
|
"Hardcoding team-customizable routine names in prompt text without an extension-point declaration",
|
|
609
|
-
"Using `{{wr.bindings.*}}` tokens in a workflow that declares no extension points"
|
|
611
|
+
"Using `{{wr.bindings.*}}` tokens in a workflow that declares no extension points",
|
|
612
|
+
"Using extension points where `templateCall` would better represent the parent workflow's real structure",
|
|
613
|
+
"Expecting `{{wr.bindings.*}}` to change which routine gets injected inline"
|
|
610
614
|
]
|
|
611
615
|
}
|
|
612
616
|
]
|
|
@@ -682,6 +686,36 @@
|
|
|
682
686
|
}
|
|
683
687
|
]
|
|
684
688
|
},
|
|
689
|
+
{
|
|
690
|
+
"id": "references",
|
|
691
|
+
"title": "Workflow references",
|
|
692
|
+
"rules": [
|
|
693
|
+
{
|
|
694
|
+
"id": "references-are-for-runtime-companion-material",
|
|
695
|
+
"status": "active",
|
|
696
|
+
"level": "recommended",
|
|
697
|
+
"scope": [
|
|
698
|
+
"workflow.references",
|
|
699
|
+
"workflow.definition"
|
|
700
|
+
],
|
|
701
|
+
"rule": "Declare references only for documents the running workflow may genuinely need while executing its task.",
|
|
702
|
+
"why": "References are surfaced to the agent at workflow start and become part of the workflow hash. Maintainer-only or authoring-only references add cognitive load and hash churn without improving runtime execution.",
|
|
703
|
+
"enforcement": [
|
|
704
|
+
"advisory"
|
|
705
|
+
],
|
|
706
|
+
"checks": [
|
|
707
|
+
"Keep references that materially help the running workflow perform its task.",
|
|
708
|
+
"Prefer rubrics, target-system specs, policies, or playbooks that constrain runtime judgment.",
|
|
709
|
+
"If removing a reference would not make the running workflow materially worse at execution, remove it."
|
|
710
|
+
],
|
|
711
|
+
"antiPatterns": [
|
|
712
|
+
"Adding workflow-schema references to ordinary execution workflows that are not authoring or validation workflows",
|
|
713
|
+
"Adding authoring-spec or provenance references to workflows whose runtime task is unrelated to workflow authoring",
|
|
714
|
+
"Using references to justify a workflow's design to maintainers instead of helping the running agent do the task"
|
|
715
|
+
]
|
|
716
|
+
}
|
|
717
|
+
]
|
|
718
|
+
},
|
|
685
719
|
{
|
|
686
720
|
"id": "response-supplements",
|
|
687
721
|
"title": "Response supplements and delivery-owned guidance",
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
## Production Readiness Audit Rubric
|
|
2
|
+
|
|
3
|
+
Use this rubric when running the bundled `production-readiness-audit` workflow.
|
|
4
|
+
|
|
5
|
+
### Coverage domains
|
|
6
|
+
|
|
7
|
+
- Debugging and correctness
|
|
8
|
+
- Runtime readiness
|
|
9
|
+
- Technical debt and maintainability
|
|
10
|
+
- Philosophy and repo-pattern alignment
|
|
11
|
+
- Tests and observability
|
|
12
|
+
- Security and performance when the audited scope materially touches them
|
|
13
|
+
|
|
14
|
+
### Finding classes
|
|
15
|
+
|
|
16
|
+
- **Confirmed**: supported by primary evidence such as code, tests, build output, runtime traces, or a directly checked artifact
|
|
17
|
+
- **Plausible**: directionally concerning, but not yet strong enough to drive the verdict alone
|
|
18
|
+
- **Rejected**: weakened or disproved by fuller context or direct evidence
|
|
19
|
+
|
|
20
|
+
### Verdicts
|
|
21
|
+
|
|
22
|
+
- **ready**: no material blockers, no major unresolved gaps, and confidence is strong enough for the audited scope
|
|
23
|
+
- **ready_with_conditions**: broadly shippable, but bounded conditions or follow-up work still matter
|
|
24
|
+
- **not_ready**: blockers or major risks make shipping irresponsible right now
|
|
25
|
+
- **inconclusive**: the scope or evidence is too weak for a clean readiness call
|
|
26
|
+
|
|
27
|
+
### Confidence bands
|
|
28
|
+
|
|
29
|
+
- **High**: coverage is materially adequate and serious claims are backed by primary evidence
|
|
30
|
+
- **Medium**: most important areas are covered, but some uncertainty or weaker proof remains
|
|
31
|
+
- **Low**: major gaps, contradictions, or thin evidence still cap the verdict
|
|
32
|
+
|
|
33
|
+
### Severity discipline
|
|
34
|
+
|
|
35
|
+
- Do not upgrade a claim to blocker status just because multiple subagents agree
|
|
36
|
+
- Do not flatten real contradictions into a single confident story without adjudication
|
|
37
|
+
- Do not call a scope production-ready when a material coverage gap still weakens the verdict
|
|
38
|
+
|
|
39
|
+
### Synthesis discipline
|
|
40
|
+
|
|
41
|
+
- Treat delegated output as evidence, not final truth
|
|
42
|
+
- Say what changed your mind, what you rejected, and why
|
|
43
|
+
- Keep the final handoff decision-focused rather than implementation-focused
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "production-readiness-audit",
|
|
3
|
+
"name": "Production Readiness Audit (v2 • Evidence-Driven Readiness Review)",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"description": "Audit a bounded codebase scope for debugging risk, runtime readiness, stale or misleading implementation surfaces, technical debt, and anything else that would keep it from being honestly production-ready.",
|
|
6
|
+
"recommendedPreferences": {
|
|
7
|
+
"recommendedAutonomy": "guided",
|
|
8
|
+
"recommendedRiskPolicy": "conservative"
|
|
9
|
+
},
|
|
10
|
+
"features": [
|
|
11
|
+
"wr.features.subagent_guidance"
|
|
12
|
+
],
|
|
13
|
+
"references": [
|
|
14
|
+
{
|
|
15
|
+
"id": "audit-rubric",
|
|
16
|
+
"title": "Production Readiness Audit Rubric",
|
|
17
|
+
"source": "./spec/production-readiness-audit-rubric.md",
|
|
18
|
+
"purpose": "Canonical coverage, evidence, confidence, and verdict rubric for this workflow.",
|
|
19
|
+
"authoritative": true,
|
|
20
|
+
"resolveFrom": "package"
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"preconditions": [
|
|
24
|
+
"The user provides a target scope or the agent can infer a bounded scope from the request.",
|
|
25
|
+
"The agent can inspect the code, surrounding context, and deterministic evidence needed to assess readiness honestly.",
|
|
26
|
+
"A human will consume the final verdict, findings, or remediation order."
|
|
27
|
+
],
|
|
28
|
+
"metaGuidance": [
|
|
29
|
+
"DEFAULT BEHAVIOR: self-execute with tools. Ask only for true scope decisions, missing external artifacts, or permissions you cannot resolve yourself.",
|
|
30
|
+
"V2 DURABILITY: keep workflow truth in output.notesMarkdown and explicit context fields. Human-facing markdown artifacts are optional companions only.",
|
|
31
|
+
"OWNERSHIP: the main agent owns the fact packet, synthesis, severity calibration, verdict, and remediation order. Delegated work is evidence, not authority.",
|
|
32
|
+
"SUBAGENT DISCIPLINE: use a few explicit fan-out and fan-in checkpoints rather than scattered optional subagent calls.",
|
|
33
|
+
"READINESS MODEL: first understand and bound the scope, then state a readiness hypothesis, then freeze a neutral readiness fact packet, then let reviewer families challenge it in parallel, then reconcile contradictions explicitly.",
|
|
34
|
+
"COVERAGE LEDGER: track audit domains as `checked`, `uncertain`, `not_applicable`, `contradicted`, or `needs_followup`. Do not finalize with unresolved material gaps unless you name them clearly.",
|
|
35
|
+
"VERDICTS: allow `ready`, `ready_with_conditions`, `not_ready`, and `inconclusive`. Do not force a cleaner answer than the evidence supports.",
|
|
36
|
+
"BOUNDARY: this workflow audits and prioritizes. It must not drift into implementation planning or patch sequencing unless the user explicitly asks."
|
|
37
|
+
],
|
|
38
|
+
"steps": [
|
|
39
|
+
{
|
|
40
|
+
"id": "phase-0-understand-and-classify",
|
|
41
|
+
"title": "Phase 0: Understand and Classify",
|
|
42
|
+
"promptBlocks": {
|
|
43
|
+
"goal": "Build the minimum complete understanding needed to audit readiness honestly.",
|
|
44
|
+
"constraints": [
|
|
45
|
+
[
|
|
46
|
+
{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }
|
|
47
|
+
],
|
|
48
|
+
"Use tools first. Ask only for true scope or permission gaps you cannot resolve yourself.",
|
|
49
|
+
"Separate in-scope code from adjacent noise before you classify rigor or risk."
|
|
50
|
+
],
|
|
51
|
+
"procedure": [
|
|
52
|
+
"Locate the real target surface, likely entry points, critical paths, public contracts, invariants, data or runtime surfaces, and affected consumers that matter.",
|
|
53
|
+
"Find the repo patterns and philosophy sources that should shape the audit, and state what production-ready should mean for this scope instead of assuming a generic bar.",
|
|
54
|
+
"Classify `scopeShape`, `riskLevel`, `rigorMode`, `criticalSurfaceTouched`, and `needsSimulation` after exploration, not before.",
|
|
55
|
+
"Run a context-clarity check with concrete scores for boundary clarity, production-bar clarity, philosophy clarity, and verification clarity.",
|
|
56
|
+
"If rigor and uncertainty justify it, spawn TWO WorkRail Executors in parallel running `routine-context-gathering` with complementary completeness/depth focus, then synthesize what changed."
|
|
57
|
+
],
|
|
58
|
+
"outputRequired": {
|
|
59
|
+
"notesMarkdown": "Audit scope, production bar, classification, clarity scores, and what is still unknown.",
|
|
60
|
+
"context": "Capture scopeShape, riskLevel, rigorMode, contextSummary, candidateFiles, criticalPaths, productionBar, contextUnknownCount, criticalSurfaceTouched, needsSimulation, and openQuestions."
|
|
61
|
+
},
|
|
62
|
+
"verify": [
|
|
63
|
+
"The classification is driven by evidence, not vibes.",
|
|
64
|
+
"Open questions are real human-decision gaps only.",
|
|
65
|
+
"If scope is whole-codebase or risk is High, treat confirmation as a real review barrier."
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
"requireConfirmation": {
|
|
69
|
+
"or": [
|
|
70
|
+
{ "var": "scopeShape", "equals": "whole_codebase" },
|
|
71
|
+
{ "var": "riskLevel", "equals": "High" }
|
|
72
|
+
]
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"id": "phase-1-state-readiness-hypothesis",
|
|
77
|
+
"title": "Phase 1: State Readiness Hypothesis",
|
|
78
|
+
"promptBlocks": {
|
|
79
|
+
"goal": "State your current readiness hypothesis before the reviewer families challenge it.",
|
|
80
|
+
"constraints": [
|
|
81
|
+
"Keep this short and falsifiable.",
|
|
82
|
+
"This is a reference point, not a position to defend."
|
|
83
|
+
],
|
|
84
|
+
"procedure": [
|
|
85
|
+
"Write your current best guess about the likely readiness verdict direction.",
|
|
86
|
+
"Name the issue category or failure mode you are most worried about right now.",
|
|
87
|
+
"Say what would most likely make your current view wrong."
|
|
88
|
+
],
|
|
89
|
+
"outputRequired": {
|
|
90
|
+
"notesMarkdown": "Current readiness hypothesis and the strongest reason it might be wrong.",
|
|
91
|
+
"context": "Capture readinessHypothesis."
|
|
92
|
+
},
|
|
93
|
+
"verify": [
|
|
94
|
+
"The hypothesis is concrete enough that later synthesis can say what changed your mind."
|
|
95
|
+
]
|
|
96
|
+
},
|
|
97
|
+
"requireConfirmation": false
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"id": "phase-2-freeze-fact-packet-and-select-reviewers",
|
|
101
|
+
"title": "Phase 2: Freeze Fact Packet and Select Reviewer Families",
|
|
102
|
+
"promptBlocks": {
|
|
103
|
+
"goal": "Freeze a neutral readiness fact packet and decide how much reviewer-family parallelism is warranted.",
|
|
104
|
+
"constraints": [
|
|
105
|
+
[
|
|
106
|
+
{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }
|
|
107
|
+
],
|
|
108
|
+
"The fact packet is the primary truth for downstream reviewer families.",
|
|
109
|
+
"Keep `readinessHypothesis` as a hypothesis to challenge, not a frame to defend.",
|
|
110
|
+
"Keep any live audit artifact optional. Workflow truth lives in notes and context."
|
|
111
|
+
],
|
|
112
|
+
"procedure": [
|
|
113
|
+
"Create a neutral `readinessFactPacket` containing: scope purpose and expected behavior, key entry points and runtime surfaces, critical invariants and failure costs, data and deployment assumptions, changed or risky seams, test/observability posture, repo patterns and philosophy constraints, and explicit open unknowns.",
|
|
114
|
+
"Include realism signals directly in the fact packet: likely dead code paths, fixture or fake-data dependence, placeholder behavior, stale comments or docs, and any seams that look misleadingly complete.",
|
|
115
|
+
"Initialize `coverageLedger` for these domains: `correctness_debugging`, `runtime_operability`, `artifact_realism`, `maintainability_debt`, `tests_observability`, `philosophy_patterns`, `security_performance`.",
|
|
116
|
+
"Perform a preliminary self-audit from the fact packet before choosing reviewer families.",
|
|
117
|
+
"Reviewer family options: `correctness_debugging`, `runtime_operability`, `artifact_realism`, `maintainability_debt`, `tests_observability`, `philosophy_patterns`, `security_performance`, `false_positive_skeptic`, `missed_issue_hunter`.",
|
|
118
|
+
"Selection guidance: QUICK = no bundle by default unless ambiguity still feels material; STANDARD = 4 or 5 families by default; THOROUGH = 6 or 7 families by default.",
|
|
119
|
+
"Always include `correctness_debugging`, `runtime_operability`, and `artifact_realism` unless clearly not applicable. Include `security_performance` when the scope touches auth, permissions, input trust boundaries, secrets, network surfaces, data exposure, resource intensity, latency-sensitive flows, or unbounded work. Include `tests_observability` in STANDARD and THOROUGH unless clearly not applicable. Include `philosophy_patterns` when the repo or user philosophy is strong enough to judge honestly. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when blocker or major-grade findings already look plausible or severity inflation risk is non-trivial.",
|
|
120
|
+
"Set `needsReviewerBundle` explicitly. Set `coverageUncertainCount` as the number of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`. Initialize `contradictionCount`, `blindSpotCount`, and `falsePositiveRiskCount` to `0` if no reviewer-family bundle will run."
|
|
121
|
+
],
|
|
122
|
+
"outputRequired": {
|
|
123
|
+
"notesMarkdown": "Neutral readiness fact packet, preliminary self-audit, selected reviewer families, and why the bundle is sized the way it is.",
|
|
124
|
+
"context": "Capture readinessFactPacket, coverageLedger, selectedReviewerFamilies, needsReviewerBundle, coverageUncertainCount, contradictionCount, blindSpotCount, falsePositiveRiskCount, needsSimulation."
|
|
125
|
+
},
|
|
126
|
+
"verify": [
|
|
127
|
+
"The fact packet is concrete enough that downstream reviewer families can use it without regathering broad context.",
|
|
128
|
+
"The workflow has a clear reason for whether `needsReviewerBundle` is true or false."
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
"promptFragments": [
|
|
132
|
+
{
|
|
133
|
+
"id": "phase-2-quick",
|
|
134
|
+
"when": { "var": "rigorMode", "equals": "QUICK" },
|
|
135
|
+
"text": "Keep the fact packet compact. QUICK should not manufacture a giant ceremony layer."
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
"id": "phase-2-thorough",
|
|
139
|
+
"when": { "var": "rigorMode", "equals": "THOROUGH" },
|
|
140
|
+
"text": "For THOROUGH rigor, make the hidden-risk surfaces explicit: blind spots, fake confidence vectors, and production assumptions that would hurt if wrong."
|
|
141
|
+
}
|
|
142
|
+
],
|
|
143
|
+
"requireConfirmation": false
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"id": "phase-3-reviewer-family-bundle",
|
|
147
|
+
"title": "Phase 3: Parallel Reviewer Family Bundle",
|
|
148
|
+
"runCondition": {
|
|
149
|
+
"var": "needsReviewerBundle",
|
|
150
|
+
"equals": true
|
|
151
|
+
},
|
|
152
|
+
"promptBlocks": {
|
|
153
|
+
"goal": "Run the selected reviewer families in parallel from the same readiness fact packet, then synthesize their output as evidence rather than conclusions.",
|
|
154
|
+
"constraints": [
|
|
155
|
+
[
|
|
156
|
+
{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }
|
|
157
|
+
],
|
|
158
|
+
[
|
|
159
|
+
{ "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }
|
|
160
|
+
],
|
|
161
|
+
"Each reviewer family must use `readinessFactPacket` as primary truth.",
|
|
162
|
+
"Use `readinessHypothesis` only as comparison context.",
|
|
163
|
+
"Reviewer-family outputs are raw evidence, not canonical audit state."
|
|
164
|
+
],
|
|
165
|
+
"procedure": [
|
|
166
|
+
"Before delegating, restate the current `readinessHypothesis` and say which reviewer family is most likely to challenge it.",
|
|
167
|
+
"Each reviewer family must return: top findings, strongest evidence, biggest uncertainty, likely false-confidence vector, and what would most likely falsify its current conclusion.",
|
|
168
|
+
"Family missions: `correctness_debugging` = logic defects, contradictory state, failure paths, unsafe assumptions, and strongest debugging leads; `runtime_operability` = production behavior, concurrency/state flow, deployment assumptions, resilience, rollback pain, and observability under failure; `artifact_realism` = stale code, dead seams, placeholder behavior, misleading comments/docs, fake-data dependence, and surfaces that look complete but are not; `maintainability_debt` = complexity, duplication, brittle seams, drift, and future-change cost; `tests_observability` = test adequacy, verification blind spots, logging/monitoring gaps, hidden failure modes, and rollout confidence; `philosophy_patterns` = architectural consistency, repo-pattern drift, and principle tension; `security_performance` = trust boundaries, auth/permission mistakes, secrets handling, unsafe inputs, data exposure, expensive paths, unbounded work, and performance cliffs likely to matter in production; `false_positive_skeptic` = challenge overreach, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue family the others may miss.",
|
|
169
|
+
"Mode-adaptive parallelism: spawn one WorkRail Executor per selected reviewer family, all simultaneously; STANDARD = typically FOUR or FIVE executors; THOROUGH = typically SIX or SEVEN executors.",
|
|
170
|
+
"After receiving outputs, explicitly synthesize: what reviewer families confirmed, what was genuinely new, what appeared weak or overreached, and what changed your mind or did not.",
|
|
171
|
+
"Build a compact `familyEvidenceLedger` for each selected family covering its strongest concern, strongest evidence, biggest uncertainty, and what could make it wrong."
|
|
172
|
+
],
|
|
173
|
+
"outputRequired": {
|
|
174
|
+
"notesMarkdown": "Reviewer-family synthesis, contradictions, blind spots, false-positive challenges, and the family evidence ledger.",
|
|
175
|
+
"context": "Capture familyEvidenceLedger, familyFindingsSummary, contradictionCount, blindSpotCount, falsePositiveRiskCount, coverageUncertainCount, and needsSimulation."
|
|
176
|
+
},
|
|
177
|
+
"verify": [
|
|
178
|
+
"The same fact packet was used as primary truth across reviewer families.",
|
|
179
|
+
"Reviewer-family output is not treated as self-finalizing.",
|
|
180
|
+
"Contradictions, blind spots, and false-positive risks are reflected structurally in context."
|
|
181
|
+
]
|
|
182
|
+
},
|
|
183
|
+
"requireConfirmation": false
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"id": "phase-4-evidence-and-contradiction-loop",
|
|
187
|
+
"type": "loop",
|
|
188
|
+
"title": "Phase 4: Evidence and Contradiction Loop",
|
|
189
|
+
"loop": {
|
|
190
|
+
"type": "while",
|
|
191
|
+
"conditionSource": {
|
|
192
|
+
"kind": "artifact_contract",
|
|
193
|
+
"contractRef": "wr.contracts.loop_control",
|
|
194
|
+
"loopId": "readiness_synthesis_loop"
|
|
195
|
+
},
|
|
196
|
+
"maxIterations": 4
|
|
197
|
+
},
|
|
198
|
+
"body": [
|
|
199
|
+
{
|
|
200
|
+
"id": "phase-4a-targeted-follow-up",
|
|
201
|
+
"title": "Targeted Follow-Up Bundle",
|
|
202
|
+
"promptBlocks": {
|
|
203
|
+
"goal": "If contradictions, blind spots, or important coverage gaps remain, run only the smallest targeted follow-up needed.",
|
|
204
|
+
"constraints": [
|
|
205
|
+
[
|
|
206
|
+
{ "kind": "ref", "refId": "wr.refs.parallelize_cognition_serialize_synthesis" }
|
|
207
|
+
],
|
|
208
|
+
"Prefer one compact targeted bundle over repeated broad delegation moments.",
|
|
209
|
+
"Do not regather broad context unless a contradiction proves the original fact packet is insufficient.",
|
|
210
|
+
"Targeted follow-up output is evidence only and must still be synthesized by the main agent."
|
|
211
|
+
],
|
|
212
|
+
"procedure": [
|
|
213
|
+
"Before delegating, state the current likely readiness verdict, the strongest unresolved concern, and what result would change your mind.",
|
|
214
|
+
"If `contradictionCount > 0`, run targeted challenge or validation aimed at the specific disagreement.",
|
|
215
|
+
"If `coverageUncertainCount > 0` or `blindSpotCount > 0`, run the smallest reviewer-family or context follow-up needed to close the gap.",
|
|
216
|
+
"If `needsSimulation = true`, include `routine-execution-simulation`.",
|
|
217
|
+
"If `falsePositiveRiskCount > 0`, include `routine-hypothesis-challenge`.",
|
|
218
|
+
"If philosophy tension is materially affecting severity or verdict quality, include `routine-philosophy-alignment`.",
|
|
219
|
+
"If no trigger fires, do not delegate this step."
|
|
220
|
+
],
|
|
221
|
+
"outputRequired": {
|
|
222
|
+
"notesMarkdown": "What targeted follow-up ran, why it was needed, and what it resolved or failed to resolve."
|
|
223
|
+
},
|
|
224
|
+
"verify": [
|
|
225
|
+
"Only the smallest targeted bundle needed was run.",
|
|
226
|
+
"No broad context regather happened without an explicit contradiction-driven reason."
|
|
227
|
+
]
|
|
228
|
+
},
|
|
229
|
+
"requireConfirmation": false
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"id": "phase-4b-canonical-synthesis",
|
|
233
|
+
"title": "Canonical Synthesis and Coverage Update",
|
|
234
|
+
"promptBlocks": {
|
|
235
|
+
"goal": "Turn reviewer-family evidence and follow-up work into one canonical readiness state.",
|
|
236
|
+
"constraints": [
|
|
237
|
+
"If a blocker-grade or major-grade finding is still only plausible, say so plainly instead of silently upgrading it.",
|
|
238
|
+
"If a domain remains uncertain, carry that uncertainty into the final verdict."
|
|
239
|
+
],
|
|
240
|
+
"procedure": [
|
|
241
|
+
"Revisit `readinessHypothesis`: say what the evidence confirmed, what it challenged, what changed your mind, what held firm, and what you explicitly reject.",
|
|
242
|
+
"Apply this decision table: if multiple reviewer families independently surface the same serious issue with compatible evidence, treat it as strongly supported; if severities disagree, default upward only when the lower-severity position lacks concrete counter-evidence; if one family says false positive and another says valid issue, explicitly adjudicate the disagreement in notes before finalization; if simulation reveals a new operational risk, add a new finding and re-evaluate verdict confidence.",
|
|
243
|
+
"Update the findings ledger and classify each material finding as Confirmed, Plausible, or Rejected.",
|
|
244
|
+
"Update the coverage ledger honestly: move a domain to `checked` only when evidence is materially adequate; keep it `uncertain` if disagreement or missing evidence still affects verdict quality; use `not_applicable` only when the scope truly does not engage that area; clear `contradicted` only when the contradiction is explicitly resolved.",
|
|
245
|
+
"Cap `finalConfidenceBand` downward when unresolved blind spots still cover materially risky space, and prefer `inconclusive` later if those blind spots remain decision-relevant without a cheap next check."
|
|
246
|
+
],
|
|
247
|
+
"outputRequired": {
|
|
248
|
+
"notesMarkdown": "Canonical findings ledger update, readiness-hypothesis comparison, coverage update, and confidence update.",
|
|
249
|
+
"context": "Capture findingsLedger, confirmedFindingsCount, plausibleFindingsCount, rejectedFindingsCount, blockerCount, majorGapCount, coverageLedger, coverageUncertainCount, contradictionCount, blindSpotCount, falsePositiveRiskCount, finalConfidenceBand, needsEvidenceRefinement."
|
|
250
|
+
},
|
|
251
|
+
"verify": [
|
|
252
|
+
"Decision-driving findings are explicitly classified.",
|
|
253
|
+
"Coverage status matches the actual evidence quality."
|
|
254
|
+
]
|
|
255
|
+
},
|
|
256
|
+
"requireConfirmation": false
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"id": "phase-4c-loop-decision",
|
|
260
|
+
"title": "Synthesis Loop Decision",
|
|
261
|
+
"promptBlocks": {
|
|
262
|
+
"goal": "Decide whether the evidence-and-contradiction loop should continue.",
|
|
263
|
+
"constraints": [
|
|
264
|
+
"Use the trigger rules, not vibes."
|
|
265
|
+
],
|
|
266
|
+
"procedure": [
|
|
267
|
+
"Continue if `contradictionCount > 0`.",
|
|
268
|
+
"Otherwise continue if `coverageUncertainCount > 0` and the uncertainty materially affects the verdict.",
|
|
269
|
+
"Otherwise continue if `falsePositiveRiskCount > 0` for a serious finding, or if `blindSpotCount > 0` for uncovered materially risky space.",
|
|
270
|
+
"Otherwise continue if `needsEvidenceRefinement = true`.",
|
|
271
|
+
"Otherwise stop."
|
|
272
|
+
],
|
|
273
|
+
"outputRequired": {
|
|
274
|
+
"artifact": "Emit a `wr.loop_control` artifact for `readiness_synthesis_loop` with `decision` set to `continue` or `stop`."
|
|
275
|
+
},
|
|
276
|
+
"verify": [
|
|
277
|
+
"The emitted `wr.loop_control` artifact satisfies the loop-control contract, and its `decision` follows the trigger rules above rather than a predetermined answer."
|
|
278
|
+
]
|
|
279
|
+
},
|
|
280
|
+
"outputContract": {
|
|
281
|
+
"contractRef": "wr.contracts.loop_control"
|
|
282
|
+
},
|
|
283
|
+
"requireConfirmation": false
|
|
284
|
+
}
|
|
285
|
+
]
|
|
286
|
+
},
|
|
287
|
+
{
|
|
288
|
+
"id": "phase-5-final-validation",
|
|
289
|
+
"title": "Phase 5: Final Validation",
|
|
290
|
+
"promptBlocks": {
|
|
291
|
+
"goal": "Stress-test the current readiness verdict before final handoff.",
|
|
292
|
+
"constraints": [
|
|
293
|
+
[
|
|
294
|
+
{ "kind": "ref", "refId": "wr.refs.adversarial_challenge_rules" }
|
|
295
|
+
],
|
|
296
|
+
[
|
|
297
|
+
{ "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }
|
|
298
|
+
],
|
|
299
|
+
"Validation output is evidence to synthesize, not automatic authority."
|
|
300
|
+
],
|
|
301
|
+
"procedure": [
|
|
302
|
+
"Run final validation if any of these are true: `criticalSurfaceTouched = true`, `needsSimulation = true`, `falsePositiveRiskCount > 0`, `blindSpotCount > 0`, `coverageUncertainCount > 0`, or `finalConfidenceBand != High`.",
|
|
303
|
+
"Before delegating, state: what is your current verdict, where are you least confident, and what finding would most likely change your mind now.",
|
|
304
|
+
"Set the current readiness verdict before any validator runs: `ready`, `ready_with_conditions`, `not_ready`, or `inconclusive`.",
|
|
305
|
+
"Use `inconclusive` deliberately when material coverage uncertainty or unresolved blind spots remain and there is no bounded next check that would resolve them cheaply.",
|
|
306
|
+
"Mode-adaptive validation: QUICK = self-validate and optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` if a serious uncertainty remains; STANDARD = if validation is required and delegation is available, spawn TWO WorkRail Executors simultaneously running `routine-hypothesis-challenge` and either `routine-execution-simulation` or `routine-final-verification`; THOROUGH = if validation is required and delegation is available, spawn THREE WorkRail Executors simultaneously running `routine-hypothesis-challenge`, `routine-execution-simulation` when needed, and `routine-final-verification`.",
|
|
307
|
+
"After receiving validator output, explicitly synthesize what was confirmed, what was new, what appears weak, and whether your verdict changed.",
|
|
308
|
+
"State explicitly whether the verdict is being limited by unresolved contradictions, unresolved false-positive risk, blind spots, or coverage uncertainty."
|
|
309
|
+
],
|
|
310
|
+
"outputRequired": {
|
|
311
|
+
"notesMarkdown": "Validation synthesis, verdict stress test, and any conditions the verdict still depends on.",
|
|
312
|
+
"context": "Capture finalVerdict, finalConfidenceBand, validationSummary, followUpCount, and verdictConditions."
|
|
313
|
+
},
|
|
314
|
+
"verify": [
|
|
315
|
+
"If multiple validators still raise serious concerns, confidence is downgraded and synthesis is reopened.",
|
|
316
|
+
"If exactly one validator raises a concern, it is adjudicated before finalization.",
|
|
317
|
+
"If no validator can materially break the current verdict and the evidence is internally consistent, proceed to handoff."
|
|
318
|
+
]
|
|
319
|
+
},
|
|
320
|
+
"requireConfirmation": {
|
|
321
|
+
"or": [
|
|
322
|
+
{ "var": "finalConfidenceBand", "equals": "Low" },
|
|
323
|
+
{ "var": "finalVerdict", "equals": "inconclusive" }
|
|
324
|
+
]
|
|
325
|
+
}
|
|
326
|
+
},
|
|
327
|
+
{
|
|
328
|
+
"id": "phase-6-final-handoff",
|
|
329
|
+
"title": "Phase 6: Final Handoff",
|
|
330
|
+
"promptBlocks": {
|
|
331
|
+
"goal": "Deliver the final production-readiness handoff for a human decision-maker.",
|
|
332
|
+
"constraints": [
|
|
333
|
+
"This workflow informs a decision. It does not approve a release or make code changes by itself.",
|
|
334
|
+
"Do not drift into implementation planning, patch sequencing, or PR execution unless the user explicitly asks."
|
|
335
|
+
],
|
|
336
|
+
"procedure": [
|
|
337
|
+
"Summarize the target scope, audit intent, final verdict, and confidence band.",
|
|
338
|
+
"List blocker-grade findings, major gaps, strongest remaining uncertainties, top confirmed findings, and plausible but unresolved findings that still matter.",
|
|
339
|
+
"Call out the strongest debugging leads, runtime or operational risks, artifact-realism concerns such as stale code or fake completeness, and the most important technical-debt themes.",
|
|
340
|
+
"Summarize the coverage ledger, especially any domains still uncertain or needing follow-up.",
|
|
341
|
+
"Give a remediation order and verification or monitoring follow-ups. Mention human-facing companion artifacts only if you actually created them."
|
|
342
|
+
],
|
|
343
|
+
"outputRequired": {
|
|
344
|
+
"notesMarkdown": "Decision-ready final handoff covering verdict, confidence, findings, coverage gaps, and recommended remediation order."
|
|
345
|
+
},
|
|
346
|
+
"verify": [
|
|
347
|
+
"The handoff is verdict-first and evidence-aware.",
|
|
348
|
+
"Open uncertainty is disclosed rather than hidden."
|
|
349
|
+
]
|
|
350
|
+
},
|
|
351
|
+
"requireConfirmation": false
|
|
352
|
+
}
|
|
353
|
+
]
|
|
354
|
+
}
|
|
@@ -1,25 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "workflow-for-workflows",
|
|
3
|
-
"name": "Workflow Authoring Workflow (
|
|
4
|
-
"version": "2.
|
|
5
|
-
"description": "Guides an agent through authoring or modernizing a WorkRail workflow: understand the task,
|
|
3
|
+
"name": "Workflow Authoring Workflow (Quality Gate v2)",
|
|
4
|
+
"version": "2.1.0",
|
|
5
|
+
"description": "Guides an agent through authoring or modernizing a WorkRail workflow with a stronger quality gate: understand the task, define effectiveness targets, design both workflow and quality architecture, draft, validate, simulate execution, run adversarial review, redesign if needed, and only then hand off.",
|
|
6
6
|
"recommendedPreferences": {
|
|
7
7
|
"recommendedAutonomy": "guided",
|
|
8
8
|
"recommendedRiskPolicy": "conservative"
|
|
9
9
|
},
|
|
10
|
+
"features": [
|
|
11
|
+
"wr.features.subagent_guidance"
|
|
12
|
+
],
|
|
10
13
|
"preconditions": [
|
|
11
14
|
"User has a recurring task or problem a workflow should solve, or an existing workflow that should be modernized.",
|
|
12
15
|
"Agent has access to file creation, editing, and terminal tools.",
|
|
13
|
-
"Agent can run workflow validators
|
|
16
|
+
"Agent can run workflow validators such as `npm run validate:registry` or equivalent."
|
|
14
17
|
],
|
|
15
18
|
"metaGuidance": [
|
|
16
19
|
"REFERENCE HIERARCHY: treat workflow-schema as legal truth for structure. Treat authoring-spec as canonical current guidance for what makes a workflow good. Treat authoring-provenance as optional maintainer context only.",
|
|
17
20
|
"META DISTINCTION: you are authoring or modernizing a workflow, not executing one. Keep the authored workflow's concerns separate from this meta-workflow's execution.",
|
|
21
|
+
"QUALITY-GATE ROLE: this workflow is the trust gate for other workflows. It must optimize not only for validity and modern authoring, but also for task effectiveness, false-confidence resistance, and future maintainability.",
|
|
18
22
|
"DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for business decisions about the workflow being authored or modernized, not things you can learn from the schema, authoring spec, or example workflows.",
|
|
19
23
|
"AUTHORED VOICE: prompts in the authored workflow must be user-voiced. No middleware narration, no pseudo-DSL, no tutorial framing, no teaching-product language.",
|
|
20
|
-
"
|
|
21
|
-
"VOICE EXAMPLES: Coding: 'Review the changes in this MR.' Ops: 'Check whether the pipeline is healthy.' Content: 'Read the draft and check the argument.' NOT: 'The system will now perform a comprehensive analysis of...'",
|
|
24
|
+
"BASELINE DISCIPLINE: choose both an authoring baseline and an outcome baseline whenever possible. Copy structural patterns, not domain language.",
|
|
22
25
|
"VALIDATION GATE: validate with real validators, not regex approximations. When validator output and authoring assumptions conflict, runtime wins.",
|
|
26
|
+
"DEEP REVIEW: authoring integrity and outcome effectiveness are separate concerns. A workflow is not ready unless both pass.",
|
|
27
|
+
"THOROUGH MODE: for complex or high-trust workflow work, prefer the deepest review path: state economy audit, execution simulation, adversarial review, and redesign if hard gates fail.",
|
|
23
28
|
"ARTIFACT STRATEGY: the workflow JSON file is the primary output. Intermediate notes go in output.notesMarkdown. Do not create extra planning artifacts unless the workflow is genuinely complex.",
|
|
24
29
|
"V2 DURABILITY: use output.notesMarkdown as the primary durable record. Do not mirror execution state into CONTEXT.md or markdown checkpoint files.",
|
|
25
30
|
"ANTI-PATTERNS TO AVOID IN AUTHORED WORKFLOWS: no pseudo-function metaGuidance, no learning-path branching, no satisfaction-score loops, no heavy clarification batteries, no regex-as-primary-validation, no celebration phases.",
|
|
@@ -76,28 +81,120 @@
|
|
|
76
81
|
},
|
|
77
82
|
{
|
|
78
83
|
"id": "lean-coding-workflow",
|
|
79
|
-
"title": "Lean Coding Workflow (Modern
|
|
84
|
+
"title": "Lean Coding Workflow (Modern Authoring Baseline)",
|
|
80
85
|
"source": "workflows/coding-task-workflow-agentic.lean.v2.json",
|
|
81
86
|
"resolveFrom": "package",
|
|
82
|
-
"purpose": "
|
|
87
|
+
"purpose": "Strong modern example for engine-native authoring patterns, loop semantics, prompt density, and bounded delegation.",
|
|
88
|
+
"authoritative": false
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"id": "mr-review-workflow",
|
|
92
|
+
"title": "MR Review Workflow (Outcome Baseline Example)",
|
|
93
|
+
"source": "workflows/mr-review-workflow.agentic.v2.json",
|
|
94
|
+
"resolveFrom": "package",
|
|
95
|
+
"purpose": "Strong example of hypothesis, neutral fact packet, reviewer families, contradiction synthesis, and final validation.",
|
|
96
|
+
"authoritative": false
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"id": "readiness-audit-workflow",
|
|
100
|
+
"title": "Production Readiness Audit (Audit Baseline Example)",
|
|
101
|
+
"source": "workflows/production-readiness-audit.json",
|
|
102
|
+
"resolveFrom": "package",
|
|
103
|
+
"purpose": "Example of a thorough evidence-driven audit workflow with explicit reviewer-family structure and confidence handling.",
|
|
83
104
|
"authoritative": false
|
|
84
105
|
}
|
|
85
106
|
],
|
|
86
107
|
"steps": [
|
|
87
108
|
{
|
|
88
|
-
"id": "phase-0-understand",
|
|
89
|
-
"title": "Phase 0: Understand
|
|
90
|
-
"
|
|
109
|
+
"id": "phase-0-understand-and-classify",
|
|
110
|
+
"title": "Phase 0: Understand and Classify the Authoring Task",
|
|
111
|
+
"promptBlocks": {
|
|
112
|
+
"goal": "Understand what workflow you are authoring or modernizing, and classify the task before you design anything.",
|
|
113
|
+
"constraints": [
|
|
114
|
+
[
|
|
115
|
+
{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }
|
|
116
|
+
],
|
|
117
|
+
"Explore first. Ask the user only what you genuinely cannot determine with tools and references.",
|
|
118
|
+
"Choose baselines as models, not templates. Copy structural patterns, not another workflow's domain voice."
|
|
119
|
+
],
|
|
120
|
+
"procedure": [
|
|
121
|
+
"Read the schema, authoring spec, v2 authoring guides, and the strongest relevant example workflows.",
|
|
122
|
+
"Decide `authoringMode`: `create` or `modernize_existing`.",
|
|
123
|
+
"Classify the target workflow archetype: `review_audit`, `coding_execution`, `diagnostic_investigation`, `planning_design`, `linear_operational`, or `content_analysis`.",
|
|
124
|
+
"Classify `workflowComplexity`: Simple, Medium, or Complex. Classify `rigorMode`: QUICK, STANDARD, or THOROUGH.",
|
|
125
|
+
"Choose an `authoringBaseline` for engine-native authoring quality and an `outcomeBaseline` for the kind of job the authored workflow should perform. If no good baseline exists for one of them, set it to `none` and explain why.",
|
|
126
|
+
"If `authoringMode = modernize_existing`, identify what must stay the same about purpose, what feels stale, and what modernization constraints apply."
|
|
127
|
+
],
|
|
128
|
+
"outputRequired": {
|
|
129
|
+
"notesMarkdown": "Task understanding, baseline choices, patterns to borrow or avoid, and any real open questions.",
|
|
130
|
+
"context": "Capture authoringMode, workflowArchetype, workflowComplexity, rigorMode, taskDescription, intendedAudience, successCriteria, domainConstraints, targetWorkflowPath, modernizationGoals, authoringBaseline, outcomeBaseline, baselineDecisionRationale, authoringPatternsToBorrow, outcomePatternsToBorrow, patternsToAvoid, openQuestions."
|
|
131
|
+
},
|
|
132
|
+
"verify": [
|
|
133
|
+
"The task is understood well enough to design the workflow without guessing blindly.",
|
|
134
|
+
"Both authoring and outcome baselines are explicit, or their absence is justified."
|
|
135
|
+
]
|
|
136
|
+
},
|
|
91
137
|
"requireConfirmation": true
|
|
92
138
|
},
|
|
93
139
|
{
|
|
94
|
-
"id": "phase-1-
|
|
95
|
-
"title": "Phase 1:
|
|
140
|
+
"id": "phase-1-define-effectiveness-target",
|
|
141
|
+
"title": "Phase 1: Define the Effectiveness Target",
|
|
142
|
+
"promptBlocks": {
|
|
143
|
+
"goal": "Define what success should feel like for the authored workflow, not just what fields it should contain.",
|
|
144
|
+
"constraints": [
|
|
145
|
+
"Be specific about user satisfaction and dangerous false-confidence outcomes.",
|
|
146
|
+
"Distinguish a technically valid workflow from a satisfying one."
|
|
147
|
+
],
|
|
148
|
+
"procedure": [
|
|
149
|
+
"State what result the authored workflow should reliably produce for its user.",
|
|
150
|
+
"List the criteria that would make the workflow feel genuinely satisfying in practice.",
|
|
151
|
+
"Name the biggest likely failure mode and the most dangerous false-confidence mode.",
|
|
152
|
+
"State what would make the workflow technically correct but still disappointing."
|
|
153
|
+
],
|
|
154
|
+
"outputRequired": {
|
|
155
|
+
"notesMarkdown": "Effectiveness target, satisfaction criteria, failure modes, and false-confidence risks.",
|
|
156
|
+
"context": "Capture effectivenessTarget, userSatisfactionCriteria, primaryFailureMode, dangerousFalseConfidenceModes, likelyWeakOutcomeModes, and trustRisk."
|
|
157
|
+
},
|
|
158
|
+
"verify": [
|
|
159
|
+
"The authored workflow now has a clear outcome bar, not just an authoring bar."
|
|
160
|
+
]
|
|
161
|
+
},
|
|
162
|
+
"requireConfirmation": false
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
"id": "phase-2-design-workflow-architecture",
|
|
166
|
+
"title": "Phase 2: Design the Workflow Architecture",
|
|
96
167
|
"runCondition": {
|
|
97
168
|
"var": "workflowComplexity",
|
|
98
169
|
"not_equals": "Simple"
|
|
99
170
|
},
|
|
100
|
-
"
|
|
171
|
+
"promptBlocks": {
|
|
172
|
+
"goal": "Decide the workflow architecture before you write JSON.",
|
|
173
|
+
"constraints": [
|
|
174
|
+
"Separate workflow architecture from quality-gate architecture. This phase is about the authored workflow itself.",
|
|
175
|
+
"Keep delegation bounded and keep ownership with the main agent."
|
|
176
|
+
],
|
|
177
|
+
"procedure": [
|
|
178
|
+
"Decide the phase list, one-line goal for each phase, and overall ordering.",
|
|
179
|
+
"Design loops with explicit exit rules, bounded maxIterations, and real reasons for another pass.",
|
|
180
|
+
"Decide confirmation gates, delegation vs template injection vs direct execution, promptFragments, references, artifacts, and metaGuidance.",
|
|
181
|
+
"If `authoringMode = modernize_existing`, decide whether the plan is preserve-in-place, restructure, or rewrite, and map legacy behaviors as `keep`, `merge`, `remove`, or `replace`."
|
|
182
|
+
],
|
|
183
|
+
"outputRequired": {
|
|
184
|
+
"notesMarkdown": "Structured workflow outline, loop design, confirmation design, delegation design, artifact plan, and modernization mapping.",
|
|
185
|
+
"context": "Capture workflowOutline, loopDesign, confirmationDesign, delegationDesign, artifactPlan, contextModel, voiceStrategy, routineAudit, delegationBoundaries, templateInjectionPlan, modernizationStrategy, legacyMapping, and behaviorPreservationNotes."
|
|
186
|
+
},
|
|
187
|
+
"verify": [
|
|
188
|
+
"The authored workflow architecture is coherent before JSON drafting begins."
|
|
189
|
+
]
|
|
190
|
+
},
|
|
191
|
+
"promptFragments": [
|
|
192
|
+
{
|
|
193
|
+
"id": "phase-2-simple-direct",
|
|
194
|
+
"when": { "var": "workflowComplexity", "equals": "Simple" },
|
|
195
|
+
"text": "For Simple workflows, keep the architecture linear and compact. Do not invent loops or ceremony unless the task truly needs them."
|
|
196
|
+
}
|
|
197
|
+
],
|
|
101
198
|
"requireConfirmation": {
|
|
102
199
|
"or": [
|
|
103
200
|
{ "var": "workflowComplexity", "not_equals": "Simple" },
|
|
@@ -106,22 +203,71 @@
|
|
|
106
203
|
}
|
|
107
204
|
},
|
|
108
205
|
{
|
|
109
|
-
"id": "phase-
|
|
110
|
-
"title": "Phase
|
|
111
|
-
"
|
|
206
|
+
"id": "phase-3-design-quality-architecture",
|
|
207
|
+
"title": "Phase 3: Design the Quality-Gate Architecture",
|
|
208
|
+
"promptBlocks": {
|
|
209
|
+
"goal": "Design how the authored workflow will avoid shallow results, false confidence, and state bloat.",
|
|
210
|
+
"constraints": [
|
|
211
|
+
"This phase is about the authored workflow's quality model, not its basic phase list.",
|
|
212
|
+
"Prefer explicit quality structure over hoping the agent will infer it."
|
|
213
|
+
],
|
|
214
|
+
"procedure": [
|
|
215
|
+
"Decide whether the authored workflow needs a hypothesis step, neutral fact packet, reviewer or validator families, contradiction loop, final validation bundle, or explicit blind-spot handling.",
|
|
216
|
+
"Design the confidence model, blind-spot model, and state economy plan.",
|
|
217
|
+
"Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail.",
|
|
218
|
+
"Write the redesign triggers that should force architectural revision rather than cosmetic refinement."
|
|
219
|
+
],
|
|
220
|
+
"outputRequired": {
|
|
221
|
+
"notesMarkdown": "Quality architecture, confidence model, blind-spot model, state economy plan, and hard-gate triggers.",
|
|
222
|
+
"context": "Capture qualityArchitecture, confidenceModel, blindSpotModel, stateEconomyPlan, reviewBundlePlan, qualityGateTriggers, and hardGateModel."
|
|
223
|
+
},
|
|
224
|
+
"verify": [
|
|
225
|
+
"The authored workflow has an explicit plan for false-confidence resistance and quality review."
|
|
226
|
+
]
|
|
227
|
+
},
|
|
228
|
+
"requireConfirmation": {
|
|
229
|
+
"or": [
|
|
230
|
+
{ "var": "rigorMode", "equals": "THOROUGH" },
|
|
231
|
+
{ "var": "workflowComplexity", "equals": "Complex" }
|
|
232
|
+
]
|
|
233
|
+
}
|
|
234
|
+
},
|
|
235
|
+
{
|
|
236
|
+
"id": "phase-4-draft-or-revise",
|
|
237
|
+
"title": "Phase 4: Draft or Revise the Workflow",
|
|
238
|
+
"promptBlocks": {
|
|
239
|
+
"goal": "Write the workflow JSON file using the architecture and quality model you already chose.",
|
|
240
|
+
"constraints": [
|
|
241
|
+
"The schema defines what is legal. The authoring spec defines what is good.",
|
|
242
|
+
"Write prompts in the user's voice. Vary prompt density by step needs rather than using one density everywhere.",
|
|
243
|
+
"If you are modernizing, preserve what still fits the workflow's purpose. Do not rewrite just because a workflow is old."
|
|
244
|
+
],
|
|
245
|
+
"procedure": [
|
|
246
|
+
"If `authoringMode = create` and no filename was specified, ask the user for the filename before writing.",
|
|
247
|
+
"If `authoringMode = modernize_existing`, default to editing `targetWorkflowPath` unless there is a strong reason to create a new variant or file.",
|
|
248
|
+
"Write the workflow file. Keep protocol requirements explicit, loops bounded, confirmations meaningful, and metaGuidance clean."
|
|
249
|
+
],
|
|
250
|
+
"outputRequired": {
|
|
251
|
+
"notesMarkdown": "Draft status and any notable authoring choices that are important to later review.",
|
|
252
|
+
"context": "Capture workflowFilePath and draftComplete."
|
|
253
|
+
},
|
|
254
|
+
"verify": [
|
|
255
|
+
"The workflow file exists and reflects the chosen architecture rather than an improvised one."
|
|
256
|
+
]
|
|
257
|
+
},
|
|
112
258
|
"promptFragments": [
|
|
113
259
|
{
|
|
114
|
-
"id": "phase-
|
|
260
|
+
"id": "phase-4-simple-fast",
|
|
115
261
|
"when": { "var": "workflowComplexity", "equals": "Simple" },
|
|
116
|
-
"text": "
|
|
262
|
+
"text": "For Simple workflows, keep the file compact and linear. Do not create extra metaGuidance or loops unless the task truly needs them."
|
|
117
263
|
}
|
|
118
264
|
],
|
|
119
265
|
"requireConfirmation": false
|
|
120
266
|
},
|
|
121
267
|
{
|
|
122
|
-
"id": "phase-
|
|
268
|
+
"id": "phase-5-validate",
|
|
123
269
|
"type": "loop",
|
|
124
|
-
"title": "Phase
|
|
270
|
+
"title": "Phase 5: Structural Validation Loop",
|
|
125
271
|
"loop": {
|
|
126
272
|
"type": "while",
|
|
127
273
|
"conditionSource": {
|
|
@@ -133,76 +279,267 @@
|
|
|
133
279
|
},
|
|
134
280
|
"body": [
|
|
135
281
|
{
|
|
136
|
-
"id": "phase-
|
|
282
|
+
"id": "phase-5a-run-validation",
|
|
137
283
|
"title": "Run Validation",
|
|
138
|
-
"
|
|
284
|
+
"promptBlocks": {
|
|
285
|
+
"goal": "Run the real workflow validators against the drafted workflow.",
|
|
286
|
+
"constraints": [
|
|
287
|
+
"Do not rely on reading the JSON and eyeballing it.",
|
|
288
|
+
"If runtime and authoring assumptions conflict, runtime wins."
|
|
289
|
+
],
|
|
290
|
+
"procedure": [
|
|
291
|
+
"Run the available validation tools or commands such as `npm run validate:registry`, schema validation, or the MCP validation surface.",
|
|
292
|
+
"If validation fails, list the actual errors, fix them in the workflow file, and re-run validation.",
|
|
293
|
+
"If validation passes cleanly, say so plainly."
|
|
294
|
+
],
|
|
295
|
+
"outputRequired": {
|
|
296
|
+
"notesMarkdown": "Validation results, actual errors if any, and what was fixed.",
|
|
297
|
+
"context": "Capture validationErrors and validationPassed."
|
|
298
|
+
},
|
|
299
|
+
"verify": [
|
|
300
|
+
"Validation results are based on real validators, not approximations."
|
|
301
|
+
]
|
|
302
|
+
},
|
|
139
303
|
"promptFragments": [
|
|
140
304
|
{
|
|
141
|
-
"id": "phase-
|
|
305
|
+
"id": "phase-5a-thorough",
|
|
142
306
|
"when": { "var": "rigorMode", "equals": "THOROUGH" },
|
|
143
|
-
"text": "After
|
|
307
|
+
"text": "After structural validation passes, also check the workflow manually against required-level authoring-spec rules and fix any failures before moving on."
|
|
144
308
|
}
|
|
145
309
|
],
|
|
146
310
|
"requireConfirmation": false
|
|
147
311
|
},
|
|
148
312
|
{
|
|
149
|
-
"id": "phase-
|
|
313
|
+
"id": "phase-5b-loop-decision",
|
|
150
314
|
"title": "Validation Loop Decision",
|
|
151
|
-
"
|
|
152
|
-
|
|
315
|
+
"promptBlocks": {
|
|
316
|
+
"goal": "Decide whether structural validation needs another pass.",
|
|
317
|
+
"constraints": [
|
|
318
|
+
"Use validator state, not vibes."
|
|
319
|
+
],
|
|
320
|
+
"procedure": [
|
|
321
|
+
"If all errors are fixed and validation passes, stop.",
|
|
322
|
+
"If you fixed errors but have not re-validated yet, continue.",
|
|
323
|
+
"If you hit the iteration limit, stop and record what remains."
|
|
324
|
+
],
|
|
325
|
+
"outputRequired": {
|
|
326
|
+
"artifact": "Emit a `wr.loop_control` artifact for `validation_loop` with `decision` set to `continue` or `stop`."
|
|
327
|
+
},
|
|
328
|
+
"verify": [
|
|
329
|
+
"The loop decision matches the actual validation state."
|
|
330
|
+
]
|
|
331
|
+
},
|
|
153
332
|
"outputContract": {
|
|
154
333
|
"contractRef": "wr.contracts.loop_control"
|
|
155
|
-
}
|
|
334
|
+
},
|
|
335
|
+
"requireConfirmation": false
|
|
156
336
|
}
|
|
157
337
|
]
|
|
158
338
|
},
|
|
159
339
|
{
|
|
160
|
-
"id": "phase-
|
|
340
|
+
"id": "phase-5-escalation",
|
|
161
341
|
"title": "Validation Escalation",
|
|
162
342
|
"runCondition": {
|
|
163
343
|
"var": "validationPassed",
|
|
164
344
|
"equals": false
|
|
165
345
|
},
|
|
166
|
-
"
|
|
346
|
+
"promptBlocks": {
|
|
347
|
+
"goal": "Do not silently continue with a structurally broken workflow.",
|
|
348
|
+
"constraints": [
|
|
349
|
+
"Present the situation honestly."
|
|
350
|
+
],
|
|
351
|
+
"procedure": [
|
|
352
|
+
"List the remaining validation errors and assess their severity.",
|
|
353
|
+
"Present the options: proceed with known issues documented, or stop so the user can intervene manually."
|
|
354
|
+
]
|
|
355
|
+
},
|
|
167
356
|
"requireConfirmation": true
|
|
168
357
|
},
|
|
169
358
|
{
|
|
170
|
-
"id": "phase-
|
|
171
|
-
"
|
|
172
|
-
"
|
|
173
|
-
"
|
|
359
|
+
"id": "phase-6-quality-gate-loop",
|
|
360
|
+
"type": "loop",
|
|
361
|
+
"title": "Phase 6: Quality-Gate Loop",
|
|
362
|
+
"loop": {
|
|
363
|
+
"type": "while",
|
|
364
|
+
"conditionSource": {
|
|
365
|
+
"kind": "artifact_contract",
|
|
366
|
+
"contractRef": "wr.contracts.loop_control",
|
|
367
|
+
"loopId": "quality_gate_loop"
|
|
368
|
+
},
|
|
369
|
+
"maxIterations": 2
|
|
370
|
+
},
|
|
371
|
+
"body": [
|
|
174
372
|
{
|
|
175
|
-
"id": "phase-
|
|
176
|
-
"
|
|
177
|
-
"
|
|
373
|
+
"id": "phase-6a-state-economy-audit",
|
|
374
|
+
"title": "State Economy Audit",
|
|
375
|
+
"promptBlocks": {
|
|
376
|
+
"goal": "Check whether every context field in the authored workflow earns its keep.",
|
|
377
|
+
"constraints": [
|
|
378
|
+
"A field is justified only if it materially affects routing, synthesis, confidence, or handoff quality.",
|
|
379
|
+
"Do not keep bookkeeping fields just because they sound organized."
|
|
380
|
+
],
|
|
381
|
+
"procedure": [
|
|
382
|
+
"For each meaningful captured context field, record where it is set, where it is consumed, what decision or outcome it influences, and what gets worse if it is removed.",
|
|
383
|
+
"Classify each field as `keep`, `wire`, or `remove`.",
|
|
384
|
+
"Fix weak or unused fields directly in the workflow file."
|
|
385
|
+
],
|
|
386
|
+
"outputRequired": {
|
|
387
|
+
"notesMarkdown": "State field audit with keep/wire/remove decisions and any fixes applied.",
|
|
388
|
+
"context": "Capture stateFieldAudit, unusedOrWeakFields, and stateEconomyPassed."
|
|
389
|
+
},
|
|
390
|
+
"verify": [
|
|
391
|
+
"Weak or unused fields are either wired meaningfully or removed."
|
|
392
|
+
]
|
|
393
|
+
},
|
|
394
|
+
"requireConfirmation": false
|
|
178
395
|
},
|
|
179
396
|
{
|
|
180
|
-
"id": "phase-
|
|
181
|
-
"
|
|
182
|
-
"
|
|
397
|
+
"id": "phase-6b-execution-simulation",
|
|
398
|
+
"title": "Execution Simulation",
|
|
399
|
+
"promptBlocks": {
|
|
400
|
+
"goal": "Simulate what would happen if the authored workflow ran on the user's real task.",
|
|
401
|
+
"constraints": [
|
|
402
|
+
"This is about practical utility, not only context-flow correctness.",
|
|
403
|
+
"Flag places where the workflow would produce paperwork, generic output, or false confidence instead of value."
|
|
404
|
+
],
|
|
405
|
+
"procedure": [
|
|
406
|
+
"Trace the authored workflow step by step against the user's actual task or the closest realistic scenario.",
|
|
407
|
+
"For each step, ask: what would the agent actually do, what context would it have, what would it likely produce, and what would the next step inherit?",
|
|
408
|
+
"Identify likely weak steps, likely unsatisfying outputs, and likely false-confidence modes.",
|
|
409
|
+
"Fix issues directly in the workflow file when the right improvement is clear."
|
|
410
|
+
],
|
|
411
|
+
"outputRequired": {
|
|
412
|
+
"notesMarkdown": "Execution simulation findings, likely weak steps, unsatisfying outputs, false-confidence risks, and any fixes applied.",
|
|
413
|
+
"context": "Capture simulationFindings, likelyWeakSteps, likelyUnsatisfyingOutputs, falseConfidenceFindings, and outcomeEffectivenessPassed."
|
|
414
|
+
},
|
|
415
|
+
"verify": [
|
|
416
|
+
"The simulation judges likely usefulness, not just structural legality."
|
|
417
|
+
]
|
|
418
|
+
},
|
|
419
|
+
"promptFragments": [
|
|
420
|
+
{
|
|
421
|
+
"id": "phase-6b-quick",
|
|
422
|
+
"when": { "var": "rigorMode", "equals": "QUICK" },
|
|
423
|
+
"text": "For QUICK rigor, keep the simulation compact but still answer where the workflow would likely disappoint the user if it disappointed them at all."
|
|
424
|
+
}
|
|
425
|
+
],
|
|
426
|
+
"requireConfirmation": false
|
|
183
427
|
},
|
|
184
428
|
{
|
|
185
|
-
"id": "phase-
|
|
186
|
-
"
|
|
187
|
-
"
|
|
429
|
+
"id": "phase-6c-adversarial-quality-review",
|
|
430
|
+
"title": "Adversarial Quality Review",
|
|
431
|
+
"promptBlocks": {
|
|
432
|
+
"goal": "Review the authored workflow as a quality gate, not just as a valid JSON file.",
|
|
433
|
+
"constraints": [
|
|
434
|
+
"Authoring integrity and outcome effectiveness are separate concerns. Score both.",
|
|
435
|
+
"Reviewer-family or validator output is evidence, not authority."
|
|
436
|
+
],
|
|
437
|
+
"procedure": [
|
|
438
|
+
"Score these dimensions 0-2 with one sentence of evidence each: `voiceClarity`, `ceremonyLevel`, `loopSoundness`, `delegationBoundedness`, `artifactClarity`, `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, `handoffUtility`, and `modernizationDiscipline`.",
|
|
439
|
+
"If delegation is available and rigor is THOROUGH, run an adversarial review bundle with these lenses: `engine_native_reviewer`, `task_effectiveness_reviewer`, `state_economy_reviewer`, `false_confidence_reviewer`, `domain_fit_reviewer`, and `maintainer_reviewer`.",
|
|
440
|
+
"Synthesize what the review confirmed, what it challenged, and what changed your mind.",
|
|
441
|
+
"Set hard-gate failures whenever any of these are materially weak: `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, or `handoffUtility`.",
|
|
442
|
+
"Set `authoringIntegrityPassed = true` only if structural and authoring-quality dimensions are all acceptable. Set `outcomeEffectivenessPassed = true` only if the workflow is likely to achieve satisfying results for the user."
|
|
443
|
+
],
|
|
444
|
+
"outputRequired": {
|
|
445
|
+
"notesMarkdown": "Quality review scores, adversarial review findings, hard-gate failures, and the current redesign severity.",
|
|
446
|
+
"context": "Capture reviewScores, hardGateFailures, authoringIntegrityPassed, outcomeEffectivenessPassed, qualityReviewSummary, and redesignSeverity."
|
|
447
|
+
},
|
|
448
|
+
"verify": [
|
|
449
|
+
"Hard gates reflect real user-trust risk, not cosmetic imperfections."
|
|
450
|
+
]
|
|
451
|
+
},
|
|
452
|
+
"promptFragments": [
|
|
453
|
+
{
|
|
454
|
+
"id": "phase-6c-standard",
|
|
455
|
+
"when": { "var": "rigorMode", "equals": "STANDARD" },
|
|
456
|
+
"text": "For STANDARD rigor, you may keep the review self-executed unless uncertainty remains material. If you do delegate, prefer a small adversarial bundle."
|
|
457
|
+
},
|
|
458
|
+
{
|
|
459
|
+
"id": "phase-6c-thorough",
|
|
460
|
+
"when": { "var": "rigorMode", "equals": "THOROUGH" },
|
|
461
|
+
"text": "For THOROUGH rigor, assume the first review is not enough. Use adversarial reviewer lanes unless a hard limitation makes them impossible."
|
|
462
|
+
}
|
|
463
|
+
],
|
|
464
|
+
"requireConfirmation": false
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
"id": "phase-6d-redesign-and-revalidate",
|
|
468
|
+
"title": "Redesign and Revalidate",
|
|
469
|
+
"promptBlocks": {
|
|
470
|
+
"goal": "If hard gates fail, redesign the workflow instead of polishing around the problem.",
|
|
471
|
+
"constraints": [
|
|
472
|
+
"Minor cosmetic refinement is not enough when task effectiveness or false-confidence resistance is weak.",
|
|
473
|
+
"If structure changes, re-run real validators before leaving this step."
|
|
474
|
+
],
|
|
475
|
+
"procedure": [
|
|
476
|
+
"If `authoringIntegrityPassed` and `outcomeEffectivenessPassed` are both true and `hardGateFailures` is empty, say that no redesign is needed.",
|
|
477
|
+
"Otherwise classify the needed redesign severity as `minor`, `architectural`, or `unsafe_to_ship` and apply the necessary fixes directly to the workflow file.",
|
|
478
|
+
"If the redesign changed structure, run the real validators again and update the validation state before leaving this step."
|
|
479
|
+
],
|
|
480
|
+
"outputRequired": {
|
|
481
|
+
"notesMarkdown": "Redesign actions taken, why they were needed, and whether revalidation passed.",
|
|
482
|
+
"context": "Capture redesignApplied, validationPassed, and remainingConcerns."
|
|
483
|
+
},
|
|
484
|
+
"verify": [
|
|
485
|
+
"Structural redesign problems are handled as redesign problems, not cosmetic ones."
|
|
486
|
+
]
|
|
487
|
+
},
|
|
488
|
+
"requireConfirmation": false
|
|
489
|
+
},
|
|
490
|
+
{
|
|
491
|
+
"id": "phase-6e-quality-loop-decision",
|
|
492
|
+
"title": "Quality Loop Decision",
|
|
493
|
+
"promptBlocks": {
|
|
494
|
+
"goal": "Decide whether the quality-gate loop needs another pass.",
|
|
495
|
+
"constraints": [
|
|
496
|
+
"Use hard gates and actual remaining concerns, not vibes."
|
|
497
|
+
],
|
|
498
|
+
"procedure": [
|
|
499
|
+
"Continue if `authoringIntegrityPassed = false`.",
|
|
500
|
+
"Otherwise continue if `outcomeEffectivenessPassed = false`.",
|
|
501
|
+
"Otherwise continue if `hardGateFailures` is not empty.",
|
|
502
|
+
"Otherwise continue if `redesignSeverity` is `architectural` or `unsafe_to_ship` and you have not yet re-reviewed the redesigned workflow.",
|
|
503
|
+
"Otherwise continue if `validationPassed = false` after redesign.",
|
|
504
|
+
"Otherwise stop."
|
|
505
|
+
],
|
|
506
|
+
"outputRequired": {
|
|
507
|
+
"artifact": "Emit a `wr.loop_control` artifact for `quality_gate_loop` with `decision` set to `continue` or `stop`."
|
|
508
|
+
},
|
|
509
|
+
"verify": [
|
|
510
|
+
"The workflow does not stop while hard trust problems remain."
|
|
511
|
+
]
|
|
512
|
+
},
|
|
513
|
+
"outputContract": {
|
|
514
|
+
"contractRef": "wr.contracts.loop_control"
|
|
515
|
+
},
|
|
516
|
+
"requireConfirmation": false
|
|
188
517
|
}
|
|
189
|
-
]
|
|
190
|
-
"requireConfirmation": false
|
|
518
|
+
]
|
|
191
519
|
},
|
|
192
520
|
{
|
|
193
|
-
"id": "phase-
|
|
194
|
-
"title": "Phase
|
|
195
|
-
"
|
|
196
|
-
"
|
|
197
|
-
"
|
|
521
|
+
"id": "phase-7-final-trust-handoff",
|
|
522
|
+
"title": "Phase 7: Final Trust Handoff",
|
|
523
|
+
"promptBlocks": {
|
|
524
|
+
"goal": "Summarize the authored or modernized workflow as a trust decision, not just a file edit.",
|
|
525
|
+
"constraints": [
|
|
526
|
+
"Keep it concise. The workflow file is the deliverable, not the summary."
|
|
527
|
+
],
|
|
528
|
+
"procedure": [
|
|
529
|
+
"State the workflow file path and name, whether it was created or modernized, and what it does in one sentence.",
|
|
530
|
+
"Summarize the step structure, loops, confirmations, and delegation profile.",
|
|
531
|
+
"Report validation status, authoring-integrity status, and outcome-effectiveness status.",
|
|
532
|
+
"Set a final `workflowReadinessVerdict`: `ready`, `ready_with_conditions`, or `not_ready`.",
|
|
533
|
+
"List the main improvements, residual weaknesses, trust risks if any, and how to test the workflow."
|
|
534
|
+
],
|
|
535
|
+
"outputRequired": {
|
|
536
|
+
"notesMarkdown": "Final trust handoff covering readiness verdict, validation status, strengths, residual weaknesses, and testing guidance.",
|
|
537
|
+
"context": "Capture workflowReadinessVerdict, trustRiskSummary, knownFailureModes, and residualWeaknesses."
|
|
538
|
+
},
|
|
539
|
+
"verify": [
|
|
540
|
+
"The final handoff makes clear whether WorkRail should trust this workflow."
|
|
541
|
+
]
|
|
198
542
|
},
|
|
199
|
-
"prompt": "The workflow is valid and reviewed. Check whether any of these improvements are worth making:\n\n1. **Prompt fragments**: are there steps with near-identical prompts that differ only by rigor mode or `authoringMode`? Extract the differences into promptFragments.\n2. **Extension points**: are there slots that different teams or projects would want to customize? Declare them.\n3. **References**: should the workflow point at external documents the agent should be aware of during execution?\n4. **Deduplication**: is there repeated prose across steps that could be moved to metaGuidance or a shared pattern?\n5. **Context templates**: are there simple variable substitutions that would make prompts cleaner?\n\nOnly make changes that genuinely improve the workflow. Do not refine for the sake of refining.\n\nIf `authoringMode = modernize_existing`, prefer refinements that remove leftover legacy wording or mismatched structure over cosmetic rewrites.\n\nIf you change anything, re-run validation.\n\nCapture:\n- `refinementsApplied`\n- `finalValidationPassed`",
|
|
200
|
-
"requireConfirmation": false
|
|
201
|
-
},
|
|
202
|
-
{
|
|
203
|
-
"id": "phase-6-handoff",
|
|
204
|
-
"title": "Phase 6: Handoff",
|
|
205
|
-
"prompt": "Summarize what you authored or updated.\n\nInclude:\n- workflow file path and name\n- whether you created a new workflow or modernized an existing one\n- what the workflow does (one sentence)\n- step count and structure overview\n- loops, confirmations, and delegation if any\n- validation status\n- for modernization: the main improvements and any legacy residue still left intentionally\n- any known limitations or future improvements\n- how to test it: where to place the file and how to run it\n\nKeep it concise. The workflow file is the deliverable, not the summary.",
|
|
206
543
|
"notesOptional": true,
|
|
207
544
|
"requireConfirmation": false
|
|
208
545
|
}
|
|
@@ -11,26 +11,6 @@
|
|
|
11
11
|
"wr.features.capabilities",
|
|
12
12
|
"wr.features.subagent_guidance"
|
|
13
13
|
],
|
|
14
|
-
"extensionPoints": [
|
|
15
|
-
{
|
|
16
|
-
"slotId": "candidate_generation",
|
|
17
|
-
"purpose": "Let me swap in a different way of generating candidate directions without forking the whole workflow.",
|
|
18
|
-
"default": "routine-tension-driven-design",
|
|
19
|
-
"acceptedKinds": ["routine", "workflow"]
|
|
20
|
-
},
|
|
21
|
-
{
|
|
22
|
-
"slotId": "direction_review",
|
|
23
|
-
"purpose": "Let me swap in a different way of pressure-testing the selected direction without giving up parent workflow synthesis.",
|
|
24
|
-
"default": "routine-design-review",
|
|
25
|
-
"acceptedKinds": ["routine", "workflow"]
|
|
26
|
-
},
|
|
27
|
-
{
|
|
28
|
-
"slotId": "final_validation",
|
|
29
|
-
"purpose": "Let me swap in a different final challenge or validation pass before handoff.",
|
|
30
|
-
"default": "routine-hypothesis-challenge",
|
|
31
|
-
"acceptedKinds": ["routine", "workflow"]
|
|
32
|
-
}
|
|
33
|
-
],
|
|
34
14
|
"preconditions": [
|
|
35
15
|
"I can tell you what problem, opportunity, or decision I want help thinking through.",
|
|
36
16
|
"You can keep durable state in `output.notesMarkdown` and `continue_workflow` context keys.",
|
|
@@ -453,56 +433,48 @@
|
|
|
453
433
|
},
|
|
454
434
|
{
|
|
455
435
|
"id": "phase-3b-candidates-deep",
|
|
456
|
-
"title": "Phase 3b: Candidate Generation
|
|
436
|
+
"title": "Phase 3b: Candidate Generation Setup",
|
|
457
437
|
"runCondition": {
|
|
458
438
|
"var": "rigorMode",
|
|
459
439
|
"not_equals": "QUICK"
|
|
460
440
|
},
|
|
461
441
|
"promptBlocks": {
|
|
462
|
-
"goal": "
|
|
442
|
+
"goal": "Set up the injected candidate-generation pass so it produces the right kind of candidate set for this path.",
|
|
463
443
|
"constraints": [
|
|
464
|
-
"
|
|
465
|
-
"
|
|
444
|
+
"The next step injects the reusable candidate-generation routine inline.",
|
|
445
|
+
"Record any path-specific bias or extra rigor expectation before the injected routine runs."
|
|
466
446
|
],
|
|
467
447
|
"procedure": [
|
|
468
|
-
"
|
|
469
|
-
"
|
|
448
|
+
"State what the injected candidate-generation routine must emphasize for this path and why.",
|
|
449
|
+
"For `design_first`, require at least one direction that meaningfully reframes the problem instead of only packaging obvious solutions.",
|
|
450
|
+
"For `landscape_first`, require the candidate set to clearly reflect landscape precedents, constraints, and contradictions rather than drifting into free invention.",
|
|
451
|
+
"For `THOROUGH`, require one extra push if the first spread still feels clustered or too safe.",
|
|
452
|
+
"Write these expectations into `designDocPath` so the later synthesis can judge whether the injected routine met them."
|
|
470
453
|
],
|
|
471
454
|
"verify": [
|
|
472
|
-
"
|
|
455
|
+
"The path-specific expectations for candidate generation are explicit before the injected routine runs."
|
|
473
456
|
]
|
|
474
457
|
},
|
|
475
|
-
"
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
"equals": "landscape_first"
|
|
489
|
-
},
|
|
490
|
-
"text": "Because this is `landscape_first`, make sure the candidate set clearly reflects landscape precedents, constraints, and contradictions rather than drifting into free invention."
|
|
491
|
-
},
|
|
492
|
-
{
|
|
493
|
-
"id": "p3b-thorough-depth",
|
|
494
|
-
"when": {
|
|
495
|
-
"var": "rigorMode",
|
|
496
|
-
"equals": "THOROUGH"
|
|
497
|
-
},
|
|
498
|
-
"text": "Because this is a `THOROUGH` pass, do not stop at the first good spread. Push once more if the set still feels clustered or too safe."
|
|
458
|
+
"requireConfirmation": false
|
|
459
|
+
},
|
|
460
|
+
{
|
|
461
|
+
"id": "phase-3c-candidates-deep-core",
|
|
462
|
+
"title": "Phase 3c: Candidate Generation (Injected Routine)",
|
|
463
|
+
"runCondition": {
|
|
464
|
+
"var": "rigorMode",
|
|
465
|
+
"not_equals": "QUICK"
|
|
466
|
+
},
|
|
467
|
+
"templateCall": {
|
|
468
|
+
"templateId": "wr.templates.routine.tension-driven-design",
|
|
469
|
+
"args": {
|
|
470
|
+
"deliverableName": "design-candidates.md"
|
|
499
471
|
}
|
|
500
|
-
|
|
472
|
+
},
|
|
501
473
|
"requireConfirmation": false
|
|
502
474
|
},
|
|
503
475
|
{
|
|
504
|
-
"id": "phase-
|
|
505
|
-
"title": "Phase
|
|
476
|
+
"id": "phase-3d-select-direction",
|
|
477
|
+
"title": "Phase 3d: Challenge and Select Direction",
|
|
506
478
|
"promptBlocks": {
|
|
507
479
|
"goal": "Read `design-candidates.md`, challenge the leading option, and make the final selection for me.",
|
|
508
480
|
"constraints": [
|
|
@@ -515,7 +487,7 @@
|
|
|
515
487
|
"procedure": [
|
|
516
488
|
"Compare candidates against `pathRecommendation` and `decisionCriteria`: which candidate best fits, which candidate is the strongest alternative, and what evidence or stakeholder tension the top candidate still risks missing.",
|
|
517
489
|
"Self-produce the strongest argument against the leading option.",
|
|
518
|
-
"If `delegationAvailable = true` and (`rigorMode != QUICK` or `pathRecommendation = full_spectrum`), decide whether a delegated challenge is likely to sharpen the decision enough to be worth the extra step. If yes, spawn ONE WorkRail Executor running `
|
|
490
|
+
"If `delegationAvailable = true` and (`rigorMode != QUICK` or `pathRecommendation = full_spectrum`), decide whether a delegated challenge is likely to sharpen the decision enough to be worth the extra step. If yes, spawn ONE WorkRail Executor running `routine-hypothesis-challenge` against the leading option. If not, keep the challenge in your own hands and say why.",
|
|
519
491
|
"If `delegationAvailable = true` and `rigorMode = THOROUGH`, decide whether an execution simulation would materially sharpen the decision. If yes, you may also spawn ONE WorkRail Executor running `routine-execution-simulation`.",
|
|
520
492
|
"Choose `selectedDirection` and `runnerUpDirection`.",
|
|
521
493
|
"Record `acceptedTradeoffs`, `identifiedFailureModes`, and what would trigger a switch.",
|
|
@@ -579,19 +551,11 @@
|
|
|
579
551
|
{
|
|
580
552
|
"id": "phase-4a-review-core",
|
|
581
553
|
"title": "Direction Review Core",
|
|
582
|
-
"
|
|
583
|
-
"
|
|
584
|
-
"
|
|
585
|
-
"
|
|
586
|
-
|
|
587
|
-
],
|
|
588
|
-
"procedure": [
|
|
589
|
-
"Choose the execution mode that is most likely to produce a useful review here. If direct execution is enough, follow the bounded review implementation referenced by `{{wr.bindings.direction_review}}` and produce `design-review-findings.md`.",
|
|
590
|
-
"If delegation is likely to surface better weaknesses or hidden tradeoffs, spawn ONE WorkRail Executor running `{{wr.bindings.direction_review}}` with the selected direction, accepted tradeoffs, failure modes, and path context, then capture the resulting findings in `design-review-findings.md`. If you do not delegate, record why direct review is enough."
|
|
591
|
-
],
|
|
592
|
-
"verify": [
|
|
593
|
-
"`design-review-findings.md` exists and reflects an actual review pass, not a placeholder."
|
|
594
|
-
]
|
|
554
|
+
"templateCall": {
|
|
555
|
+
"templateId": "wr.templates.routine.design-review",
|
|
556
|
+
"args": {
|
|
557
|
+
"deliverableName": "design-review-findings.md"
|
|
558
|
+
}
|
|
595
559
|
},
|
|
596
560
|
"requireConfirmation": false
|
|
597
561
|
},
|
|
@@ -852,7 +816,7 @@
|
|
|
852
816
|
],
|
|
853
817
|
"procedure": [
|
|
854
818
|
"Validate that the selected path still makes sense in hindsight, the chosen direction still beats the strongest alternative, the remaining uncertainty is named honestly, and the design doc is complete enough for a human to use.",
|
|
855
|
-
"If a serious unresolved concern remains and `delegationAvailable = true`, decide whether a delegated final challenge is likely to change the result or sharpen the caveats. If yes, spawn ONE WorkRail Executor running `
|
|
819
|
+
"If a serious unresolved concern remains and `delegationAvailable = true`, decide whether a delegated final challenge is likely to change the result or sharpen the caveats. If yes, spawn ONE WorkRail Executor running `routine-hypothesis-challenge` against the final recommendation. If not, keep the challenge in your own hands and record why.",
|
|
856
820
|
"Set these keys in the next `continue_workflow` call's `context` object: `finalConfidenceBand`, `residualRiskCount`, `handoffReady`."
|
|
857
821
|
],
|
|
858
822
|
"verify": [
|