@exaudeus/workrail 3.11.0 → 3.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,30 @@
1
1
  {
2
2
  "id": "workflow-for-workflows",
3
- "name": "Workflow Authoring Workflow (Lean, References-First)",
4
- "version": "2.0.0",
5
- "description": "Guides an agent through authoring or modernizing a WorkRail workflow: understand the task, choose the shape, draft or revise the JSON, validate with real validators, review the method, and optionally refine.",
3
+ "name": "Workflow Authoring Workflow (Quality Gate v2)",
4
+ "version": "2.1.0",
5
+ "description": "Guides an agent through authoring or modernizing a WorkRail workflow with a stronger quality gate: understand the task, define effectiveness targets, design both workflow and quality architecture, draft, validate, simulate execution, run adversarial review, redesign if needed, and only then hand off.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
8
8
  "recommendedRiskPolicy": "conservative"
9
9
  },
10
+ "features": [
11
+ "wr.features.subagent_guidance"
12
+ ],
10
13
  "preconditions": [
11
14
  "User has a recurring task or problem a workflow should solve, or an existing workflow that should be modernized.",
12
15
  "Agent has access to file creation, editing, and terminal tools.",
13
- "Agent can run workflow validators (npm run validate:registry or equivalent)."
16
+ "Agent can run workflow validators such as `npm run validate:registry` or equivalent."
14
17
  ],
15
18
  "metaGuidance": [
16
19
  "REFERENCE HIERARCHY: treat workflow-schema as legal truth for structure. Treat authoring-spec as canonical current guidance for what makes a workflow good. Treat authoring-provenance as optional maintainer context only.",
17
20
  "META DISTINCTION: you are authoring or modernizing a workflow, not executing one. Keep the authored workflow's concerns separate from this meta-workflow's execution.",
21
+ "QUALITY-GATE ROLE: this workflow is the trust gate for other workflows. It must optimize not only for validity and modern authoring, but also for task effectiveness, false-confidence resistance, and future maintainability.",
18
22
  "DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for business decisions about the workflow being authored or modernized, not things you can learn from the schema, authoring spec, or example workflows.",
19
23
  "AUTHORED VOICE: prompts in the authored workflow must be user-voiced. No middleware narration, no pseudo-DSL, no tutorial framing, no teaching-product language.",
20
- "VOICE ADAPTATION: the lean coding workflow is one voice example, not the universal template. Copy structural patterns, not domain language. Adapt vocabulary and tone to the authored workflow's domain.",
21
- "VOICE EXAMPLES: Coding: 'Review the changes in this MR.' Ops: 'Check whether the pipeline is healthy.' Content: 'Read the draft and check the argument.' NOT: 'The system will now perform a comprehensive analysis of...'",
24
+ "BASELINE DISCIPLINE: choose both an authoring baseline and an outcome baseline whenever possible. Copy structural patterns, not domain language.",
22
25
  "VALIDATION GATE: validate with real validators, not regex approximations. When validator output and authoring assumptions conflict, runtime wins.",
26
+ "DEEP REVIEW: authoring integrity and outcome effectiveness are separate concerns. A workflow is not ready unless both pass.",
27
+ "THOROUGH MODE: for complex or high-trust workflow work, prefer the deepest review path: state economy audit, execution simulation, adversarial review, and redesign if hard gates fail.",
23
28
  "ARTIFACT STRATEGY: the workflow JSON file is the primary output. Intermediate notes go in output.notesMarkdown. Do not create extra planning artifacts unless the workflow is genuinely complex.",
24
29
  "V2 DURABILITY: use output.notesMarkdown as the primary durable record. Do not mirror execution state into CONTEXT.md or markdown checkpoint files.",
25
30
  "ANTI-PATTERNS TO AVOID IN AUTHORED WORKFLOWS: no pseudo-function metaGuidance, no learning-path branching, no satisfaction-score loops, no heavy clarification batteries, no regex-as-primary-validation, no celebration phases.",
@@ -76,28 +81,120 @@
76
81
  },
77
82
  {
78
83
  "id": "lean-coding-workflow",
79
- "title": "Lean Coding Workflow (Modern Example)",
84
+ "title": "Lean Coding Workflow (Modern Authoring Baseline)",
80
85
  "source": "workflows/coding-task-workflow-agentic.lean.v2.json",
81
86
  "resolveFrom": "package",
82
- "purpose": "Current modern example of a well-authored workflow. Inspect for patterns, voice, loop semantics, prompt fragments, and delegation policy.",
87
+ "purpose": "Strong modern example for engine-native authoring patterns, loop semantics, prompt density, and bounded delegation.",
88
+ "authoritative": false
89
+ },
90
+ {
91
+ "id": "mr-review-workflow",
92
+ "title": "MR Review Workflow (Outcome Baseline Example)",
93
+ "source": "workflows/mr-review-workflow.agentic.v2.json",
94
+ "resolveFrom": "package",
95
+ "purpose": "Strong example of hypothesis, neutral fact packet, reviewer families, contradiction synthesis, and final validation.",
96
+ "authoritative": false
97
+ },
98
+ {
99
+ "id": "readiness-audit-workflow",
100
+ "title": "Production Readiness Audit (Audit Baseline Example)",
101
+ "source": "workflows/production-readiness-audit.json",
102
+ "resolveFrom": "package",
103
+ "purpose": "Example of a thorough evidence-driven audit workflow with explicit reviewer-family structure and confidence handling.",
83
104
  "authoritative": false
84
105
  }
85
106
  ],
86
107
  "steps": [
87
108
  {
88
- "id": "phase-0-understand",
89
- "title": "Phase 0: Understand the Workflow to Author or Modernize",
90
- "prompt": "Before you write anything, understand what you're working on.\n\nStart by reading:\n- `workflow-schema` reference (legal structure)\n- `authoring-spec` reference (canonical authoring rules)\n- `authoring-guide-v2` reference (current v2 authoring principles)\n- `workflow-authoring-reference` reference (detailed structure patterns)\n- `lean-coding-workflow` reference (modern example to inspect)\n\nRead `routines-guide` too if you think the authored workflow may need delegation or template injection.\n\nThen decide what kind of authoring task this is:\n- `authoringMode`: `create` or `modernize_existing`\n\nIf `authoringMode = create`, understand:\n- What recurring task or problem should this workflow solve?\n- Who runs it and how often?\n- What does success look like?\n- What constraints exist (tools, permissions, domain rules)?\n\nIf `authoringMode = modernize_existing`, understand:\n- Which workflow file is being updated?\n- What should stay the same about its purpose?\n- What feels stale, legacy, repetitive, or misaligned with current authoring guidance?\n- What constraints apply to the modernization (keep file path, preserve compatibility, avoid broad rewrites, etc.)?\n- Which modern example should act as the primary baseline, if any?\n\nFor `modernize_existing`, make an explicit baseline decision before architecture work:\n- choose exactly one `primaryBaseline` when a single modern example fits well\n- optional `secondaryBaselines` may be used for supporting patterns only\n- if no single baseline fits, set `primaryBaseline = none` and explain whether you are using a hybrid baseline or reasoning directly from schema + authoring guidance\n- list `patternsToBorrow` and `patternsToAvoid`\n\nRule:\n- baselines are models, not templates. Copy structural patterns, not another workflow's domain voice.\n\nExplore first. Use tools to understand the existing workflow, surrounding docs, and relevant domain context. Ask the user only what you genuinely cannot figure out yourself.\n\nThen classify:\n- `workflowComplexity`: Simple (linear, few steps) / Medium (branches, loops, or moderate step count) / Complex (multiple loops, delegation, extension points, many steps)\n- `rigorMode`: QUICK (simple linear workflow, low risk) / STANDARD (moderate complexity or domain risk) / THOROUGH (complex architecture, high stakes, needs review loops)\n\nCapture:\n- `authoringMode`\n- `workflowComplexity`\n- `rigorMode`\n- `taskDescription`\n- `intendedAudience`\n- `successCriteria`\n- `domainConstraints`\n- `targetWorkflowPath` (required for `modernize_existing`, otherwise empty)\n- `modernizationGoals` (required for `modernize_existing`, otherwise empty)\n- `primaryBaseline` (for `modernize_existing`, otherwise empty)\n- `secondaryBaselines` (for `modernize_existing`, otherwise empty)\n- `baselineDecisionRationale` (for `modernize_existing`, otherwise empty)\n- `patternsToBorrow` (for `modernize_existing`, otherwise empty)\n- `patternsToAvoid` (for `modernize_existing`, otherwise empty)\n- `openQuestions` (only real questions that need user input)",
109
+ "id": "phase-0-understand-and-classify",
110
+ "title": "Phase 0: Understand and Classify the Authoring Task",
111
+ "promptBlocks": {
112
+ "goal": "Understand what workflow you are authoring or modernizing, and classify the task before you design anything.",
113
+ "constraints": [
114
+ [
115
+ { "kind": "ref", "refId": "wr.refs.notes_first_durability" }
116
+ ],
117
+ "Explore first. Ask the user only what you genuinely cannot determine with tools and references.",
118
+ "Choose baselines as models, not templates. Copy structural patterns, not another workflow's domain voice."
119
+ ],
120
+ "procedure": [
121
+ "Read the schema, authoring spec, v2 authoring guides, and the strongest relevant example workflows.",
122
+ "Decide `authoringMode`: `create` or `modernize_existing`.",
123
+ "Classify the target workflow archetype: `review_audit`, `coding_execution`, `diagnostic_investigation`, `planning_design`, `linear_operational`, or `content_analysis`.",
124
+ "Classify `workflowComplexity`: Simple, Medium, or Complex. Classify `rigorMode`: QUICK, STANDARD, or THOROUGH.",
125
+ "Choose an `authoringBaseline` for engine-native authoring quality and an `outcomeBaseline` for the kind of job the authored workflow should perform. If no good baseline exists for one of them, set it to `none` and explain why.",
126
+ "If `authoringMode = modernize_existing`, identify what must stay the same about purpose, what feels stale, and what modernization constraints apply."
127
+ ],
128
+ "outputRequired": {
129
+ "notesMarkdown": "Task understanding, baseline choices, patterns to borrow or avoid, and any real open questions.",
130
+ "context": "Capture authoringMode, workflowArchetype, workflowComplexity, rigorMode, taskDescription, intendedAudience, successCriteria, domainConstraints, targetWorkflowPath, modernizationGoals, authoringBaseline, outcomeBaseline, baselineDecisionRationale, authoringPatternsToBorrow, outcomePatternsToBorrow, patternsToAvoid, openQuestions."
131
+ },
132
+ "verify": [
133
+ "The task is understood well enough to design the workflow without guessing blindly.",
134
+ "Both authoring and outcome baselines are explicit, or their absence is justified."
135
+ ]
136
+ },
91
137
  "requireConfirmation": true
92
138
  },
93
139
  {
94
- "id": "phase-1-shape",
95
- "title": "Phase 1: Choose the Workflow Shape",
140
+ "id": "phase-1-define-effectiveness-target",
141
+ "title": "Phase 1: Define the Effectiveness Target",
142
+ "promptBlocks": {
143
+ "goal": "Define what success should feel like for the authored workflow, not just what fields it should contain.",
144
+ "constraints": [
145
+ "Be specific about user satisfaction and dangerous false-confidence outcomes.",
146
+ "Distinguish a technically valid workflow from a satisfying one."
147
+ ],
148
+ "procedure": [
149
+ "State what result the authored workflow should reliably produce for its user.",
150
+ "List the criteria that would make the workflow feel genuinely satisfying in practice.",
151
+ "Name the biggest likely failure mode and the most dangerous false-confidence mode.",
152
+ "State what would make the workflow technically correct but still disappointing."
153
+ ],
154
+ "outputRequired": {
155
+ "notesMarkdown": "Effectiveness target, satisfaction criteria, failure modes, and false-confidence risks.",
156
+ "context": "Capture effectivenessTarget, userSatisfactionCriteria, primaryFailureMode, dangerousFalseConfidenceModes, likelyWeakOutcomeModes, and trustRisk."
157
+ },
158
+ "verify": [
159
+ "The authored workflow now has a clear outcome bar, not just an authoring bar."
160
+ ]
161
+ },
162
+ "requireConfirmation": false
163
+ },
164
+ {
165
+ "id": "phase-2-design-workflow-architecture",
166
+ "title": "Phase 2: Design the Workflow Architecture",
96
167
  "runCondition": {
97
168
  "var": "workflowComplexity",
98
169
  "not_equals": "Simple"
99
170
  },
100
- "prompt": "Decide the architecture before you write JSON.\n\nBased on what you learned in Phase 0, decide:\n\n1. **Step structure**: how many phases, what each one does, what order\n2. **Loops**: does any phase need iteration? If so, what are the exit rules and max iterations?\n\nLoop design heuristics:\n- Add a loop ONLY when: (a) a quality gate may fail on first pass (validation, review), (b) each pass adds measurable value (progressive refinement), or (c) external feedback requires re-execution.\n- Do NOT loop when: (a) the agent can get it right in one pass with sufficient context, or (b) the full workflow is cheap enough to re-run entirely.\n- Every loop needs: an explicit exit condition (not vibes), a bounded maxIterations, and a decision step with outputContract.\n- Sensible defaults: validation ≈ 2-3, review/refinement ≈ 2, user-feedback ≈ 2-3 with confirmation gate. Go higher only with explicit justification in your notes.\n3. **Confirmation gates**: where does the user genuinely need to approve before proceeding? Don't add confirmations as ceremony.\n4. **Delegation and reuse**: for each phase, decide between direct execution, routine delegation, template injection, or no special mechanism. If a routine or template is not used, say why not. Keep delegation bounded and keep ownership with the main agent.\n5. **Prompt composition**: will any steps need promptFragments for rigor-mode branching? Will any steps share enough structure to use templates?\n6. **Extension points**: are there customizable slots that projects might want to override (e.g., a verification routine, a review routine)?\n7. **References**: should the authored workflow declare its own references to external docs?\n8. **Artifacts**: what does each step produce? Which artifact is canonical for which concern?\n9. **metaGuidance**: what persistent behavioral rules should the agent see on start and resume?\n\nIf `authoringMode = modernize_existing`, also decide:\n- should this workflow be preserved mostly in place, restructured selectively, or rewritten more substantially?\n- which existing steps, loops, references, or metaGuidance should stay because they still fit the workflow's purpose?\n- which legacy patterns or repetitive sections should be removed or reshaped?\n- whether the file path should stay the same or whether a new variant/file is genuinely warranted\n- how each major old phase or behavior maps to the new workflow: `keep`, `merge`, `remove`, or `replace`\n\nFor `modernize_existing`, create a compact legacy mapping in your notes. For each major old phase or behavior, record:\n- source step or behavior\n- disposition: `keep` / `merge` / `remove` / `replace`\n- rationale\n- destination in the new workflow, if any\n\nFor routine and template decisions, create a compact audit in your notes. For each meaningful phase or concern, record:\n- chosen mechanism: direct / routine / template / none\n- why it helps or why it would be overkill\n- the ownership boundary that stays with the main agent\n\nWrite the shape as a structured outline in your notes. Include:\n- Phase list with titles and one-line goals\n- Which phases loop and why\n- Which phases have confirmation gates and why\n- Context variables that flow between phases\n- Artifact ownership (which artifact is canonical for what)\n- for `modernize_existing`: whether the plan is preserve-in-place, restructure, or rewrite-biased and why\n\nDon't write JSON yet.\n\nCapture:\n- `workflowOutline`\n- `loopDesign`\n- `confirmationDesign`\n- `delegationDesign`\n- `artifactPlan`\n- `contextModel` (the context variables the workflow will use and where they're set)\n- `voiceStrategy` (domain vocabulary, authority posture: directive/collaborative/supervisory, density calibration)\n- `routineAudit`\n- `delegationBoundaries`\n- `templateInjectionPlan`\n- `modernizationStrategy` (for `modernize_existing`: preserve_in_place / restructure / rewrite, otherwise empty)\n- `legacyMapping` (for `modernize_existing`, otherwise empty)\n- `behaviorPreservationNotes` (for `modernize_existing`, otherwise empty)",
171
+ "promptBlocks": {
172
+ "goal": "Decide the workflow architecture before you write JSON.",
173
+ "constraints": [
174
+ "Separate workflow architecture from quality-gate architecture. This phase is about the authored workflow itself.",
175
+ "Keep delegation bounded and keep ownership with the main agent."
176
+ ],
177
+ "procedure": [
178
+ "Decide the phase list, one-line goal for each phase, and overall ordering.",
179
+ "Design loops with explicit exit rules, bounded maxIterations, and real reasons for another pass.",
180
+ "Decide confirmation gates, delegation vs template injection vs direct execution, promptFragments, references, artifacts, and metaGuidance.",
181
+ "If `authoringMode = modernize_existing`, decide whether the plan is preserve-in-place, restructure, or rewrite, and map legacy behaviors as `keep`, `merge`, `remove`, or `replace`."
182
+ ],
183
+ "outputRequired": {
184
+ "notesMarkdown": "Structured workflow outline, loop design, confirmation design, delegation design, artifact plan, and modernization mapping.",
185
+ "context": "Capture workflowOutline, loopDesign, confirmationDesign, delegationDesign, artifactPlan, contextModel, voiceStrategy, routineAudit, delegationBoundaries, templateInjectionPlan, modernizationStrategy, legacyMapping, and behaviorPreservationNotes."
186
+ },
187
+ "verify": [
188
+ "The authored workflow architecture is coherent before JSON drafting begins."
189
+ ]
190
+ },
191
+ "promptFragments": [
192
+ {
193
+ "id": "phase-2-simple-direct",
194
+ "when": { "var": "workflowComplexity", "equals": "Simple" },
195
+ "text": "For Simple workflows, keep the architecture linear and compact. Do not invent loops or ceremony unless the task truly needs them."
196
+ }
197
+ ],
101
198
  "requireConfirmation": {
102
199
  "or": [
103
200
  { "var": "workflowComplexity", "not_equals": "Simple" },
@@ -106,22 +203,71 @@
106
203
  }
107
204
  },
108
205
  {
109
- "id": "phase-2-draft",
110
- "title": "Phase 2: Draft or Revise the Workflow",
111
- "prompt": "Write the workflow JSON file.\n\nUse the outline from Phase 1 and produce the best first pass you can. If `authoringMode = create`, draft a new workflow. If `authoringMode = modernize_existing`, revise the existing workflow so it keeps the right intent while removing stale structure, legacy patterns, or unnecessary repetition. Phase 3 will catch structural issues, so focus on getting the shape and voice right. Follow these rules:\n\n1. The schema (`workflow-schema` reference) defines what is structurally legal. Do not invent fields.\n2. The authoring spec (`authoring-spec` reference) defines what is good. Follow its active rules.\n3. Write prompts in the user's voice. The opening sentence of each step should sound like a direct ask, not system narration.\n4. Calibrate prompt density to the step's needs. Not all steps need the same level of detail:\n - Sparse (expert audience, clear task): direct ask + capture footer only.\n - Focused (expert audience, ambiguous task): direct ask + key criteria or trade-offs + capture.\n - Guided (broad audience, clear task): direct ask + enumerated sub-steps + capture.\n - Scaffolded (broad audience, ambiguous task): direct ask + context frame + sub-steps + heuristic + capture.\n Default to Focused if unsure. Vary density across steps -- uniform density is a smell.\n5. Keep protocol requirements explicit. If a step must emit a specific artifact or capture specific context, say that plainly.\n6. Use promptFragments for conditional rigor-mode branches instead of duplicating entire steps.\n7. Loop decision steps must use `outputContract` with `wr.contracts.loop_control` and allow both `continue` and `stop` in the output example.\n8. Loops must have explicit exit rules, bounded maxIterations, and a clear reason for another pass.\n9. Confirmation gates are for real human decisions, not routine ceremony.\n10. metaGuidance should be clean behavioral rules, not pseudo-functions or teaching prose.\n11. Do not use regex validationCriteria as the primary quality gate. Use real validators.\n12. If you are modernizing, preserve what still fits the workflow's purpose. Do not rewrite just because a workflow is old.\n\nIf `authoringMode = create`, ask the user what filename to use if they haven't specified one.\nIf `authoringMode = modernize_existing`, default to editing `targetWorkflowPath` unless there is a strong reason to create a new variant or file.\n\nWrite the file. Do not explain the JSON back to the user field by field.\n\nCapture:\n- `workflowFilePath`\n- `draftComplete`",
206
+ "id": "phase-3-design-quality-architecture",
207
+ "title": "Phase 3: Design the Quality-Gate Architecture",
208
+ "promptBlocks": {
209
+ "goal": "Design how the authored workflow will avoid shallow results, false confidence, and state bloat.",
210
+ "constraints": [
211
+ "This phase is about the authored workflow's quality model, not its basic phase list.",
212
+ "Prefer explicit quality structure over hoping the agent will infer it."
213
+ ],
214
+ "procedure": [
215
+ "Decide whether the authored workflow needs a hypothesis step, neutral fact packet, reviewer or validator families, contradiction loop, final validation bundle, or explicit blind-spot handling.",
216
+ "Design the confidence model, blind-spot model, and state economy plan.",
217
+ "Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail.",
218
+ "Write the redesign triggers that should force architectural revision rather than cosmetic refinement."
219
+ ],
220
+ "outputRequired": {
221
+ "notesMarkdown": "Quality architecture, confidence model, blind-spot model, state economy plan, and hard-gate triggers.",
222
+ "context": "Capture qualityArchitecture, confidenceModel, blindSpotModel, stateEconomyPlan, reviewBundlePlan, qualityGateTriggers, and hardGateModel."
223
+ },
224
+ "verify": [
225
+ "The authored workflow has an explicit plan for false-confidence resistance and quality review."
226
+ ]
227
+ },
228
+ "requireConfirmation": {
229
+ "or": [
230
+ { "var": "rigorMode", "equals": "THOROUGH" },
231
+ { "var": "workflowComplexity", "equals": "Complex" }
232
+ ]
233
+ }
234
+ },
235
+ {
236
+ "id": "phase-4-draft-or-revise",
237
+ "title": "Phase 4: Draft or Revise the Workflow",
238
+ "promptBlocks": {
239
+ "goal": "Write the workflow JSON file using the architecture and quality model you already chose.",
240
+ "constraints": [
241
+ "The schema defines what is legal. The authoring spec defines what is good.",
242
+ "Write prompts in the user's voice. Vary prompt density by step needs rather than using one density everywhere.",
243
+ "If you are modernizing, preserve what still fits the workflow's purpose. Do not rewrite just because a workflow is old."
244
+ ],
245
+ "procedure": [
246
+ "If `authoringMode = create` and no filename was specified, ask the user for the filename before writing.",
247
+ "If `authoringMode = modernize_existing`, default to editing `targetWorkflowPath` unless there is a strong reason to create a new variant or file.",
248
+ "Write the workflow file. Keep protocol requirements explicit, loops bounded, confirmations meaningful, and metaGuidance clean."
249
+ ],
250
+ "outputRequired": {
251
+ "notesMarkdown": "Draft status and any notable authoring choices that are important to later review.",
252
+ "context": "Capture workflowFilePath and draftComplete."
253
+ },
254
+ "verify": [
255
+ "The workflow file exists and reflects the chosen architecture rather than an improvised one."
256
+ ]
257
+ },
112
258
  "promptFragments": [
113
259
  {
114
- "id": "phase-2-simple-fast",
260
+ "id": "phase-4-simple-fast",
115
261
  "when": { "var": "workflowComplexity", "equals": "Simple" },
116
- "text": "No shape outline exists for Simple workflows -- Phase 1 was skipped. Decide the step list now and draft directly. Keep it linear, 3-5 steps, no loops unless the task genuinely needs iteration. metaGuidance is optional for Simple workflows."
262
+ "text": "For Simple workflows, keep the file compact and linear. Do not create extra metaGuidance or loops unless the task truly needs them."
117
263
  }
118
264
  ],
119
265
  "requireConfirmation": false
120
266
  },
121
267
  {
122
- "id": "phase-3-validate",
268
+ "id": "phase-5-validate",
123
269
  "type": "loop",
124
- "title": "Phase 3: Validate and Fix",
270
+ "title": "Phase 5: Structural Validation Loop",
125
271
  "loop": {
126
272
  "type": "while",
127
273
  "conditionSource": {
@@ -133,76 +279,267 @@
133
279
  },
134
280
  "body": [
135
281
  {
136
- "id": "phase-3a-run-validation",
282
+ "id": "phase-5a-run-validation",
137
283
  "title": "Run Validation",
138
- "prompt": "Run the real workflow validators against the drafted workflow.\n\nUse the available validation tools or commands (e.g., `npm run validate:registry`, schema validation, or the MCP validation surface). Do not rely on reading the JSON and eyeballing it.\n\nIf validation passes cleanly, say so and move to the loop decision.\n\nIf validation fails:\n1. List the actual errors.\n2. Fix each one in the workflow file.\n3. Re-run validation to confirm the fixes worked.\n\nIf the validator reports something that conflicts with your authoring assumptions, the validator (runtime) wins. Update your understanding.\n\nCapture:\n- `validationErrors`\n- `validationPassed`",
284
+ "promptBlocks": {
285
+ "goal": "Run the real workflow validators against the drafted workflow.",
286
+ "constraints": [
287
+ "Do not rely on reading the JSON and eyeballing it.",
288
+ "If runtime and authoring assumptions conflict, runtime wins."
289
+ ],
290
+ "procedure": [
291
+ "Run the available validation tools or commands such as `npm run validate:registry`, schema validation, or the MCP validation surface.",
292
+ "If validation fails, list the actual errors, fix them in the workflow file, and re-run validation.",
293
+ "If validation passes cleanly, say so plainly."
294
+ ],
295
+ "outputRequired": {
296
+ "notesMarkdown": "Validation results, actual errors if any, and what was fixed.",
297
+ "context": "Capture validationErrors and validationPassed."
298
+ },
299
+ "verify": [
300
+ "Validation results are based on real validators, not approximations."
301
+ ]
302
+ },
139
303
  "promptFragments": [
140
304
  {
141
- "id": "phase-3a-thorough",
305
+ "id": "phase-5a-thorough",
142
306
  "when": { "var": "rigorMode", "equals": "THOROUGH" },
143
- "text": "After fixing structural errors, also check the workflow against the authoring spec rules manually. Score each active required-level rule as pass/fail and fix any failures before moving on."
307
+ "text": "After structural validation passes, also check the workflow manually against required-level authoring-spec rules and fix any failures before moving on."
144
308
  }
145
309
  ],
146
310
  "requireConfirmation": false
147
311
  },
148
312
  {
149
- "id": "phase-3b-loop-decision",
313
+ "id": "phase-5b-loop-decision",
150
314
  "title": "Validation Loop Decision",
151
- "prompt": "Decide whether validation needs another pass.\n\n- If all errors are fixed and validation passes: stop.\n- If you fixed errors but haven't re-validated yet: continue.\n- If you've hit the iteration limit: stop and record what remains.\n\nEmit the loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue or stop\"\n }]\n}\n```",
152
- "requireConfirmation": false,
315
+ "promptBlocks": {
316
+ "goal": "Decide whether structural validation needs another pass.",
317
+ "constraints": [
318
+ "Use validator state, not vibes."
319
+ ],
320
+ "procedure": [
321
+ "If all errors are fixed and validation passes, stop.",
322
+ "If you fixed errors but have not re-validated yet, continue.",
323
+ "If you hit the iteration limit, stop and record what remains."
324
+ ],
325
+ "outputRequired": {
326
+ "artifact": "Emit a `wr.loop_control` artifact for `validation_loop` with `decision` set to `continue` or `stop`."
327
+ },
328
+ "verify": [
329
+ "The loop decision matches the actual validation state."
330
+ ]
331
+ },
153
332
  "outputContract": {
154
333
  "contractRef": "wr.contracts.loop_control"
155
- }
334
+ },
335
+ "requireConfirmation": false
156
336
  }
157
337
  ]
158
338
  },
159
339
  {
160
- "id": "phase-3-escalation",
340
+ "id": "phase-5-escalation",
161
341
  "title": "Validation Escalation",
162
342
  "runCondition": {
163
343
  "var": "validationPassed",
164
344
  "equals": false
165
345
  },
166
- "prompt": "Validation did not pass after the maximum number of attempts.\n\nCheck whether the last validation run actually passed. If `validationPassed` is false or if you're uncertain, list the remaining errors and your assessment of their severity.\n\nThen present the options:\n1. Proceed with known issues documented in handoff notes.\n2. Stop here so the user can intervene manually.\n\nDo not silently continue with a broken workflow.",
346
+ "promptBlocks": {
347
+ "goal": "Do not silently continue with a structurally broken workflow.",
348
+ "constraints": [
349
+ "Present the situation honestly."
350
+ ],
351
+ "procedure": [
352
+ "List the remaining validation errors and assess their severity.",
353
+ "Present the options: proceed with known issues documented, or stop so the user can intervene manually."
354
+ ]
355
+ },
167
356
  "requireConfirmation": true
168
357
  },
169
358
  {
170
- "id": "phase-4-review",
171
- "title": "Phase 4: Method Review",
172
- "prompt": "The workflow is valid. Now check whether it's actually good.\n\nScore each dimension 0-2 with one sentence of evidence:\n\n- `voiceClarity`: 0 = prompts are direct user-voiced asks in the workflow's domain vocabulary, 1 = mostly user-voiced but borrows vocabulary from other domains or has middleware narration, 2 = reads like system documentation or sounds like a different domain\n- `ceremonyLevel`: 0 = confirmations only at real decision points, 1 = one or two unnecessary gates, 2 = over-asks the user or adds routine ceremony\n- `loopSoundness`: 0 = loops have explicit exit rules, bounded iterations, and real decision steps, 1 = minor issues with exit clarity, 2 = vibes-only exit conditions or unbounded loops (score 0 if no loops)\n- `delegationBoundedness`: 0 = delegation is bounded and explicit or absent, 1 = one delegation could be tighter or a good routine/template opportunity was missed, 2 = open-ended or ownership-transferring delegation, or routine/template choices are unjustified (score 0 if no delegation and no reuse need exists)\n- `legacyPatterns`: 0 = no legacy anti-patterns, 1 = minor legacy residue, 2 = pseudo-DSL, learning paths, satisfaction loops, or regex-as-gate present\n- `artifactClarity`: 0 = clear what each artifact is for and which is canonical, 1 = mostly clear, 2 = ambiguous artifact ownership\n- `modeFit`: 0 = the workflow fits the selected `authoringMode`, 1 = minor creation/modernization mismatch remains, 2 = the workflow still reads like the wrong mode entirely\n- `modernizationDiscipline`: 0 = valuable behavior was preserved and legacy structure was removed cleanly, 1 = minor mismatch or over/under-preservation, 2 = either valuable behavior was lost or legacy structure still dominates (score 0 for `create` mode)\n\nIf the total score is 0-3: the workflow is ready.\nIf the total score is 4-6: fix the worst dimensions before proceeding.\nIf the total score is 7+: this needs significant rework. Fix the worst dimensions here, re-validate, and record what you would change if you could redraft from scratch.\n\nIf `authoringMode = modernize_existing`, check explicitly:\n- does the updated workflow preserve the right purpose?\n- did you remove legacy structure without rewriting valuable behavior away?\n- does the final workflow still align with `primaryBaseline` and `patternsToBorrow` without copying domain language?\n- does the final workflow respect the `legacyMapping`, especially for anything marked keep or merge?\n- do the routine/template choices still match the `routineAudit` and stay bounded?\n- do any prompts, captures, or handoff notes still assume this was a brand-new workflow?\n\nFix any issues directly in the workflow file. Re-run validation if you changed structure.\n\nCapture:\n- `reviewScores`\n- `reviewPassed`\n- `fixesApplied`",
173
- "promptFragments": [
359
+ "id": "phase-6-quality-gate-loop",
360
+ "type": "loop",
361
+ "title": "Phase 6: Quality-Gate Loop",
362
+ "loop": {
363
+ "type": "while",
364
+ "conditionSource": {
365
+ "kind": "artifact_contract",
366
+ "contractRef": "wr.contracts.loop_control",
367
+ "loopId": "quality_gate_loop"
368
+ },
369
+ "maxIterations": 2
370
+ },
371
+ "body": [
174
372
  {
175
- "id": "phase-4-quick-skip",
176
- "when": { "var": "rigorMode", "equals": "QUICK" },
177
- "text": "For QUICK rigor, do a fast pass on voiceClarity and legacyPatterns only. Skip the full rubric unless something looks off."
373
+ "id": "phase-6a-state-economy-audit",
374
+ "title": "State Economy Audit",
375
+ "promptBlocks": {
376
+ "goal": "Check whether every context field in the authored workflow earns its keep.",
377
+ "constraints": [
378
+ "A field is justified only if it materially affects routing, synthesis, confidence, or handoff quality.",
379
+ "Do not keep bookkeeping fields just because they sound organized."
380
+ ],
381
+ "procedure": [
382
+ "For each meaningful captured context field, record where it is set, where it is consumed, what decision or outcome it influences, and what gets worse if it is removed.",
383
+ "Classify each field as `keep`, `wire`, or `remove`.",
384
+ "Fix weak or unused fields directly in the workflow file."
385
+ ],
386
+ "outputRequired": {
387
+ "notesMarkdown": "State field audit with keep/wire/remove decisions and any fixes applied.",
388
+ "context": "Capture stateFieldAudit, unusedOrWeakFields, and stateEconomyPassed."
389
+ },
390
+ "verify": [
391
+ "Weak or unused fields are either wired meaningfully or removed."
392
+ ]
393
+ },
394
+ "requireConfirmation": false
178
395
  },
179
396
  {
180
- "id": "phase-4-thorough-extra",
181
- "when": { "var": "rigorMode", "equals": "THOROUGH" },
182
- "text": "Also review:\n- Are the context captures complete and correctly named?\n- Do runConditions and promptFragment conditions use the right variables?\n- Is the metaGuidance minimal and non-redundant?\n- Would this workflow make sense to an agent that has never seen it before?"
397
+ "id": "phase-6b-execution-simulation",
398
+ "title": "Execution Simulation",
399
+ "promptBlocks": {
400
+ "goal": "Simulate what would happen if the authored workflow ran on the user's real task.",
401
+ "constraints": [
402
+ "This is about practical utility, not only context-flow correctness.",
403
+ "Flag places where the workflow would produce paperwork, generic output, or false confidence instead of value."
404
+ ],
405
+ "procedure": [
406
+ "Trace the authored workflow step by step against the user's actual task or the closest realistic scenario.",
407
+ "For each step, ask: what would the agent actually do, what context would it have, what would it likely produce, and what would the next step inherit?",
408
+ "Identify likely weak steps, likely unsatisfying outputs, and likely false-confidence modes.",
409
+ "Fix issues directly in the workflow file when the right improvement is clear."
410
+ ],
411
+ "outputRequired": {
412
+ "notesMarkdown": "Execution simulation findings, likely weak steps, unsatisfying outputs, false-confidence risks, and any fixes applied.",
413
+ "context": "Capture simulationFindings, likelyWeakSteps, likelyUnsatisfyingOutputs, falseConfidenceFindings, and outcomeEffectivenessPassed."
414
+ },
415
+ "verify": [
416
+ "The simulation judges likely usefulness, not just structural legality."
417
+ ]
418
+ },
419
+ "promptFragments": [
420
+ {
421
+ "id": "phase-6b-quick",
422
+ "when": { "var": "rigorMode", "equals": "QUICK" },
423
+ "text": "For QUICK rigor, keep the simulation compact but still answer where the workflow would likely disappoint the user if it disappointed them at all."
424
+ }
425
+ ],
426
+ "requireConfirmation": false
183
427
  },
184
428
  {
185
- "id": "phase-4-trace-walkthrough",
186
- "when": { "var": "rigorMode", "not_equals": "QUICK" },
187
- "text": "After scoring the rubric, trace through the workflow with the user's original task as the test scenario. For each step:\n- What context variables does it need? Are they available from prior steps?\n- What would the agent actually do given only the prompt and references?\n- What does it produce? Does the next step have everything it needs?\n\nFlag any context flow gaps, naming mismatches, or steps where the agent wouldn't know what to do. Fix them in the workflow file."
429
+ "id": "phase-6c-adversarial-quality-review",
430
+ "title": "Adversarial Quality Review",
431
+ "promptBlocks": {
432
+ "goal": "Review the authored workflow as a quality gate, not just as a valid JSON file.",
433
+ "constraints": [
434
+ "Authoring integrity and outcome effectiveness are separate concerns. Score both.",
435
+ "Reviewer-family or validator output is evidence, not authority."
436
+ ],
437
+ "procedure": [
438
+ "Score these dimensions 0-2 with one sentence of evidence each: `voiceClarity`, `ceremonyLevel`, `loopSoundness`, `delegationBoundedness`, `artifactClarity`, `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, `handoffUtility`, and `modernizationDiscipline`.",
439
+ "If delegation is available and rigor is THOROUGH, run an adversarial review bundle with these lenses: `engine_native_reviewer`, `task_effectiveness_reviewer`, `state_economy_reviewer`, `false_confidence_reviewer`, `domain_fit_reviewer`, and `maintainer_reviewer`.",
440
+ "Synthesize what the review confirmed, what it challenged, and what changed your mind.",
441
+ "Set hard-gate failures whenever any of these are materially weak: `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, or `handoffUtility`.",
442
+ "Set `authoringIntegrityPassed = true` only if structural and authoring-quality dimensions are all acceptable. Set `outcomeEffectivenessPassed = true` only if the workflow is likely to achieve satisfying results for the user."
443
+ ],
444
+ "outputRequired": {
445
+ "notesMarkdown": "Quality review scores, adversarial review findings, hard-gate failures, and the current redesign severity.",
446
+ "context": "Capture reviewScores, hardGateFailures, authoringIntegrityPassed, outcomeEffectivenessPassed, qualityReviewSummary, and redesignSeverity."
447
+ },
448
+ "verify": [
449
+ "Hard gates reflect real user-trust risk, not cosmetic imperfections."
450
+ ]
451
+ },
452
+ "promptFragments": [
453
+ {
454
+ "id": "phase-6c-standard",
455
+ "when": { "var": "rigorMode", "equals": "STANDARD" },
456
+ "text": "For STANDARD rigor, you may keep the review self-executed unless uncertainty remains material. If you do delegate, prefer a small adversarial bundle."
457
+ },
458
+ {
459
+ "id": "phase-6c-thorough",
460
+ "when": { "var": "rigorMode", "equals": "THOROUGH" },
461
+ "text": "For THOROUGH rigor, assume the first review is not enough. Use adversarial reviewer lanes unless a hard limitation makes them impossible."
462
+ }
463
+ ],
464
+ "requireConfirmation": false
465
+ },
466
+ {
467
+ "id": "phase-6d-redesign-and-revalidate",
468
+ "title": "Redesign and Revalidate",
469
+ "promptBlocks": {
470
+ "goal": "If hard gates fail, redesign the workflow instead of polishing around the problem.",
471
+ "constraints": [
472
+ "Minor cosmetic refinement is not enough when task effectiveness or false-confidence resistance is weak.",
473
+ "If structure changes, re-run real validators before leaving this step."
474
+ ],
475
+ "procedure": [
476
+ "If `authoringIntegrityPassed` and `outcomeEffectivenessPassed` are both true and `hardGateFailures` is empty, say that no redesign is needed.",
477
+ "Otherwise classify the needed redesign severity as `minor`, `architectural`, or `unsafe_to_ship` and apply the necessary fixes directly to the workflow file.",
478
+ "If the redesign changed structure, run the real validators again and update the validation state before leaving this step."
479
+ ],
480
+ "outputRequired": {
481
+ "notesMarkdown": "Redesign actions taken, why they were needed, and whether revalidation passed.",
482
+ "context": "Capture redesignApplied, validationPassed, and remainingConcerns."
483
+ },
484
+ "verify": [
485
+ "Structural redesign problems are handled as redesign problems, not cosmetic ones."
486
+ ]
487
+ },
488
+ "requireConfirmation": false
489
+ },
490
+ {
491
+ "id": "phase-6e-quality-loop-decision",
492
+ "title": "Quality Loop Decision",
493
+ "promptBlocks": {
494
+ "goal": "Decide whether the quality-gate loop needs another pass.",
495
+ "constraints": [
496
+ "Use hard gates and actual remaining concerns, not vibes."
497
+ ],
498
+ "procedure": [
499
+ "Continue if `authoringIntegrityPassed = false`.",
500
+ "Otherwise continue if `outcomeEffectivenessPassed = false`.",
501
+ "Otherwise continue if `hardGateFailures` is not empty.",
502
+ "Otherwise continue if `redesignSeverity` is `architectural` or `unsafe_to_ship` and you have not yet re-reviewed the redesigned workflow.",
503
+ "Otherwise continue if `validationPassed = false` after redesign.",
504
+ "Otherwise stop."
505
+ ],
506
+ "outputRequired": {
507
+ "artifact": "Emit a `wr.loop_control` artifact for `quality_gate_loop` with `decision` set to `continue` or `stop`."
508
+ },
509
+ "verify": [
510
+ "The workflow does not stop while hard trust problems remain."
511
+ ]
512
+ },
513
+ "outputContract": {
514
+ "contractRef": "wr.contracts.loop_control"
515
+ },
516
+ "requireConfirmation": false
188
517
  }
189
- ],
190
- "requireConfirmation": false
518
+ ]
191
519
  },
192
520
  {
193
- "id": "phase-5-refine",
194
- "title": "Phase 5: Refinement",
195
- "runCondition": {
196
- "var": "rigorMode",
197
- "not_equals": "QUICK"
521
+ "id": "phase-7-final-trust-handoff",
522
+ "title": "Phase 7: Final Trust Handoff",
523
+ "promptBlocks": {
524
+ "goal": "Summarize the authored or modernized workflow as a trust decision, not just a file edit.",
525
+ "constraints": [
526
+ "Keep it concise. The workflow file is the deliverable, not the summary."
527
+ ],
528
+ "procedure": [
529
+ "State the workflow file path and name, whether it was created or modernized, and what it does in one sentence.",
530
+ "Summarize the step structure, loops, confirmations, and delegation profile.",
531
+ "Report validation status, authoring-integrity status, and outcome-effectiveness status.",
532
+ "Set a final `workflowReadinessVerdict`: `ready`, `ready_with_conditions`, or `not_ready`.",
533
+ "List the main improvements, residual weaknesses, trust risks if any, and how to test the workflow."
534
+ ],
535
+ "outputRequired": {
536
+ "notesMarkdown": "Final trust handoff covering readiness verdict, validation status, strengths, residual weaknesses, and testing guidance.",
537
+ "context": "Capture workflowReadinessVerdict, trustRiskSummary, knownFailureModes, and residualWeaknesses."
538
+ },
539
+ "verify": [
540
+ "The final handoff makes clear whether WorkRail should trust this workflow."
541
+ ]
198
542
  },
199
- "prompt": "The workflow is valid and reviewed. Check whether any of these improvements are worth making:\n\n1. **Prompt fragments**: are there steps with near-identical prompts that differ only by rigor mode or `authoringMode`? Extract the differences into promptFragments.\n2. **Extension points**: are there slots that different teams or projects would want to customize? Declare them.\n3. **References**: should the workflow point at external documents the agent should be aware of during execution?\n4. **Deduplication**: is there repeated prose across steps that could be moved to metaGuidance or a shared pattern?\n5. **Context templates**: are there simple variable substitutions that would make prompts cleaner?\n\nOnly make changes that genuinely improve the workflow. Do not refine for the sake of refining.\n\nIf `authoringMode = modernize_existing`, prefer refinements that remove leftover legacy wording or mismatched structure over cosmetic rewrites.\n\nIf you change anything, re-run validation.\n\nCapture:\n- `refinementsApplied`\n- `finalValidationPassed`",
200
- "requireConfirmation": false
201
- },
202
- {
203
- "id": "phase-6-handoff",
204
- "title": "Phase 6: Handoff",
205
- "prompt": "Summarize what you authored or updated.\n\nInclude:\n- workflow file path and name\n- whether you created a new workflow or modernized an existing one\n- what the workflow does (one sentence)\n- step count and structure overview\n- loops, confirmations, and delegation if any\n- validation status\n- for modernization: the main improvements and any legacy residue still left intentionally\n- any known limitations or future improvements\n- how to test it: where to place the file and how to run it\n\nKeep it concise. The workflow file is the deliverable, not the summary.",
206
543
  "notesOptional": true,
207
544
  "requireConfirmation": false
208
545
  }