@exaudeus/workrail 3.31.1 → 3.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dist/cli/commands/index.d.ts +1 -0
  2. package/dist/cli/commands/index.js +3 -1
  3. package/dist/cli/commands/worktrain-await.js +11 -9
  4. package/dist/cli/commands/worktrain-daemon-install.d.ts +35 -0
  5. package/dist/cli/commands/worktrain-daemon-install.js +291 -0
  6. package/dist/cli/commands/worktrain-daemon.d.ts +31 -0
  7. package/dist/cli/commands/worktrain-daemon.js +272 -0
  8. package/dist/cli/commands/worktrain-spawn.js +11 -9
  9. package/dist/cli-worktrain.js +329 -0
  10. package/dist/cli.js +4 -22
  11. package/dist/console/standalone-console.d.ts +28 -0
  12. package/dist/console/standalone-console.js +142 -0
  13. package/dist/{console/assets/index-6H9DeFxj.js → console-ui/assets/index-BuJFLLfY.js} +1 -1
  14. package/dist/{console → console-ui}/index.html +1 -1
  15. package/dist/daemon/agent-loop.d.ts +26 -0
  16. package/dist/daemon/agent-loop.js +53 -2
  17. package/dist/daemon/daemon-events.d.ts +103 -0
  18. package/dist/daemon/daemon-events.js +56 -0
  19. package/dist/daemon/workflow-runner.d.ts +6 -3
  20. package/dist/daemon/workflow-runner.js +229 -33
  21. package/dist/infrastructure/session/HttpServer.js +133 -34
  22. package/dist/manifest.json +134 -70
  23. package/dist/mcp/output-schemas.d.ts +30 -30
  24. package/dist/mcp/transports/bridge-events.d.ts +4 -0
  25. package/dist/mcp/transports/fatal-exit.js +4 -0
  26. package/dist/mcp/transports/http-entry.js +2 -0
  27. package/dist/mcp/transports/stdio-entry.js +26 -6
  28. package/dist/mcp/v2/tools.d.ts +4 -4
  29. package/dist/trigger/adapters/github-poller.d.ts +44 -0
  30. package/dist/trigger/adapters/github-poller.js +190 -0
  31. package/dist/trigger/adapters/gitlab-poller.d.ts +27 -0
  32. package/dist/trigger/adapters/gitlab-poller.js +81 -0
  33. package/dist/trigger/delivery-client.d.ts +2 -1
  34. package/dist/trigger/delivery-client.js +4 -1
  35. package/dist/trigger/index.d.ts +4 -1
  36. package/dist/trigger/index.js +5 -1
  37. package/dist/trigger/polled-event-store.d.ts +22 -0
  38. package/dist/trigger/polled-event-store.js +173 -0
  39. package/dist/trigger/polling-scheduler.d.ts +20 -0
  40. package/dist/trigger/polling-scheduler.js +249 -0
  41. package/dist/trigger/trigger-listener.d.ts +5 -0
  42. package/dist/trigger/trigger-listener.js +53 -4
  43. package/dist/trigger/trigger-router.d.ts +4 -2
  44. package/dist/trigger/trigger-router.js +7 -4
  45. package/dist/trigger/trigger-store.js +114 -33
  46. package/dist/trigger/types.d.ts +17 -1
  47. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +224 -224
  48. package/dist/v2/durable-core/schemas/session/events.d.ts +42 -42
  49. package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
  50. package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
  51. package/dist/v2/durable-core/tokens/payloads.d.ts +52 -52
  52. package/dist/v2/usecases/console-routes.js +3 -3
  53. package/dist/v2/usecases/console-service.js +133 -9
  54. package/dist/v2/usecases/console-types.d.ts +7 -0
  55. package/docs/design/daemon-conversation-logging-plan.md +98 -0
  56. package/docs/design/daemon-conversation-logging-review.md +55 -0
  57. package/docs/design/daemon-conversation-logging.md +129 -0
  58. package/docs/design/github-polling-adapter-design-candidates.md +226 -0
  59. package/docs/design/github-polling-adapter-design-review-findings.md +131 -0
  60. package/docs/design/github-polling-adapter-implementation-plan.md +284 -0
  61. package/docs/design/implementation_plan.md +192 -0
  62. package/docs/design/workflow-id-validation-at-startup.md +146 -0
  63. package/docs/design/workflow-id-validation-design-review.md +87 -0
  64. package/docs/design/workflow-id-validation-implementation-plan.md +185 -0
  65. package/docs/design/worktrain-system-prompt-report-issue-candidates.md +135 -0
  66. package/docs/design/worktrain-system-prompt-report-issue-design-review.md +73 -0
  67. package/docs/ideas/backlog.md +465 -0
  68. package/package.json +1 -1
  69. package/workflows/architecture-scalability-audit.json +1 -1
  70. package/workflows/bug-investigation.agentic.v2.json +3 -3
  71. package/workflows/coding-task-workflow-agentic.json +32 -32
  72. package/workflows/coding-task-workflow-agentic.lean.v2.json +1 -1
  73. package/workflows/coding-task-workflow-agentic.v2.json +7 -7
  74. package/workflows/mr-review-workflow.agentic.v2.json +21 -12
  75. package/workflows/personal-learning-materials-creation-branched.json +2 -2
  76. package/workflows/production-readiness-audit.json +1 -1
  77. package/workflows/relocation-workflow-us.json +2 -2
  78. package/workflows/ui-ux-design-workflow.json +14 -14
  79. package/workflows/workflow-for-workflows.json +3 -3
  80. package/workflows/workflow-for-workflows.v2.json +2 -2
  81. package/workflows/wr.discovery.json +1 -1
  82. package/dist/{console → console-ui}/assets/index-8dh0Psu-.css +0 -0
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "mr-review-workflow-agentic",
3
- "name": "MR Review Workflow (Lean v2 \u2022 Notes-First \u2022 Evidence-Driven Reviewer Families)",
4
- "version": "2.5.0",
3
+ "name": "MR Review Workflow (Lean v2 Notes-First Evidence-Driven Reviewer Families)",
4
+ "version": "2.6.0",
5
5
  "description": "Lean v2 MR review workflow. Merges intake, missing-input gating, context gathering, and re-triage into one structured front phase, then drives review through a shared fact packet, parallel reviewer families, contradiction-driven synthesis, and evidence-first final validation.",
6
6
  "about": "## MR Review Workflow\n\nThis workflow conducts a structured, evidence-driven code review of a merge request or pull request. It is designed for cases where you want a thorough, audit-quality review rather than a quick glance -- particularly when the change touches critical surfaces, spans many files, or carries real production risk.\n\n**What it does:**\nThe workflow locates and bounds the review target, enriches it with PR context and ticket intent, classifies the change by risk and shape, then runs parallel \"reviewer family\" agents (covering correctness, architecture, runtime risk, tests/docs, and more) from a shared neutral fact packet. It reconciles contradictions between reviewer families, stress-tests the recommendation with adversarial validators, and produces a final handoff with severity-classified findings and ready-to-post MR comments.\n\n**When to use it:**\n- Before merging a PR that touches auth, data models, APIs, or critical paths\n- When you want independent perspectives on a change without the noise of an unstructured review\n- When the change is large or the reviewer is unfamiliar with the surrounding code\n- When you need a reproducible audit trail for compliance or team review processes\n\n**What it produces:**\nA final review recommendation (approve / request changes / needs discussion) with a confidence band, severity-graded findings (Critical / Major / Minor / Nit), ready-to-post MR comments, a coverage ledger showing which review domains were checked, and an honest disclosure of any context that could not be recovered.\n\n**How to get good results:**\nProvide the PR URL, branch name, or diff. The workflow can recover most context on its own -- ticket links, repo patterns, policy docs -- but if the change has non-obvious intent, a one-sentence description of the goal helps calibrate review sensitivity. The workflow will not post comments or approve/reject without explicit instruction.",
7
7
  "examples": [
@@ -25,7 +25,10 @@
25
25
  {
26
26
  "id": "evidence_quality",
27
27
  "purpose": "Each finding cites a specific file, function, or line. No finding relies on intuition or pattern-matching without concrete grounding.",
28
- "levels": ["low", "high"]
28
+ "levels": [
29
+ "low",
30
+ "high"
31
+ ]
29
32
  }
30
33
  ]
31
34
  },
@@ -36,7 +39,10 @@
36
39
  {
37
40
  "id": "coverage_completeness",
38
41
  "purpose": "All material review domains are checked or explicitly acknowledged as gaps in the coverage ledger.",
39
- "levels": ["low", "high"]
42
+ "levels": [
43
+ "low",
44
+ "high"
45
+ ]
40
46
  }
41
47
  ]
42
48
  },
@@ -47,7 +53,10 @@
47
53
  {
48
54
  "id": "contradiction_resolution",
49
55
  "purpose": "Every material contradiction is resolved by evidence or explicitly acknowledged with a stated position and rationale.",
50
- "levels": ["low", "high"]
56
+ "levels": [
57
+ "low",
58
+ "high"
59
+ ]
51
60
  }
52
61
  ]
53
62
  }
@@ -76,7 +85,7 @@
76
85
  {
77
86
  "id": "phase-0-understand-and-classify",
78
87
  "title": "Phase 0: Locate, Bound, Enrich & Classify",
79
- "prompt": "Build the review foundation in one pass.\n\nStep 1 \u2014 Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 \u2014 Locate and bound the review target:\nAttempt to determine the strongest available review target and boundary.\n\nAttempt to establish:\n- `reviewTargetKind` from the strongest available source such as PR/MR, branch, patch, diff, or local working tree changes\n- `reviewTargetSource` describing where the target came from\n- likely PR/MR identity when available (`prUrl`, `prNumber`)\n- likely base / ancestor reference (`baseCandidate`, `mergeBaseRef`) when available\n- whether the branch may include inherited or out-of-scope changes\n- `boundaryConfidence`: High / Medium / Low\n\nDo not over-prescribe your own investigation path. Use the strongest available evidence and record uncertainty honestly.\n\nStep 3 \u2014 Enrich with context:\nRecover the strongest available intent and policy context from whatever sources are actually available.\n\nAttempt to recover:\n- MR title and purpose\n- ticket / issue / acceptance context (`ticketRefs`, `ticketContext`)\n- supporting docs / specs / rollout context (`supportingDocsFound`)\n- repo or user policy/convention context when it is likely to affect review judgment (`policySourcesFound`)\n- `contextConfidence`: High / Medium / Low\n\nStep 4 \u2014 Review-surface hygiene:\nClassify the visible change into a minimal review surface.\n\nSet:\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- `likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n\nThe goal is not a giant ledger. 
The goal is to avoid treating every visible changed file as equally worthy of deep review by default.\n\nStep 5 \u2014 Classify the review:\nAfter exploration, classify the work.\n\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `shapeProfile`: choose the best primary label from `isolated_change`, `crosscutting_change`, `mechanically_noisy_change`, or `ambiguous_boundary`\n- `changeTypeProfile`: choose the best primary label from `general_code_change`, `api_contract_change`, `data_model_or_migration`, `security_sensitive`, or `test_only`\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n- `needsBoundaryFollowup`: true / false\n- `needsContextFollowup`: true / false\n- `needsReviewerBundle`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nMinimal routing guidance:\n- if `boundaryConfidence = Low`, bias toward boundary/context follow-up before strong recommendation confidence\n- if `changeTypeProfile = api_contract_change`, bias toward contract/consumer/backward-compatibility scrutiny\n- if `changeTypeProfile = data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny\n- if `changeTypeProfile = security_sensitive`, bias toward adversarial/runtime-risk scrutiny and lower tolerance for weak evidence\n- if `changeTypeProfile = test_only`, bias toward stronger false-positive suppression\n- if `shapeProfile = mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings\n\nStep 6 \u2014 Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and context remains incomplete, and delegation is available, spawn 
TWO WorkRail Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 7 \u2014 Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nFallback behavior:\n- if PR/MR is not found but a branch/diff is inspectable, continue with downgraded context confidence and disclose missing PR context later\n- if the branch is inspectable but merge-base / ancestor remains ambiguous, continue with downgraded boundary confidence, set `needsBoundaryFollowup = true`, and disclose the uncertainty later\n- if ticket or supporting docs are missing, continue with downgraded context confidence and avoid overclaiming intent-sensitive findings\n- if only a patch/diff is available, continue if it is inspectable, but keep lower confidence on intent/boundary-dependent conclusions\n- if the review target itself is missing, ask only for that missing artifact and stop\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewTargetKind`\n- `reviewTargetSource`\n- `prUrl`\n- `prNumber`\n- `baseCandidate`\n- `mergeBaseRef`\n- `boundaryConfidence`\n- `contextConfidence`\n- `mrTitle`\n- `mrPurpose`\n- `ticketRefs`\n- `ticketContext`\n- `supportingDocsFound`\n- `policySourcesFound`\n- `accessibleContextSources`\n- `missingContextSources`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `shapeProfile`\n- `changeTypeProfile`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `needsBoundaryFollowup`\n- `needsContextFollowup`\n- `needsReviewerBundle`\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- 
`likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before\n- before leaving this phase, either establish the likely review boundary or explicitly record why you could not\n\nAlso set in the context object: one sentence describing what you are trying to accomplish (e.g. \"implement OAuth refresh token rotation\", \"review PR #47 before merge\"). This populates the session title in the Workspace console immediately.",
88
+ "prompt": "Build the review foundation in one pass.\n\nStep 1 Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 Locate and bound the review target:\nAttempt to determine the strongest available review target and boundary.\n\nAttempt to establish:\n- `reviewTargetKind` from the strongest available source such as PR/MR, branch, patch, diff, or local working tree changes\n- `reviewTargetSource` describing where the target came from\n- likely PR/MR identity when available (`prUrl`, `prNumber`)\n- likely base / ancestor reference (`baseCandidate`, `mergeBaseRef`) when available\n- whether the branch may include inherited or out-of-scope changes\n- `boundaryConfidence`: High / Medium / Low\n\nDo not over-prescribe your own investigation path. Use the strongest available evidence and record uncertainty honestly.\n\nStep 3 Enrich with context:\nRecover the strongest available intent and policy context from whatever sources are actually available.\n\nAttempt to recover:\n- MR title and purpose\n- ticket / issue / acceptance context (`ticketRefs`, `ticketContext`)\n- supporting docs / specs / rollout context (`supportingDocsFound`)\n- repo or user policy/convention context when it is likely to affect review judgment (`policySourcesFound`)\n- `contextConfidence`: High / Medium / Low\n\nStep 4 Review-surface hygiene:\nClassify the visible change into a minimal review surface.\n\nSet:\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- `likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n\nThe goal is not a giant ledger. 
The goal is to avoid treating every visible changed file as equally worthy of deep review by default.\n\nStep 5 Classify the review:\nAfter exploration, classify the work.\n\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `shapeProfile`: choose the best primary label from `isolated_change`, `crosscutting_change`, `mechanically_noisy_change`, or `ambiguous_boundary`\n- `changeTypeProfile`: choose the best primary label from `general_code_change`, `api_contract_change`, `data_model_or_migration`, `security_sensitive`, or `test_only`\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n- `needsBoundaryFollowup`: true / false\n- `needsContextFollowup`: true / false\n- `needsReviewerBundle`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nMinimal routing guidance:\n- if `boundaryConfidence = Low`, bias toward boundary/context follow-up before strong recommendation confidence\n- if `changeTypeProfile = api_contract_change`, bias toward contract/consumer/backward-compatibility scrutiny\n- if `changeTypeProfile = data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny\n- if `changeTypeProfile = security_sensitive`, bias toward adversarial/runtime-risk scrutiny and lower tolerance for weak evidence\n- if `changeTypeProfile = test_only`, bias toward stronger false-positive suppression\n- if `shapeProfile = mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings\n\nStep 6 Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and context remains incomplete, and delegation is available, spawn TWO WorkRail 
Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 7 Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nFallback behavior:\n- if PR/MR is not found but a branch/diff is inspectable, continue with downgraded context confidence and disclose missing PR context later\n- if the branch is inspectable but merge-base / ancestor remains ambiguous, continue with downgraded boundary confidence, set `needsBoundaryFollowup = true`, and disclose the uncertainty later\n- if ticket or supporting docs are missing, continue with downgraded context confidence and avoid overclaiming intent-sensitive findings\n- if only a patch/diff is available, continue if it is inspectable, but keep lower confidence on intent/boundary-dependent conclusions\n- if the review target itself is missing, ask only for that missing artifact and stop\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewTargetKind`\n- `reviewTargetSource`\n- `prUrl`\n- `prNumber`\n- `baseCandidate`\n- `mergeBaseRef`\n- `boundaryConfidence`\n- `contextConfidence`\n- `mrTitle`\n- `mrPurpose`\n- `ticketRefs`\n- `ticketContext`\n- `supportingDocsFound`\n- `policySourcesFound`\n- `accessibleContextSources`\n- `missingContextSources`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `shapeProfile`\n- `changeTypeProfile`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `needsBoundaryFollowup`\n- `needsContextFollowup`\n- `needsReviewerBundle`\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- 
`likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before\n- before leaving this phase, either establish the likely review boundary or explicitly record why you could not\n\nAlso set in the context object: one sentence describing what you are trying to accomplish (e.g. \"implement OAuth refresh token rotation\", \"review PR #47 before merge\"). This populates the session title in the Workspace console immediately.",
80
89
  "requireConfirmation": {
81
90
  "or": [
82
91
  {
@@ -112,12 +121,12 @@
112
121
  "Keep `recommendationHypothesis` as a secondary hypothesis to challenge, not a frame to defend."
113
122
  ],
114
123
  "procedure": [
115
- "Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, review target and review-surface summary, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, discovered ticket/doc/policy context, accessible and missing context sources, and explicit open unknowns.",
116
- "Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
124
+ "Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, review target and review-surface summary, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, discovered ticket/doc/policy context, accessible and missing context sources, explicit open unknowns, relevant coding philosophy principles for this change (from CLAUDE.md, AGENTS.md, ~/.firebender/commands/philosophy.mdc, or soul file -- scope to the 3-5 most relevant for what was changed, not all principles), and existing patterns in the changed module (how similar problems are solved today in the same directory).",
125
+ "Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `philosophy_alignment`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
117
126
  "Perform a preliminary self-review from the fact packet before choosing reviewer families.",
118
- "Reviewer family options: `correctness_invariants`, `patterns_architecture`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
127
+ "Reviewer family options: `correctness_invariants`, `patterns_architecture`, `philosophy_alignment`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
119
128
  "Selection guidance: QUICK = no bundle by default unless ambiguity still feels material; STANDARD = 3 families by default; THOROUGH = 5 families by default.",
120
- "Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial.",
129
+ "Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial. Include `philosophy_alignment` in STANDARD and THOROUGH when the change introduces new abstractions, modifies core patterns, or touches areas where the codebase philosophy is particularly relevant (error handling, type safety, DI boundaries, state management).",
121
130
  "Routing guidance: for `api_contract_change`, bias toward contract / consumer / backward-compatibility scrutiny; for `data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny; for `security_sensitive`, bias toward runtime-risk scrutiny and lower tolerance for weak evidence; for `test_only`, bias toward stronger false-positive suppression; for `mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings.",
122
131
  "Set `coverageUncertainCount` as the number of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`.",
123
132
  "Initialize `contradictionCount`, `blindSpotCount`, and `falsePositiveRiskCount` to `0` if no reviewer-family bundle will run."
@@ -158,7 +167,7 @@
158
167
  "procedure": [
159
168
  "Before delegating, restate the current `recommendationHypothesis` and say which reviewer family is most likely to challenge it.",
160
169
  "Each reviewer family must return: key findings, severity estimates, confidence level, top risks, recommendation, and what others may have missed.",
161
- "Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue category the others may miss.",
170
+ "Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue category the others may miss; `philosophy_alignment` = evaluate the implementation against the scoped principles from the fact packet -- name each violation by principle, explain how the code diverges, and distinguish real violations from stylistic preferences. Also ask: is this the right design approach, not just a correct one? Does it follow the established patterns in this module or introduce unnecessary divergence?",
162
171
  "Mode-adaptive parallelism: STANDARD = spawn THREE WorkRail Executors SIMULTANEOUSLY for the selected families; THOROUGH = spawn FIVE WorkRail Executors SIMULTANEOUSLY for the selected families.",
163
172
  "After receiving outputs, explicitly synthesize: what reviewer families confirmed, what was genuinely new, what appeared weak or overreached, and what changed your mind or did not.",
164
173
  "Set these keys in the next `continue_workflow` call's `context` object: `familyFindingsSummary`, `familyRecommendationSpread`, `contradictionCount`, `blindSpotCount`, `falsePositiveRiskCount`, `needsSimulation`.",
@@ -222,7 +231,7 @@
222
231
  {
223
232
  "id": "phase-4b-canonical-synthesis",
224
233
  "title": "Canonical Synthesis and Coverage Update",
225
- "prompt": "Synthesize all reviewer-family outputs and targeted follow-up into one canonical review state.\n\nPart A \u2014 Compare against your hypothesis:\n- revisit `recommendationHypothesis`\n- what did the evidence confirm?\n- what did it challenge?\n- what changed your mind, what held firm, and what do you explicitly reject?\n\nPart B \u2014 Synthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nPart C \u2014 Coverage ledger rules:\n- move a domain from `uncertain` to `checked` only when evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when required follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nPart D \u2014 Recommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and the evidence is strong enough for the current 
mode\n- set `recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions are unresolved, or important coverage gaps still weaken the recommendation\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nIf `reviewDocPath` exists, keep it aligned for human readability only. Notes/context remain workflow truth.",
234
+ "prompt": "Synthesize all reviewer-family outputs and targeted follow-up into one canonical review state.\n\nPart A Compare against your hypothesis:\n- revisit `recommendationHypothesis`\n- what did the evidence confirm?\n- what did it challenge?\n- what changed your mind, what held firm, and what do you explicitly reject?\n\nPart B Synthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nPart C Coverage ledger rules:\n- move a domain from `uncertain` to `checked` only when evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when required follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nPart D Recommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and the evidence is strong enough for the current mode\n- set 
`recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions are unresolved, or important coverage gaps still weaken the recommendation\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nIf `reviewDocPath` exists, keep it aligned for human readability only. Notes/context remain workflow truth.",
226
235
  "requireConfirmation": false
227
236
  },
228
237
  {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "personal-learning-materials-creation-branched",
3
- "name": "Personal Learning Materials Creation Workflow (Branched)",
3
+ "name": "Personal Learning Materials Creation Workflow",
4
4
  "version": "1.1.0",
5
5
  "description": "Use this to create learning materials for a course or subject. Adapts depth and format to your time budget — Quick Start, Balanced, or Comprehensive.",
6
6
  "about": "## Personal Learning Materials Creation Workflow\n\nUse this to create the actual study materials for a course or subject you are learning -- study guides, exercises, assessments, and spaced-repetition review materials. This workflow assumes you already have a learning plan or course design with defined objectives; it focuses on producing materials that directly support those objectives.\n\n### What it produces\n\nDepending on the path you choose:\n\n- **Quick Start (2-3 weeks)**: study guides and basic exercises for immediate use.\n- **Balanced (4-6 weeks)**: a complete learning system -- study guides, exercises, assessments, and spaced repetition materials.\n- **Comprehensive (8-12 weeks)**: a full learning ecosystem with interactive elements, effectiveness measurement, and a scalable update protocol.\n\n### When to use it\n\n- You have a learning plan and need to turn it into usable materials.\n- You are preparing for a certification, exam, or structured self-study program.\n- You want materials tailored to your specific objectives rather than relying entirely on off-the-shelf resources.\n\n### When NOT to use it\n\n- You haven't designed your learning course yet -- use the Personal Learning Course Design workflow first to define objectives and structure.\n- You need to design a course for others to take -- use the Learner-Centered Course workflow instead.\n\n### How to get good results\n\n- Select the path honestly based on available time. Starting with Quick Start and expanding later is better than committing to Comprehensive and abandoning it.\n- Have your learning objectives written out before starting -- the workflow maps every material directly to an objective.\n- Be specific about your preferred learning formats (text, diagrams, flashcards, practice problems) at the start.",
@@ -192,4 +192,4 @@
192
192
  "hasValidation": true
193
193
  }
194
194
  ]
195
- }
195
+ }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "production-readiness-audit",
3
- "name": "Production Readiness Audit (v2 \u2022 Evidence-Driven Readiness Review)",
3
+ "name": "Production Readiness Audit",
4
4
  "version": "0.1.0",
5
5
  "description": "Use this to audit a codebase scope for production readiness. Checks debugging correctness, runtime operability, artifact realism, technical debt, and anything that would prevent honest production deployment.",
6
6
  "about": "## Production Readiness Audit\n\nThis workflow performs a structured, evidence-driven audit to answer one question honestly: is this code actually ready for production? It goes beyond style and lint -- it looks for debugging correctness, runtime operability under real conditions, artifact realism (stale code, fake completeness, placeholder behavior), maintainability debt, test and observability gaps, and security or performance risks.\n\n**What it does:**\nThe workflow bounds the audit scope, states a readiness hypothesis, freezes a neutral fact packet, then runs parallel reviewer families -- each specializing in a different readiness dimension. It reconciles contradictions through an evidence loop and produces a final verdict: `ready`, `ready_with_conditions`, `not_ready`, or `inconclusive`.\n\n**When to use it:**\n- Before shipping a new service, feature, or major refactor to production\n- When a codebase has been under rapid development and you want an honest readiness check before a launch deadline\n- When onboarding to a codebase and wanting a structured assessment of its production posture\n- When a post-incident review surfaces questions about whether the system was truly ready\n\n**What it produces:**\nA verdict with a confidence band, a prioritized list of blocker-grade and major findings, debugging leads, runtime and operational risk callouts, artifact-realism concerns (misleading completeness, stale docs, dead paths), a coverage ledger by audit domain, and a remediation order with specific follow-up recommendations.\n\n**How to get good results:**\nProvide a clear scope -- a service name, a module path, or a feature boundary. The narrower and more concrete the scope, the sharper the findings. If \"production-ready\" has a specific meaning for your team (e.g. SLA requirements, specific deployment constraints), mention it. The workflow will try to infer the production bar from repo patterns and context, but explicit criteria improve accuracy.",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "relocation-workflow-us",
3
- "name": "US Relocation Decision Workflow (Evidence-Driven • AreaSpec • Ranked Dossier)",
3
+ "name": "US Relocation Decision Workflow",
4
4
  "version": "1.0.0",
5
5
  "description": "Use this to evaluate US cities or regions for a potential relocation. Discovers your preferences, generates candidate areas, screens them, and produces a ranked dossier with evidence.",
6
6
  "about": "## US Relocation Decision Workflow\n\nUse this to evaluate US cities and regions for a potential move. The workflow takes a structured, evidence-driven approach: it starts by calibrating your preferences and dealbreakers, generates a broad diverse pool of candidate areas (including non-obvious ones), screens them systematically, and produces a ranked dossier you can actually act on.\n\n### What it produces\n\n- A `RELOCATION_DOSSIER.md` with your full preference model, screening results, and comparison matrix.\n- Individual per-candidate profiles at `relocation-profiles/<slug>.md` covering housing, cost of living, taxes, safety, climate risk, schools, healthcare, commute, and any other modules you activate.\n- A scored ranking with explainable reasoning and an explicit disclosure of any data gaps.\n- A next-steps plan: visit recommendations, open questions per candidate, and pivot triggers.\n\n### When to use it\n\n- You are seriously considering a US relocation and want a rigorous, evidence-backed shortlist.\n- You want to surface non-obvious candidates you wouldn't have considered on your own.\n- You've been anchoring on a handful of cities and want a structured process to either validate or challenge that.\n\n### How to get good results\n\n- Be honest about dealbreakers upfront -- the workflow builds these into screening and filters candidates early.\n- The MaxDiff weight calibration exercise (offered in Phase 1) is worth doing if you're unsure how to weight competing priorities. It takes 5-10 minutes and produces more reliable weights than guessing.\n- The calibration deck in Phase 1 shows you lifestyle archetypes and asks for reactions -- engage with this seriously. Surprises in your reactions are valuable signal.\n- The workflow activates only the research modules you need. Keep it focused on what actually matters to your household.",
@@ -245,4 +245,4 @@
245
245
  "requireConfirmation": true
246
246
  }
247
247
  ]
248
- }
248
+ }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "ui-ux-design-workflow",
3
- "name": "UI/UX Design Workflow (v1 \u2022 Process-Enforced \u2022 Evidence-Driven)",
3
+ "name": "UI/UX Design Workflow",
4
4
  "version": "0.1.0",
5
5
  "description": "Design UI/UX from scratch with enforced process. Makes problem framing structurally required before solution proposals, forces exploration of multiple design directions before convergence, and applies reviewer families for information architecture, UX laws, accessibility, edge cases, and content. Output: a design spec concrete enough to implement or review.",
6
6
  "about": "## UI/UX Design Workflow\n\nThis workflow produces a design spec for a new feature, screen, component, or interaction. It is built around a single principle: problem framing must happen before any solutions are proposed. The workflow makes this structurally impossible to skip, which prevents the most common failure mode in AI-assisted design -- going straight from \"I need a settings screen\" to a layout without ever asking who uses it or what they are trying to do.\n\n**What it does:**\nPhase 0 frames the problem by reading existing code patterns and asking only what tools cannot answer. Phase 1 generates 2-3 genuinely different design directions before any one is chosen. Phases 2-5 run parallel reviewer families -- information architecture, UX laws (Hick's Law, Miller's Law, Fitts's Law, and others), accessibility (specific WCAG requirements, not just \"follow WCAG\"), edge cases (empty, error, loading, first-use), and content quality -- then synthesize their findings, resolve contradictions, and write a spec only after all hard quality gates pass.\n\n**When to use it:**\n- You need to design a new screen, feature, or non-trivial component\n- You want explicit coverage of accessibility, edge states, and UX laws, not just a layout sketch\n- You need a spec concrete enough for an engineer to implement or a designer to review\n- Simple single-component changes also work through a lighter direct-spec path\n\n**What it produces:**\nA design spec with 8 sections: design decision, information architecture, interaction design, all element states, specific accessibility requirements, content copy, reviewer findings with citations, and open questions that still require human visual review.\n\n**How to get good results:**\nPoint the workflow to your codebase so it can read existing components and patterns. Provide the design system location if it is not in the repo. Share any known user pain points or research. 
The workflow will surface what it cannot determine on its own.",
@@ -24,7 +24,7 @@
24
24
  ],
25
25
  "metaGuidance": [
26
26
  "PROCESS IS THE VALUE: the biggest failure mode in AI-assisted design is skipping to solutions before understanding the problem. This workflow makes that structurally impossible. Do not shortcut Phase 0.",
27
- "EVIDENCE OVER PLATITUDES: every finding must cite a specific element from the context packet. 'Consider reducing cognitive load' is not a finding. 'The settings panel has 14 options, violating Miller\u2019s Law (7\u00b12)' is a finding.",
27
+ "EVIDENCE OVER PLATITUDES: every finding must cite a specific element from the context packet. 'Consider reducing cognitive load' is not a finding. 'The settings panel has 14 options, violating Miller’s Law (7±2)' is a finding.",
28
28
  "SIMPLE CRITERIA: designComplexity=Simple is only valid for a single existing component with a minor change, no new user flows, no information architecture changes, and no new interaction patterns. If uncertain, classify upward.",
29
29
  "HONEST LIMITS: this workflow produces a text-based design spec. It cannot produce visual mockups, conduct usability testing, or verify visual quality. Say so explicitly in the handoff and flag what still needs human visual review.",
30
30
  "CONTEXT BLINDNESS: if the user has not provided design system, existing component patterns, or platform conventions, surface this gap in Phase 0 and ask. Do not silently design without this context.",
@@ -74,14 +74,14 @@
74
74
  "promptBlocks": {
75
75
  "goal": "Generate 2-3 genuinely different design directions before committing to any one of them.",
76
76
  "constraints": [
77
- "Directions must be genuinely different \u2014 not variations of the same pattern with different labels.",
77
+ "Directions must be genuinely different — not variations of the same pattern with different labels.",
78
78
  "Each direction needs an information architecture sketch: how is content organized, what is the primary navigation path, what is the visual hierarchy?",
79
79
  "Do not select a direction in this phase. Exploration comes before convergence."
80
80
  ],
81
81
  "procedure": [
82
82
  "Generate Direction A: the most conventional approach that follows existing platform patterns and design system. Low risk, familiar to users.",
83
- "Generate Direction B: an approach that prioritizes the primary user goal differently \u2014 different IA, different entry point, or different interaction model.",
84
- "Generate Direction C (if designComplexity=Complex): a third direction that challenges the assumptions in A and B \u2014 a more radical rethinking of the problem.",
83
+ "Generate Direction B: an approach that prioritizes the primary user goal differently — different IA, different entry point, or different interaction model.",
84
+ "Generate Direction C (if designComplexity=Complex): a third direction that challenges the assumptions in A and B — a more radical rethinking of the problem.",
85
85
  "For each direction, describe: (1) the primary IA sketch (main sections, navigation path, content hierarchy), (2) the core interaction model (how does the user accomplish their goal?), (3) the key tradeoffs relative to user goals and constraints.",
86
86
  "After describing all directions, restate which user goals each direction serves well and where each direction is weakest."
87
87
  ],
@@ -107,8 +107,8 @@
107
107
  "promptBlocks": {
108
108
  "goal": "Assemble a neutral context packet that all reviewer families will use as shared truth, then declare which reviewers are needed.",
109
109
  "constraints": [
110
- "The context packet is neutral \u2014 it presents the design problem and directions without advocating for any one.",
111
- "Select the direction to develop further before running reviewers \u2014 reviewers evaluate a specific direction, not an abstract problem.",
110
+ "The context packet is neutral — it presents the design problem and directions without advocating for any one.",
111
+ "Select the direction to develop further before running reviewers — reviewers evaluate a specific direction, not an abstract problem.",
112
112
  "All 5 reviewer families are active for Complex designs; IA and UX laws reviewers are always included for Standard."
113
113
  ],
114
114
  "procedure": [
@@ -151,7 +151,7 @@
151
151
  "procedure": [
152
152
  "Before delegating, restate the selected direction and the user goal it serves best.",
153
153
  "Spawn one WorkRail Executor per selected reviewer family simultaneously. Each executor receives: the designContextPacket, their specific reviewer mission, and the finding format requirement.",
154
- "Reviewer family missions: (1) IA reviewer \u2014 evaluate content hierarchy, navigation paths, grouping logic, and information scent against user goals; cite specific IA decisions; (2) UX laws reviewer \u2014 check each relevant law: Hick's Law (decision count), Miller's Law (working memory), Jakob's Law (familiar patterns), Fitts's Law (target size and distance), Peak-End Rule (emotional journey), Tesler's Law (irreducible complexity), Von Restorff Effect (visual differentiation of important elements); cite specific violations or confirmations; (3) accessibility reviewer \u2014 check WCAG requirements: color contrast ratios (4.5:1 normal, 3:1 large text), keyboard navigation path, touch target sizes (44x44px minimum), screen reader labels, focus indicators, animation controls; produce specific requirements not 'follow WCAG'; (4) edge cases reviewer \u2014 for each interactive element, explicitly address: empty state (no data), error state (failed action), loading state, first-use/onboarding, offline or degraded state, destructive actions; flag any state not addressed in the current design; (5) content reviewer \u2014 evaluate every label, button copy, placeholder, error message, and helper text against clarity, user language vs. technical jargon, and actionability of error messages.",
154
+ "Reviewer family missions: (1) IA reviewer — evaluate content hierarchy, navigation paths, grouping logic, and information scent against user goals; cite specific IA decisions; (2) UX laws reviewer — check each relevant law: Hick's Law (decision count), Miller's Law (working memory), Jakob's Law (familiar patterns), Fitts's Law (target size and distance), Peak-End Rule (emotional journey), Tesler's Law (irreducible complexity), Von Restorff Effect (visual differentiation of important elements); cite specific violations or confirmations; (3) accessibility reviewer — check WCAG requirements: color contrast ratios (4.5:1 normal, 3:1 large text), keyboard navigation path, touch target sizes (44x44px minimum), screen reader labels, focus indicators, animation controls; produce specific requirements not 'follow WCAG'; (4) edge cases reviewer — for each interactive element, explicitly address: empty state (no data), error state (failed action), loading state, first-use/onboarding, offline or degraded state, destructive actions; flag any state not addressed in the current design; (5) content reviewer — evaluate every label, button copy, placeholder, error message, and helper text against clarity, user language vs. technical jargon, and actionability of error messages.",
155
155
  "After receiving all executor outputs, synthesize explicitly: what was confirmed, what was new, what looks weak or generic, and what has citations vs. what is speculation.",
156
156
  "Set evidenceWeakCount to the number of findings without specific citations."
157
157
  ],
@@ -249,13 +249,13 @@
249
249
  "promptBlocks": {
250
250
  "goal": "Verify all quality gates pass before writing the design spec.",
251
251
  "constraints": [
252
- "If any gate fails, fix the underlying issue before advancing \u2014 do not write the spec over known gaps."
252
+ "If any gate fails, fix the underlying issue before advancing — do not write the spec over known gaps."
253
253
  ],
254
254
  "procedure": [
255
- "Gate 1 \u2014 Evidence citations: confirm every finding in reviewerFindings cites a specific design element from the context packet. Flag any finding that is generic advice without a specific reference and either improve it or mark it advisory-only.",
256
- "Gate 2 \u2014 Reviewer coverage: confirm every declared reviewer family has at least one substantive finding. If a family has no findings, state explicitly why (e.g., 'IA reviewer found no issues \u2014 the single-screen design has no navigation structure to evaluate').",
257
- "Gate 3 \u2014 Edge case coverage: confirm empty state, error state, loading state, and first-use are addressed for each interactive element in the selected design direction. List any that are not yet addressed.",
258
- "Gate 4 \u2014 Accessibility specificity: confirm accessibility requirements are listed as specific constraints (color contrast ratios, touch target sizes, keyboard tab order), not as a generic 'follow WCAG' instruction."
255
+ "Gate 1 — Evidence citations: confirm every finding in reviewerFindings cites a specific design element from the context packet. Flag any finding that is generic advice without a specific reference and either improve it or mark it advisory-only.",
256
+ "Gate 2 — Reviewer coverage: confirm every declared reviewer family has at least one substantive finding. If a family has no findings, state explicitly why (e.g., 'IA reviewer found no issues — the single-screen design has no navigation structure to evaluate').",
257
+ "Gate 3 — Edge case coverage: confirm empty state, error state, loading state, and first-use are addressed for each interactive element in the selected design direction. List any that are not yet addressed.",
258
+ "Gate 4 — Accessibility specificity: confirm accessibility requirements are listed as specific constraints (color contrast ratios, touch target sizes, keyboard tab order), not as a generic 'follow WCAG' instruction."
259
259
  ],
260
260
  "outputRequired": {
261
261
  "notesMarkdown": "Gate check results: which passed, which failed, what was fixed.",
@@ -280,7 +280,7 @@
280
280
  "Do not drift into implementation planning (specific component libraries, code) unless explicitly asked."
281
281
  ],
282
282
  "procedure": [
283
- "Write the design spec covering: (1) Design Decision \u2014 which direction was chosen and the specific reason it was chosen over the others; (2) Information Architecture \u2014 content hierarchy, navigation structure, primary user path; (3) Interaction Design \u2014 how each interactive element works, what triggers what, what feedback the user gets; (4) States \u2014 for each element: default, hover/focus, loading, error, empty, first-use, disabled; (5) Accessibility Requirements \u2014 specific requirements (color contrast ratios, keyboard tab order, touch target sizes, screen reader labels); (6) Content \u2014 all copy, labels, error messages, placeholders, and onboarding text; (7) Reviewer Findings \u2014 per-dimension findings with citations that the design should address or has already addressed; (8) Open Questions \u2014 what still needs human input (visual design, usability testing, design system component availability).",
283
+ "Write the design spec covering: (1) Design Decision — which direction was chosen and the specific reason it was chosen over the others; (2) Information Architecture — content hierarchy, navigation structure, primary user path; (3) Interaction Design — how each interactive element works, what triggers what, what feedback the user gets; (4) States — for each element: default, hover/focus, loading, error, empty, first-use, disabled; (5) Accessibility Requirements — specific requirements (color contrast ratios, keyboard tab order, touch target sizes, screen reader labels); (6) Content — all copy, labels, error messages, placeholders, and onboarding text; (7) Reviewer Findings — per-dimension findings with citations that the design should address or has already addressed; (8) Open Questions — what still needs human input (visual design, usability testing, design system component availability).",
284
284
  "Close the spec by naming: what visual review a human designer should perform, and what this workflow cannot verify (visual quality, usability, emotional feel)."
285
285
  ],
286
286
  "outputRequired": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "workflow-for-workflows",
3
- "name": "Workflow Authoring Workflow (Quality Gate v2)",
3
+ "name": "Workflow Authoring Workflow",
4
4
  "version": "2.4.0",
5
5
  "description": "Use this to author or modernize a WorkRail workflow. Guides through understanding the task, defining effectiveness targets, designing architecture and quality gates, drafting, validating, assigning tags, and handing off.",
6
6
  "about": "## Workflow Authoring Workflow\n\nThis is the standard WorkRail workflow for creating a new workflow from scratch or modernizing an existing one. It is the trust gate for all other workflows: a workflow is not considered production-ready until it has passed through here.\n\n**What it does:**\nThe workflow walks through the full authoring lifecycle: understanding the task, choosing the right baseline and archetype, designing the phase and quality-gate architecture, drafting the workflow JSON, running structural validators, auditing state fields for bloat, simulating execution against real scenarios, running an adversarial quality review, and producing a final trust handoff. For modernization tasks it builds a value inventory first to ensure enforcement mechanisms, domain knowledge, and behavioral rules are preserved or equivalently replaced.\n\n**When to use it:**\n- You want to author a new WorkRail workflow for a recurring task or problem\n- You have an existing workflow that is outdated, uses legacy patterns (pseudo-DSL, regex validation, satisfaction-score loops), or produces shallow results\n- You want a workflow that will pass the WorkRail quality bar and be trusted to run in production\n\n**What it produces:**\nA validated, tagged workflow JSON file with a `validatedAgainstSpecVersion` stamp. A final trust handoff with readiness verdict, known failure modes, residual weaknesses, and testing guidance.\n\n**How to get good results:**\nDescribe the recurring task the workflow should solve, who will run it, and what a satisfying result looks like. For modernization, point to the existing workflow file. The workflow reads the schema and authoring spec itself -- you do not need to know the JSON format in advance.",
@@ -210,7 +210,7 @@
210
210
  ],
211
211
  "procedure": [
212
212
  "Decide the phase list, one-line goal for each phase, and overall ordering.",
213
- "Identify meaningful input classifications that require different workflow paths. For each variant dimension, decide the branching mechanism: `runCondition` on separate steps (diverging paths), `promptFragments` (additive behavior on a shared base), or a separate workflow entirely. For each captured variable that drives branching, define its closed set of valid values \u2014 unexpected values are a common source of silent misbehavior.",
213
+ "Identify meaningful input classifications that require different workflow paths. For each variant dimension, decide the branching mechanism: `runCondition` on separate steps (diverging paths), `promptFragments` (additive behavior on a shared base), or a separate workflow entirely. For each captured variable that drives branching, define its closed set of valid values — unexpected values are a common source of silent misbehavior.",
214
214
  "Design loops with explicit exit rules, bounded maxIterations, and real reasons for another pass.",
215
215
  "Decide confirmation gates, delegation vs template injection vs direct execution, promptFragments, references, artifacts, and metaGuidance.",
216
216
  "If the authored workflow encodes domain knowledge tied to a specific version of an external system or codebase, decide how to handle staleness: prefer reading the codebase at runtime over hardcoding patterns, or explicitly document versioned assumptions so they surface as maintenance debt."
@@ -266,7 +266,7 @@
266
266
  "procedure": [
267
267
  "Decide whether the authored workflow needs a hypothesis step, neutral fact packet, reviewer or validator families, contradiction loop, final validation bundle, or explicit blind-spot handling.",
268
268
  "Design the confidence model, blind-spot model, and state economy plan.",
269
- "Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail. Choose the right enforcement mechanism for each gate: `assessments` + `assessmentRefs` + `assessmentConsequences` for bounded confidence judgments (each dimension captures a distinct orthogonal failure mode \u2014 see `mr-review-workflow.agentic.v2.json` and `bug-investigation.agentic.v2.json`); `validationCriteria` with context-aware conditions for completion-gating on structured checklists or required output content (the engine enforces that required content appears in the response before the step can complete, without a loop \u2014 conditions on individual rules can match the workflow's branching context); a re-verification loop for fix-and-verify cycles where the agent must act then prove the action worked. Do not default to a loop when `validationCriteria` is the right tool, or to `requireConfirmation` when a hard gate is needed.",
269
+ "Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail. Choose the right enforcement mechanism for each gate: `assessments` + `assessmentRefs` + `assessmentConsequences` for bounded confidence judgments (each dimension captures a distinct orthogonal failure mode — see `mr-review-workflow.agentic.v2.json` and `bug-investigation.agentic.v2.json`); `validationCriteria` with context-aware conditions for completion-gating on structured checklists or required output content (the engine enforces that required content appears in the response before the step can complete, without a loop — conditions on individual rules can match the workflow's branching context); a re-verification loop for fix-and-verify cycles where the agent must act then prove the action worked. Do not default to a loop when `validationCriteria` is the right tool, or to `requireConfirmation` when a hard gate is needed.",
270
270
  "Write the redesign triggers that should force architectural revision rather than cosmetic refinement."
271
271
  ],
272
272
  "outputRequired": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "workflow-for-workflows",
3
- "name": "Workflow Authoring Workflow (Quality Gate v2)",
4
- "version": "2.4.0",
3
+ "name": "Workflow Authoring Workflow",
4
+ "version": "2.5.0",
5
5
  "description": "Use this to author or modernize a WorkRail workflow. Guides through understanding the task, defining effectiveness targets, designing architecture and quality gates, drafting, validating, assigning tags, and handing off.",
6
6
  "about": "## Workflow Authoring Workflow\n\nThis is the standard WorkRail workflow for creating a new workflow from scratch or modernizing an existing one. It is the trust gate for all other workflows: a workflow is not considered production-ready until it has passed through here.\n\n**What it does:**\nThe workflow walks through the full authoring lifecycle: understanding the task, choosing the right baseline and archetype, designing the phase and quality-gate architecture, drafting the workflow JSON, running structural validators, auditing state fields for bloat, simulating execution against real scenarios, running an adversarial quality review, and producing a final trust handoff. For modernization tasks it builds a value inventory first to ensure enforcement mechanisms, domain knowledge, and behavioral rules are preserved or equivalently replaced.\n\n**When to use it:**\n- You want to author a new WorkRail workflow for a recurring task or problem\n- You have an existing workflow that is outdated, uses legacy patterns (pseudo-DSL, regex validation, satisfaction-score loops), or produces shallow results\n- You want a workflow that will pass the WorkRail quality bar and be trusted to run in production\n\n**What it produces:**\nA validated, tagged workflow JSON file with a `validatedAgainstSpecVersion` stamp. A final trust handoff with readiness verdict, known failure modes, residual weaknesses, and testing guidance.\n\n**How to get good results:**\nDescribe the recurring task the workflow should solve, who will run it, and what a satisfying result looks like. For modernization, point to the existing workflow file. The workflow reads the schema and authoring spec itself -- you do not need to know the JSON format in advance.",
7
7
  "examples": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "id": "wr.discovery",
3
- "name": "Discovery Workflow (Bundled • Exploration + Design Synthesis)",
3
+ "name": "Discovery Workflow",
4
4
  "version": "3.1.0",
5
5
  "description": "Use this to explore and think through a problem end-to-end. Moves between landscape exploration, problem framing, candidate generation, adversarial challenge, and uncertainty resolution.",
6
6
  "about": "## Discovery Workflow\n\nThis workflow is for structured thinking through an ambiguous problem, opportunity, or decision -- the kind where you are not sure of the right answer yet and jumping straight to solutions would be premature.\n\n**What it does:**\nThe workflow selects one of three emphasis paths based on your actual need: `landscape_first` for understanding the current state and comparing options, `full_spectrum` for important or ambiguous problems where both landscape grounding and reframing are needed, and `design_first` when the dominant risk is solving the wrong problem. It then moves through landscape research, stakeholder and problem framing, candidate direction generation, adversarial challenge, and an uncertainty-resolution stage that can close with a recommendation, a targeted research follow-up, or a prototype/test plan. A design document is maintained throughout as the human-facing artifact.\n\n**When to use it:**\n- You face a decision, architectural question, or design problem with no obvious right answer\n- You want to explore an opportunity space before committing to a direction\n- You suspect the stated problem might not be the real problem\n- You need a structured recommendation with explicit tradeoffs and alternatives rather than the first plausible answer\n\n**What it produces:**\nA design document covering: the selected path and framing, landscape takeaways, chosen direction and why it won, the strongest alternative and why it lost, confidence band, residual risks, and next actions.\n\n**How to get good results:**\nDescribe the problem, opportunity, or decision you want help thinking through. State what outcome you want (a recommendation, a comparison, a research plan, a prototype direction). The more context you provide upfront about constraints and anti-goals, the sharper the framing will be.",