@exaudeus/workrail 3.32.0 → 3.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/index.d.ts +1 -0
- package/dist/cli/commands/index.js +3 -1
- package/dist/cli/commands/worktrain-await.js +11 -9
- package/dist/cli/commands/worktrain-daemon-install.d.ts +35 -0
- package/dist/cli/commands/worktrain-daemon-install.js +291 -0
- package/dist/cli/commands/worktrain-daemon.d.ts +31 -0
- package/dist/cli/commands/worktrain-daemon.js +272 -0
- package/dist/cli/commands/worktrain-spawn.js +11 -9
- package/dist/cli-worktrain.js +488 -0
- package/dist/cli.js +1 -22
- package/dist/console/standalone-console.d.ts +28 -0
- package/dist/console/standalone-console.js +142 -0
- package/dist/{console/assets/index-Cb_LO718.js → console-ui/assets/index-C1JXnwZS.js} +1 -1
- package/dist/{console → console-ui}/index.html +1 -1
- package/dist/daemon/agent-loop.d.ts +27 -0
- package/dist/daemon/agent-loop.js +39 -1
- package/dist/daemon/daemon-events.d.ts +63 -1
- package/dist/daemon/workflow-runner.d.ts +3 -2
- package/dist/daemon/workflow-runner.js +285 -46
- package/dist/infrastructure/session/HttpServer.js +133 -34
- package/dist/manifest.json +136 -104
- package/dist/mcp/handlers/v2-error-mapping.d.ts +3 -0
- package/dist/mcp/handlers/v2-error-mapping.js +2 -0
- package/dist/mcp/handlers/v2-execution/advance.js +25 -0
- package/dist/mcp/handlers/v2-execution/continue-advance.js +7 -0
- package/dist/mcp/output-schemas.d.ts +30 -30
- package/dist/mcp/transports/fatal-exit.js +4 -0
- package/dist/mcp/transports/http-entry.js +0 -5
- package/dist/mcp/transports/stdio-entry.js +24 -12
- package/dist/mcp/v2/tools.d.ts +4 -4
- package/dist/mcp-server.d.ts +0 -2
- package/dist/mcp-server.js +1 -42
- package/dist/trigger/adapters/github-poller.d.ts +44 -0
- package/dist/trigger/adapters/github-poller.js +190 -0
- package/dist/trigger/adapters/gitlab-poller.d.ts +27 -0
- package/dist/trigger/adapters/gitlab-poller.js +81 -0
- package/dist/trigger/index.d.ts +4 -1
- package/dist/trigger/index.js +5 -1
- package/dist/trigger/polled-event-store.d.ts +22 -0
- package/dist/trigger/polled-event-store.js +173 -0
- package/dist/trigger/polling-scheduler.d.ts +20 -0
- package/dist/trigger/polling-scheduler.js +249 -0
- package/dist/trigger/trigger-listener.d.ts +3 -0
- package/dist/trigger/trigger-listener.js +47 -3
- package/dist/trigger/trigger-store.js +114 -33
- package/dist/trigger/types.d.ts +17 -1
- package/dist/v2/durable-core/domain/observation-builder.d.ts +3 -0
- package/dist/v2/durable-core/domain/observation-builder.js +2 -2
- package/dist/v2/durable-core/domain/prompt-renderer.d.ts +2 -1
- package/dist/v2/durable-core/domain/prompt-renderer.js +10 -0
- package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +224 -224
- package/dist/v2/durable-core/schemas/session/events.d.ts +42 -42
- package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
- package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
- package/dist/v2/durable-core/tokens/payloads.d.ts +52 -52
- package/dist/v2/usecases/console-routes.js +3 -3
- package/dist/v2/usecases/console-service.js +185 -10
- package/dist/v2/usecases/console-types.d.ts +8 -0
- package/docs/design/bridge-removal-pr-a-candidates.md +115 -0
- package/docs/design/bridge-removal-pr-a-design-review.md +79 -0
- package/docs/design/bridge-removal-pr-a-implementation-plan.md +203 -0
- package/docs/design/daemon-conversation-logging-plan.md +98 -0
- package/docs/design/daemon-conversation-logging-review.md +55 -0
- package/docs/design/daemon-conversation-logging.md +129 -0
- package/docs/design/github-polling-adapter-design-candidates.md +226 -0
- package/docs/design/github-polling-adapter-design-review-findings.md +131 -0
- package/docs/design/github-polling-adapter-implementation-plan.md +284 -0
- package/docs/design/implementation_plan.md +192 -0
- package/docs/design/workflow-id-validation-at-startup.md +146 -0
- package/docs/design/workflow-id-validation-design-review.md +87 -0
- package/docs/design/workflow-id-validation-implementation-plan.md +185 -0
- package/docs/design/worktrain-system-prompt-report-issue-candidates.md +135 -0
- package/docs/design/worktrain-system-prompt-report-issue-design-review.md +73 -0
- package/docs/discovery/design-candidates.md +180 -0
- package/docs/discovery/design-review-findings.md +110 -0
- package/docs/discovery/wr-discovery-goal-reframing.md +303 -0
- package/docs/ideas/backlog.md +627 -0
- package/package.json +1 -1
- package/workflows/architecture-scalability-audit.json +1 -1
- package/workflows/bug-investigation.agentic.v2.json +3 -3
- package/workflows/coding-task-workflow-agentic.json +32 -32
- package/workflows/coding-task-workflow-agentic.lean.v2.json +1 -1
- package/workflows/coding-task-workflow-agentic.v2.json +7 -7
- package/workflows/mr-review-workflow.agentic.v2.json +21 -12
- package/workflows/personal-learning-materials-creation-branched.json +2 -2
- package/workflows/production-readiness-audit.json +1 -1
- package/workflows/relocation-workflow-us.json +2 -2
- package/workflows/ui-ux-design-workflow.json +14 -14
- package/workflows/workflow-for-workflows.json +3 -3
- package/workflows/workflow-for-workflows.v2.json +2 -2
- package/workflows/wr.discovery.json +59 -8
- package/dist/mcp/transports/bridge-entry.d.ts +0 -102
- package/dist/mcp/transports/bridge-entry.js +0 -454
- package/dist/mcp/transports/bridge-events.d.ts +0 -51
- package/dist/mcp/transports/bridge-events.js +0 -24
- package/dist/mcp/transports/primary-tombstone.d.ts +0 -21
- package/dist/mcp/transports/primary-tombstone.js +0 -51
- /package/dist/{console → console-ui}/assets/index-8dh0Psu-.css +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "coding-task-workflow-agentic",
|
|
3
|
-
"name": "Agentic Task Dev Workflow (v2
|
|
3
|
+
"name": "Agentic Task Dev Workflow (v2)",
|
|
4
4
|
"version": "2.0.0",
|
|
5
5
|
"description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
|
|
6
6
|
"about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"steps": [
|
|
48
48
|
{
|
|
49
49
|
"id": "phase-0-triage-and-mode",
|
|
50
|
-
"title": "Phase 0: Triage (Complexity
|
|
50
|
+
"title": "Phase 0: Triage (Complexity • Risk • PR Strategy)",
|
|
51
51
|
"prompt": "Analyze the task and choose the right rigor.\n\nClassify:\n- `taskComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `prStrategy`: SinglePR / MultiPR\n- `maxParallelism`: 0 / 3 / 4\n\nDecision guidance:\n- QUICK: small, low-risk, clear path, little ambiguity\n- STANDARD: medium scope or moderate risk\n- THOROUGH: large scope, architectural uncertainty, or high-risk change\n\nParallelism guidance:\n- QUICK: no delegation by default\n- STANDARD: few delegation moments, but allow multiple parallel executors at each moment\n- THOROUGH: same pattern, but with one extra delegation moment and broader parallel validation\n\nAlso capture `userRules` from the active session instructions and explicit philosophy. Keep them as a focused list of concrete, actionable rules.\n\nSet these keys in the next `continue_workflow` call's `context` object: `taskComplexity`, `riskLevel`, `rigorMode`, `automationLevel`, `prStrategy`, `maxParallelism`, `userRules`.\n\nAsk the user to confirm only if the rigor or PR strategy materially affects delivery expectations.\n\nAlso set in the context object: one sentence describing what you are trying to accomplish (e.g. \"implement OAuth refresh token rotation\", \"review PR #47 before merge\"). This populates the session title in the Workspace console immediately.",
|
|
52
52
|
"requireConfirmation": true
|
|
53
53
|
},
|
|
@@ -101,12 +101,12 @@
|
|
|
101
101
|
},
|
|
102
102
|
{
|
|
103
103
|
"id": "phase-2-architecture-decision",
|
|
104
|
-
"title": "Phase 2: Architecture Decision (Generate
|
|
104
|
+
"title": "Phase 2: Architecture Decision (Generate • Compare • Challenge • Select)",
|
|
105
105
|
"runCondition": {
|
|
106
106
|
"var": "taskComplexity",
|
|
107
107
|
"not_equals": "Small"
|
|
108
108
|
},
|
|
109
|
-
"prompt": "Make the architecture decision in one coherent phase instead of serializing every thinking mode into a separate step.\n\nPart A
|
|
109
|
+
"prompt": "Make the architecture decision in one coherent phase instead of serializing every thinking mode into a separate step.\n\nPart A — Prepare a neutral fact packet:\n- problem statement\n- acceptance criteria\n- non-goals\n- invariants\n- constraints\n- `userRules`\n- relevant files / pattern examples\n- current risks and unknowns\n\nPart B — Generate candidate plans:\n- QUICK: self-generate at least 3 genuinely different approaches\n- STANDARD: if delegation is available, spawn TWO or THREE WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, pragmatic)\n- THOROUGH: if delegation is available, spawn THREE or FOUR WorkRail Executors SIMULTANEOUSLY running `routine-plan-generation` with different perspectives (for example simplicity, maintainability, architecture-first, rollback-safe)\n\nPart C — Diversity gate before commitment:\n- assign each candidate plan a short `candidatePlanFamily` label\n- check whether the candidates are materially different in shape, not just wording\n- if all candidates cluster on the same pattern family, generate at least one more plan from a deliberately different perspective before selecting\n- set `candidateDiversityAdequate = true|false`\n\nPart D — Compare candidate plans:\n- invariant fit\n- philosophy alignment (`userRules` as active lens)\n- risk profile\n- implementation shape\n- likely reviewability / PR shape\n\nPart E — Challenge the best one or two:\n- STANDARD: optionally challenge the leading candidate with ONE WorkRail Executor running `routine-hypothesis-challenge`\n- THOROUGH: challenge the top 1-2 candidate plans using ONE or TWO WorkRail Executors running `routine-hypothesis-challenge`\n\nPart F — Decide:\nSet these keys in the next `continue_workflow` call's `context` object:\n- `approaches`\n- `alternativesConsideredCount`\n- `candidatePlanFamilies`\n- `candidateDiversityAdequate`\n- `hasRunnerUp`\n- `selectedApproach`\n- 
`runnerUpApproach`\n- `architectureRationale`\n- `keyRiskToMonitor`\n- `pivotTriggers`\n- `architectureConfidenceBand`\n\nRules:\n- the main agent owns the final decision\n- subagents generate candidate plans; they do not decide the winner\n- if the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost",
|
|
110
110
|
"requireConfirmation": {
|
|
111
111
|
"or": [
|
|
112
112
|
{
|
|
@@ -131,13 +131,13 @@
|
|
|
131
131
|
"var": "taskComplexity",
|
|
132
132
|
"not_equals": "Small"
|
|
133
133
|
},
|
|
134
|
-
"prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Applied `userRules` and philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle]
|
|
134
|
+
"prompt": "Create or update the human-facing implementation artifact: `implementation_plan.md`.\n\nThis phase combines slicing, plan drafting, philosophy alignment, and test design.\n\nThe plan must include:\n1. Problem statement\n2. Acceptance criteria\n3. Non-goals\n4. Applied `userRules` and philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only when they improve execution or enable safe parallelism\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] → [satisfied / tension / violated + 1-line why]\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`\n- `planConfidenceBand`\n\nRules:\n- keep `implementation_plan.md` concrete enough for another engineer to implement without guessing\n- use work packages only when they create real clarity; do not over-fragment work\n- use the user's coding philosophy as the primary planning lens, and name tensions explicitly\n- set `unresolvedUnknownCount` to the number of still-open issues that would materially affect implementation quality\n- set `planConfidenceBand` to Low / Medium / High based on how ready the plan actually is",
|
|
135
135
|
"requireConfirmation": false
|
|
136
136
|
},
|
|
137
137
|
{
|
|
138
138
|
"id": "phase-4-plan-iterations",
|
|
139
139
|
"type": "loop",
|
|
140
|
-
"title": "Phase 4: Plan Audit Loop (Audit
|
|
140
|
+
"title": "Phase 4: Plan Audit Loop (Audit → Refocus → Decide)",
|
|
141
141
|
"runCondition": {
|
|
142
142
|
"var": "taskComplexity",
|
|
143
143
|
"not_equals": "Small"
|
|
@@ -167,7 +167,7 @@
|
|
|
167
167
|
{
|
|
168
168
|
"id": "phase-4c-loop-decision",
|
|
169
169
|
"title": "Loop Exit Decision",
|
|
170
|
-
"prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `planFindings` is non-empty
|
|
170
|
+
"prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `planFindings` is non-empty → continue\n- if `planFindings` is empty → stop, but enumerate what was checked to justify the clean pass\n- if max iterations reached → stop and document remaining concerns\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
|
|
171
171
|
"requireConfirmation": true,
|
|
172
172
|
"outputContract": {
|
|
173
173
|
"contractRef": "wr.contracts.loop_control"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "mr-review-workflow-agentic",
|
|
3
|
-
"name": "MR Review Workflow (Lean v2
|
|
4
|
-
"version": "2.
|
|
3
|
+
"name": "MR Review Workflow (Lean v2 • Notes-First • Evidence-Driven Reviewer Families)",
|
|
4
|
+
"version": "2.6.0",
|
|
5
5
|
"description": "Lean v2 MR review workflow. Merges intake, missing-input gating, context gathering, and re-triage into one structured front phase, then drives review through a shared fact packet, parallel reviewer families, contradiction-driven synthesis, and evidence-first final validation.",
|
|
6
6
|
"about": "## MR Review Workflow\n\nThis workflow conducts a structured, evidence-driven code review of a merge request or pull request. It is designed for cases where you want a thorough, audit-quality review rather than a quick glance -- particularly when the change touches critical surfaces, spans many files, or carries real production risk.\n\n**What it does:**\nThe workflow locates and bounds the review target, enriches it with PR context and ticket intent, classifies the change by risk and shape, then runs parallel \"reviewer family\" agents (covering correctness, architecture, runtime risk, tests/docs, and more) from a shared neutral fact packet. It reconciles contradictions between reviewer families, stress-tests the recommendation with adversarial validators, and produces a final handoff with severity-classified findings and ready-to-post MR comments.\n\n**When to use it:**\n- Before merging a PR that touches auth, data models, APIs, or critical paths\n- When you want independent perspectives on a change without the noise of an unstructured review\n- When the change is large or the reviewer is unfamiliar with the surrounding code\n- When you need a reproducible audit trail for compliance or team review processes\n\n**What it produces:**\nA final review recommendation (approve / request changes / needs discussion) with a confidence band, severity-graded findings (Critical / Major / Minor / Nit), ready-to-post MR comments, a coverage ledger showing which review domains were checked, and an honest disclosure of any context that could not be recovered.\n\n**How to get good results:**\nProvide the PR URL, branch name, or diff. The workflow can recover most context on its own -- ticket links, repo patterns, policy docs -- but if the change has non-obvious intent, a one-sentence description of the goal helps calibrate review sensitivity. The workflow will not post comments or approve/reject without explicit instruction.",
|
|
7
7
|
"examples": [
|
|
@@ -25,7 +25,10 @@
|
|
|
25
25
|
{
|
|
26
26
|
"id": "evidence_quality",
|
|
27
27
|
"purpose": "Each finding cites a specific file, function, or line. No finding relies on intuition or pattern-matching without concrete grounding.",
|
|
28
|
-
"levels": [
|
|
28
|
+
"levels": [
|
|
29
|
+
"low",
|
|
30
|
+
"high"
|
|
31
|
+
]
|
|
29
32
|
}
|
|
30
33
|
]
|
|
31
34
|
},
|
|
@@ -36,7 +39,10 @@
|
|
|
36
39
|
{
|
|
37
40
|
"id": "coverage_completeness",
|
|
38
41
|
"purpose": "All material review domains are checked or explicitly acknowledged as gaps in the coverage ledger.",
|
|
39
|
-
"levels": [
|
|
42
|
+
"levels": [
|
|
43
|
+
"low",
|
|
44
|
+
"high"
|
|
45
|
+
]
|
|
40
46
|
}
|
|
41
47
|
]
|
|
42
48
|
},
|
|
@@ -47,7 +53,10 @@
|
|
|
47
53
|
{
|
|
48
54
|
"id": "contradiction_resolution",
|
|
49
55
|
"purpose": "Every material contradiction is resolved by evidence or explicitly acknowledged with a stated position and rationale.",
|
|
50
|
-
"levels": [
|
|
56
|
+
"levels": [
|
|
57
|
+
"low",
|
|
58
|
+
"high"
|
|
59
|
+
]
|
|
51
60
|
}
|
|
52
61
|
]
|
|
53
62
|
}
|
|
@@ -76,7 +85,7 @@
|
|
|
76
85
|
{
|
|
77
86
|
"id": "phase-0-understand-and-classify",
|
|
78
87
|
"title": "Phase 0: Locate, Bound, Enrich & Classify",
|
|
79
|
-
"prompt": "Build the review foundation in one pass.\n\nStep 1
|
|
88
|
+
"prompt": "Build the review foundation in one pass.\n\nStep 1 — Early exit / minimum inputs:\nBefore exploring, verify that the review target is real and inspectable. If the diff, changed files, or equivalent review material are completely absent and cannot be inferred with tools, ask for the minimum missing artifact and stop. Do NOT ask questions you can resolve with tools.\n\nStep 2 — Locate and bound the review target:\nAttempt to determine the strongest available review target and boundary.\n\nAttempt to establish:\n- `reviewTargetKind` from the strongest available source such as PR/MR, branch, patch, diff, or local working tree changes\n- `reviewTargetSource` describing where the target came from\n- likely PR/MR identity when available (`prUrl`, `prNumber`)\n- likely base / ancestor reference (`baseCandidate`, `mergeBaseRef`) when available\n- whether the branch may include inherited or out-of-scope changes\n- `boundaryConfidence`: High / Medium / Low\n\nDo not over-prescribe your own investigation path. Use the strongest available evidence and record uncertainty honestly.\n\nStep 3 — Enrich with context:\nRecover the strongest available intent and policy context from whatever sources are actually available.\n\nAttempt to recover:\n- MR title and purpose\n- ticket / issue / acceptance context (`ticketRefs`, `ticketContext`)\n- supporting docs / specs / rollout context (`supportingDocsFound`)\n- repo or user policy/convention context when it is likely to affect review judgment (`policySourcesFound`)\n- `contextConfidence`: High / Medium / Low\n\nStep 4 — Review-surface hygiene:\nClassify the visible change into a minimal review surface.\n\nSet:\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- `likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n\nThe goal is not a giant ledger. 
The goal is to avoid treating every visible changed file as equally worthy of deep review by default.\n\nStep 5 — Classify the review:\nAfter exploration, classify the work.\n\nSet:\n- `reviewMode`: QUICK / STANDARD / THOROUGH\n- `riskLevel`: Low / Medium / High\n- `shapeProfile`: choose the best primary label from `isolated_change`, `crosscutting_change`, `mechanically_noisy_change`, or `ambiguous_boundary`\n- `changeTypeProfile`: choose the best primary label from `general_code_change`, `api_contract_change`, `data_model_or_migration`, `security_sensitive`, or `test_only`\n- `maxParallelism`: 0 / 3 / 5\n- `criticalSurfaceTouched`: true / false\n- `needsSimulation`: true / false\n- `needsBoundaryFollowup`: true / false\n- `needsContextFollowup`: true / false\n- `needsReviewerBundle`: true / false\n\nDecision guidance:\n- QUICK: very small, isolated, low-risk changes with little ambiguity\n- STANDARD: typical feature or bug-fix reviews with moderate ambiguity or moderate risk\n- THOROUGH: critical surfaces, architectural novelty, high risk, broad change sets, or strong need for independent reviewer perspectives\n\nMinimal routing guidance:\n- if `boundaryConfidence = Low`, bias toward boundary/context follow-up before strong recommendation confidence\n- if `changeTypeProfile = api_contract_change`, bias toward contract/consumer/backward-compatibility scrutiny\n- if `changeTypeProfile = data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny\n- if `changeTypeProfile = security_sensitive`, bias toward adversarial/runtime-risk scrutiny and lower tolerance for weak evidence\n- if `changeTypeProfile = test_only`, bias toward stronger false-positive suppression\n- if `shapeProfile = mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings\n\nStep 6 — Optional deeper context:\nIf `reviewMode` is STANDARD or THOROUGH and context remains incomplete, and delegation is available, spawn TWO WorkRail 
Executors SIMULTANEOUSLY running `routine-context-gathering` with focus=COMPLETENESS and focus=DEPTH. Synthesize both outputs before finishing this step.\n\nStep 7 — Human-facing artifact:\nChoose `reviewDocPath` only if a live artifact will materially improve human readability. Default suggestion: `mr-review.md` at the project root. This artifact is optional and never canonical workflow state.\n\nFallback behavior:\n- if PR/MR is not found but a branch/diff is inspectable, continue with downgraded context confidence and disclose missing PR context later\n- if the branch is inspectable but merge-base / ancestor remains ambiguous, continue with downgraded boundary confidence, set `needsBoundaryFollowup = true`, and disclose the uncertainty later\n- if ticket or supporting docs are missing, continue with downgraded context confidence and avoid overclaiming intent-sensitive findings\n- if only a patch/diff is available, continue if it is inspectable, but keep lower confidence on intent/boundary-dependent conclusions\n- if the review target itself is missing, ask only for that missing artifact and stop\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewTargetKind`\n- `reviewTargetSource`\n- `prUrl`\n- `prNumber`\n- `baseCandidate`\n- `mergeBaseRef`\n- `boundaryConfidence`\n- `contextConfidence`\n- `mrTitle`\n- `mrPurpose`\n- `ticketRefs`\n- `ticketContext`\n- `supportingDocsFound`\n- `policySourcesFound`\n- `accessibleContextSources`\n- `missingContextSources`\n- `focusAreas`\n- `changedFileCount`\n- `criticalSurfaceTouched`\n- `reviewMode`\n- `riskLevel`\n- `shapeProfile`\n- `changeTypeProfile`\n- `maxParallelism`\n- `reviewDocPath`\n- `contextSummary`\n- `candidateFiles`\n- `moduleRoots`\n- `contextUnknownCount`\n- `coverageGapCount`\n- `authorIntentUnclear`\n- `needsSimulation`\n- `needsBoundaryFollowup`\n- `needsContextFollowup`\n- `needsReviewerBundle`\n- `coreReviewSurface`\n- `likelyNoiseOrMechanicalChurn`\n- 
`likelyInheritedOrOutOfScopeChanges`\n- `reviewSurfaceSummary`\n- `reviewScopeWarnings`\n- `openQuestions`\n\nRules:\n- answer your own questions with tools whenever possible\n- only keep true human-decision questions in `openQuestions`\n- keep `openQuestions` bounded to the minimum necessary\n- classify AFTER exploring, not before\n- before leaving this phase, either establish the likely review boundary or explicitly record why you could not\n\nAlso set in the context object: one sentence describing what you are trying to accomplish (e.g. \"implement OAuth refresh token rotation\", \"review PR #47 before merge\"). This populates the session title in the Workspace console immediately.",
|
|
80
89
|
"requireConfirmation": {
|
|
81
90
|
"or": [
|
|
82
91
|
{
|
|
@@ -112,12 +121,12 @@
|
|
|
112
121
|
"Keep `recommendationHypothesis` as a secondary hypothesis to challenge, not a frame to defend."
|
|
113
122
|
],
|
|
114
123
|
"procedure": [
|
|
115
|
-
"Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, review target and review-surface summary, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, discovered ticket/doc/policy context, accessible and missing context sources, and explicit open unknowns.",
|
|
116
|
-
"Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
|
|
124
|
+
"Create a neutral `reviewFactPacket` containing: MR purpose and expected behavior change, review target and review-surface summary, changed files and module roots, key contracts / invariants / affected consumers, call-chain highlights, relevant repo patterns and exemplars, tests/docs expectations, discovered ticket/doc/policy context, accessible and missing context sources, explicit open unknowns, relevant coding philosophy principles for this change (from CLAUDE.md, AGENTS.md, ~/.firebender/commands/philosophy.mdc, or soul file -- scope to the 3-5 most relevant for what was changed, not all principles), and existing patterns in the changed module (how similar problems are solved today in the same directory).",
|
|
125
|
+
"Initialize `coverageLedger` for these domains: `correctness_logic`, `contracts_invariants`, `patterns_architecture`, `philosophy_alignment`, `runtime_production_risk`, `tests_docs_rollout`, `security_performance`.",
|
|
117
126
|
"Perform a preliminary self-review from the fact packet before choosing reviewer families.",
|
|
118
|
-
"Reviewer family options: `correctness_invariants`, `patterns_architecture`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
|
|
127
|
+
"Reviewer family options: `correctness_invariants`, `patterns_architecture`, `philosophy_alignment`, `runtime_production_risk`, `test_docs_rollout`, `false_positive_skeptic`, `missed_issue_hunter`.",
|
|
119
128
|
"Selection guidance: QUICK = no bundle by default unless ambiguity still feels material; STANDARD = 3 families by default; THOROUGH = 5 families by default.",
|
|
120
|
-
"Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial.",
|
|
129
|
+
"Always include `correctness_invariants` unless clearly not applicable. Include `test_docs_rollout` in STANDARD and THOROUGH unless clearly not applicable. Include `runtime_production_risk` when `criticalSurfaceTouched = true` or `needsSimulation = true`. Include `missed_issue_hunter` in THOROUGH. Include `false_positive_skeptic` when Major/Critical findings seem plausible or severity inflation risk is non-trivial. Include `philosophy_alignment` in STANDARD and THOROUGH when the change introduces new abstractions, modifies core patterns, or touches areas where the codebase philosophy is particularly relevant (error handling, type safety, DI boundaries, state management).",
|
|
121
130
|
"Routing guidance: for `api_contract_change`, bias toward contract / consumer / backward-compatibility scrutiny; for `data_model_or_migration`, bias toward rollout / compatibility / simulation scrutiny; for `security_sensitive`, bias toward runtime-risk scrutiny and lower tolerance for weak evidence; for `test_only`, bias toward stronger false-positive suppression; for `mechanically_noisy_change`, bias toward stronger noise filtering and lower appetite for style-only findings.",
|
|
122
131
|
"Set `coverageUncertainCount` as the number of coverage domains not yet safely closed: `uncertain` + `contradicted` + `needs_followup`.",
|
|
123
132
|
"Initialize `contradictionCount`, `blindSpotCount`, and `falsePositiveRiskCount` to `0` if no reviewer-family bundle will run."
|
|
@@ -158,7 +167,7 @@
|
|
|
158
167
|
"procedure": [
|
|
159
168
|
"Before delegating, restate the current `recommendationHypothesis` and say which reviewer family is most likely to challenge it.",
|
|
160
169
|
"Each reviewer family must return: key findings, severity estimates, confidence level, top risks, recommendation, and what others may have missed.",
|
|
161
|
-
"Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue category the others may miss.",
|
|
170
|
+
"Family missions: `correctness_invariants` = logic, correctness, API and invariant risks; `patterns_architecture` = pattern fit, design consistency, architectural concerns; `runtime_production_risk` = runtime behavior, production impact, performance/state-flow risk; `test_docs_rollout` = test adequacy, docs, migration, rollout, affected consumers; `false_positive_skeptic` = challenge likely overreaches, weak evidence, or severity inflation; `missed_issue_hunter` = search for an important issue category the others may miss; `philosophy_alignment` = evaluate the implementation against the scoped principles from the fact packet -- name each violation by principle, explain how the code diverges, and distinguish real violations from stylistic preferences. Also ask: is this the right design approach, not just a correct one? Does it follow the established patterns in this module or introduce unnecessary divergence?",
|
|
162
171
|
"Mode-adaptive parallelism: STANDARD = spawn THREE WorkRail Executors SIMULTANEOUSLY for the selected families; THOROUGH = spawn FIVE WorkRail Executors SIMULTANEOUSLY for the selected families.",
|
|
163
172
|
"After receiving outputs, explicitly synthesize: what reviewer families confirmed, what was genuinely new, what appeared weak or overreached, and what changed your mind or did not.",
|
|
164
173
|
"Set these keys in the next `continue_workflow` call's `context` object: `familyFindingsSummary`, `familyRecommendationSpread`, `contradictionCount`, `blindSpotCount`, `falsePositiveRiskCount`, `needsSimulation`.",
|
|
@@ -222,7 +231,7 @@
|
|
|
222
231
|
{
|
|
223
232
|
"id": "phase-4b-canonical-synthesis",
|
|
224
233
|
"title": "Canonical Synthesis and Coverage Update",
|
|
225
|
-
"prompt": "Synthesize all reviewer-family outputs and targeted follow-up into one canonical review state.\n\nPart A
|
|
234
|
+
"prompt": "Synthesize all reviewer-family outputs and targeted follow-up into one canonical review state.\n\nPart A — Compare against your hypothesis:\n- revisit `recommendationHypothesis`\n- what did the evidence confirm?\n- what did it challenge?\n- what changed your mind, what held firm, and what do you explicitly reject?\n\nPart B — Synthesis decision table:\n- if 2+ reviewer families flag the same serious issue with the same severity, treat it as validated\n- if the same issue is flagged with different severities, default to the higher severity unless the lower-severity position includes specific counter-evidence\n- if one family flags an issue and others are silent, investigate it but do not automatically block unless it is clearly critical or security-sensitive\n- if one family says false positive and another says valid issue, require explicit main-agent adjudication in notes before finalization\n- if recommendation spread shows material disagreement, findings override recommendation until reconciled\n- if simulation reveals a new production risk, add a new finding and re-evaluate recommendation confidence\n\nPart C — Coverage ledger rules:\n- move a domain from `uncertain` to `checked` only when evidence is materially adequate\n- keep a domain `uncertain` if disagreement or missing evidence still materially affects recommendation quality\n- mark `not_applicable` only when the MR genuinely does not engage that dimension\n- clear `contradicted` only when the contradiction is explicitly resolved by evidence or adjudication\n- clear `needs_followup` only when required follow-up has actually been completed or the domain is explicitly downgraded as non-material\n\nPart D — Recommendation confidence rules:\n- set `recommendationConfidenceBand = High` only if no unresolved material contradictions remain, no important coverage domains remain uncertain, false-positive risk is not material, and the evidence is strong enough for the current mode\n- set 
`recommendationConfidenceBand = Medium` when one bounded uncertainty remains but the recommendation is still directionally justified\n- set `recommendationConfidenceBand = Low` when multiple viable interpretations remain, major contradictions are unresolved, or important coverage gaps still weaken the recommendation\n\nSet these keys in the next `continue_workflow` call's `context` object:\n- `reviewFindings`\n- `criticalFindingsCount`\n- `majorFindingsCount`\n- `minorFindingsCount`\n- `nitFindingsCount`\n- `recommendation`\n- `recommendationConfidenceBand`\n- `recommendationDriftDetected`\n- `coverageLedger`\n- `coverageUncertainCount`\n- `docCompletenessConcernCount`\n\nIf `reviewDocPath` exists, keep it aligned for human readability only. Notes/context remain workflow truth.",
|
|
226
235
|
"requireConfirmation": false
|
|
227
236
|
},
|
|
228
237
|
{
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "personal-learning-materials-creation-branched",
|
|
3
|
-
"name": "Personal Learning Materials Creation Workflow
|
|
3
|
+
"name": "Personal Learning Materials Creation Workflow",
|
|
4
4
|
"version": "1.1.0",
|
|
5
5
|
"description": "Use this to create learning materials for a course or subject. Adapts depth and format to your time budget — Quick Start, Balanced, or Comprehensive.",
|
|
6
6
|
"about": "## Personal Learning Materials Creation Workflow\n\nUse this to create the actual study materials for a course or subject you are learning -- study guides, exercises, assessments, and spaced-repetition review materials. This workflow assumes you already have a learning plan or course design with defined objectives; it focuses on producing materials that directly support those objectives.\n\n### What it produces\n\nDepending on the path you choose:\n\n- **Quick Start (2-3 weeks)**: study guides and basic exercises for immediate use.\n- **Balanced (4-6 weeks)**: a complete learning system -- study guides, exercises, assessments, and spaced repetition materials.\n- **Comprehensive (8-12 weeks)**: a full learning ecosystem with interactive elements, effectiveness measurement, and a scalable update protocol.\n\n### When to use it\n\n- You have a learning plan and need to turn it into usable materials.\n- You are preparing for a certification, exam, or structured self-study program.\n- You want materials tailored to your specific objectives rather than relying entirely on off-the-shelf resources.\n\n### When NOT to use it\n\n- You haven't designed your learning course yet -- use the Personal Learning Course Design workflow first to define objectives and structure.\n- You need to design a course for others to take -- use the Learner-Centered Course workflow instead.\n\n### How to get good results\n\n- Select the path honestly based on available time. Starting with Quick Start and expanding later is better than committing to Comprehensive and abandoning it.\n- Have your learning objectives written out before starting -- the workflow maps every material directly to an objective.\n- Be specific about your preferred learning formats (text, diagrams, flashcards, practice problems) at the start.",
|
|
@@ -192,4 +192,4 @@
|
|
|
192
192
|
"hasValidation": true
|
|
193
193
|
}
|
|
194
194
|
]
|
|
195
|
-
}
|
|
195
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "production-readiness-audit",
|
|
3
|
-
"name": "Production Readiness Audit
|
|
3
|
+
"name": "Production Readiness Audit",
|
|
4
4
|
"version": "0.1.0",
|
|
5
5
|
"description": "Use this to audit a codebase scope for production readiness. Checks debugging correctness, runtime operability, artifact realism, technical debt, and anything that would prevent honest production deployment.",
|
|
6
6
|
"about": "## Production Readiness Audit\n\nThis workflow performs a structured, evidence-driven audit to answer one question honestly: is this code actually ready for production? It goes beyond style and lint -- it looks for debugging correctness, runtime operability under real conditions, artifact realism (stale code, fake completeness, placeholder behavior), maintainability debt, test and observability gaps, and security or performance risks.\n\n**What it does:**\nThe workflow bounds the audit scope, states a readiness hypothesis, freezes a neutral fact packet, then runs parallel reviewer families -- each specializing in a different readiness dimension. It reconciles contradictions through an evidence loop and produces a final verdict: `ready`, `ready_with_conditions`, `not_ready`, or `inconclusive`.\n\n**When to use it:**\n- Before shipping a new service, feature, or major refactor to production\n- When a codebase has been under rapid development and you want an honest readiness check before a launch deadline\n- When onboarding to a codebase and wanting a structured assessment of its production posture\n- When a post-incident review surfaces questions about whether the system was truly ready\n\n**What it produces:**\nA verdict with a confidence band, a prioritized list of blocker-grade and major findings, debugging leads, runtime and operational risk callouts, artifact-realism concerns (misleading completeness, stale docs, dead paths), a coverage ledger by audit domain, and a remediation order with specific follow-up recommendations.\n\n**How to get good results:**\nProvide a clear scope -- a service name, a module path, or a feature boundary. The narrower and more concrete the scope, the sharper the findings. If \"production-ready\" has a specific meaning for your team (e.g. SLA requirements, specific deployment constraints), mention it. The workflow will try to infer the production bar from repo patterns and context, but explicit criteria improve accuracy.",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "relocation-workflow-us",
|
|
3
|
-
"name": "US Relocation Decision Workflow
|
|
3
|
+
"name": "US Relocation Decision Workflow",
|
|
4
4
|
"version": "1.0.0",
|
|
5
5
|
"description": "Use this to evaluate US cities or regions for a potential relocation. Discovers your preferences, generates candidate areas, screens them, and produces a ranked dossier with evidence.",
|
|
6
6
|
"about": "## US Relocation Decision Workflow\n\nUse this to evaluate US cities and regions for a potential move. The workflow takes a structured, evidence-driven approach: it starts by calibrating your preferences and dealbreakers, generates a broad diverse pool of candidate areas (including non-obvious ones), screens them systematically, and produces a ranked dossier you can actually act on.\n\n### What it produces\n\n- A `RELOCATION_DOSSIER.md` with your full preference model, screening results, and comparison matrix.\n- Individual per-candidate profiles at `relocation-profiles/<slug>.md` covering housing, cost of living, taxes, safety, climate risk, schools, healthcare, commute, and any other modules you activate.\n- A scored ranking with explainable reasoning and an explicit disclosure of any data gaps.\n- A next-steps plan: visit recommendations, open questions per candidate, and pivot triggers.\n\n### When to use it\n\n- You are seriously considering a US relocation and want a rigorous, evidence-backed shortlist.\n- You want to surface non-obvious candidates you wouldn't have considered on your own.\n- You've been anchoring on a handful of cities and want a structured process to either validate or challenge that.\n\n### How to get good results\n\n- Be honest about dealbreakers upfront -- the workflow builds these into screening and filters candidates early.\n- The MaxDiff weight calibration exercise (offered in Phase 1) is worth doing if you're unsure how to weight competing priorities. It takes 5-10 minutes and produces more reliable weights than guessing.\n- The calibration deck in Phase 1 shows you lifestyle archetypes and asks for reactions -- engage with this seriously. Surprises in your reactions are valuable signal.\n- The workflow activates only the research modules you need. Keep it focused on what actually matters to your household.",
|
|
@@ -245,4 +245,4 @@
|
|
|
245
245
|
"requireConfirmation": true
|
|
246
246
|
}
|
|
247
247
|
]
|
|
248
|
-
}
|
|
248
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "ui-ux-design-workflow",
|
|
3
|
-
"name": "UI/UX Design Workflow
|
|
3
|
+
"name": "UI/UX Design Workflow",
|
|
4
4
|
"version": "0.1.0",
|
|
5
5
|
"description": "Design UI/UX from scratch with enforced process. Makes problem framing structurally required before solution proposals, forces exploration of multiple design directions before convergence, and applies reviewer families for information architecture, UX laws, accessibility, edge cases, and content. Output: a design spec concrete enough to implement or review.",
|
|
6
6
|
"about": "## UI/UX Design Workflow\n\nThis workflow produces a design spec for a new feature, screen, component, or interaction. It is built around a single principle: problem framing must happen before any solutions are proposed. The workflow makes this structurally impossible to skip, which prevents the most common failure mode in AI-assisted design -- going straight from \"I need a settings screen\" to a layout without ever asking who uses it or what they are trying to do.\n\n**What it does:**\nPhase 0 frames the problem by reading existing code patterns and asking only what tools cannot answer. Phase 1 generates 2-3 genuinely different design directions before any one is chosen. Phases 2-5 run parallel reviewer families -- information architecture, UX laws (Hick's Law, Miller's Law, Fitts's Law, and others), accessibility (specific WCAG requirements, not just \"follow WCAG\"), edge cases (empty, error, loading, first-use), and content quality -- then synthesize their findings, resolve contradictions, and write a spec only after all hard quality gates pass.\n\n**When to use it:**\n- You need to design a new screen, feature, or non-trivial component\n- You want explicit coverage of accessibility, edge states, and UX laws, not just a layout sketch\n- You need a spec concrete enough for an engineer to implement or a designer to review\n- Simple single-component changes also work through a lighter direct-spec path\n\n**What it produces:**\nA design spec with 8 sections: design decision, information architecture, interaction design, all element states, specific accessibility requirements, content copy, reviewer findings with citations, and open questions that still require human visual review.\n\n**How to get good results:**\nPoint the workflow to your codebase so it can read existing components and patterns. Provide the design system location if it is not in the repo. Share any known user pain points or research. 
The workflow will surface what it cannot determine on its own.",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
],
|
|
25
25
|
"metaGuidance": [
|
|
26
26
|
"PROCESS IS THE VALUE: the biggest failure mode in AI-assisted design is skipping to solutions before understanding the problem. This workflow makes that structurally impossible. Do not shortcut Phase 0.",
|
|
27
|
-
"EVIDENCE OVER PLATITUDES: every finding must cite a specific element from the context packet. 'Consider reducing cognitive load' is not a finding. 'The settings panel has 14 options, violating Miller
|
|
27
|
+
"EVIDENCE OVER PLATITUDES: every finding must cite a specific element from the context packet. 'Consider reducing cognitive load' is not a finding. 'The settings panel has 14 options, violating Miller’s Law (7±2)' is a finding.",
|
|
28
28
|
"SIMPLE CRITERIA: designComplexity=Simple is only valid for a single existing component with a minor change, no new user flows, no information architecture changes, and no new interaction patterns. If uncertain, classify upward.",
|
|
29
29
|
"HONEST LIMITS: this workflow produces a text-based design spec. It cannot produce visual mockups, conduct usability testing, or verify visual quality. Say so explicitly in the handoff and flag what still needs human visual review.",
|
|
30
30
|
"CONTEXT BLINDNESS: if the user has not provided design system, existing component patterns, or platform conventions, surface this gap in Phase 0 and ask. Do not silently design without this context.",
|
|
@@ -74,14 +74,14 @@
|
|
|
74
74
|
"promptBlocks": {
|
|
75
75
|
"goal": "Generate 2-3 genuinely different design directions before committing to any one of them.",
|
|
76
76
|
"constraints": [
|
|
77
|
-
"Directions must be genuinely different
|
|
77
|
+
"Directions must be genuinely different — not variations of the same pattern with different labels.",
|
|
78
78
|
"Each direction needs an information architecture sketch: how is content organized, what is the primary navigation path, what is the visual hierarchy?",
|
|
79
79
|
"Do not select a direction in this phase. Exploration comes before convergence."
|
|
80
80
|
],
|
|
81
81
|
"procedure": [
|
|
82
82
|
"Generate Direction A: the most conventional approach that follows existing platform patterns and design system. Low risk, familiar to users.",
|
|
83
|
-
"Generate Direction B: an approach that prioritizes the primary user goal differently
|
|
84
|
-
"Generate Direction C (if designComplexity=Complex): a third direction that challenges the assumptions in A and B
|
|
83
|
+
"Generate Direction B: an approach that prioritizes the primary user goal differently — different IA, different entry point, or different interaction model.",
|
|
84
|
+
"Generate Direction C (if designComplexity=Complex): a third direction that challenges the assumptions in A and B — a more radical rethinking of the problem.",
|
|
85
85
|
"For each direction, describe: (1) the primary IA sketch (main sections, navigation path, content hierarchy), (2) the core interaction model (how does the user accomplish their goal?), (3) the key tradeoffs relative to user goals and constraints.",
|
|
86
86
|
"After describing all directions, restate which user goals each direction serves well and where each direction is weakest."
|
|
87
87
|
],
|
|
@@ -107,8 +107,8 @@
|
|
|
107
107
|
"promptBlocks": {
|
|
108
108
|
"goal": "Assemble a neutral context packet that all reviewer families will use as shared truth, then declare which reviewers are needed.",
|
|
109
109
|
"constraints": [
|
|
110
|
-
"The context packet is neutral
|
|
111
|
-
"Select the direction to develop further before running reviewers
|
|
110
|
+
"The context packet is neutral — it presents the design problem and directions without advocating for any one.",
|
|
111
|
+
"Select the direction to develop further before running reviewers — reviewers evaluate a specific direction, not an abstract problem.",
|
|
112
112
|
"All 5 reviewer families are active for Complex designs; IA and UX laws reviewers are always included for Standard."
|
|
113
113
|
],
|
|
114
114
|
"procedure": [
|
|
@@ -151,7 +151,7 @@
|
|
|
151
151
|
"procedure": [
|
|
152
152
|
"Before delegating, restate the selected direction and the user goal it serves best.",
|
|
153
153
|
"Spawn one WorkRail Executor per selected reviewer family simultaneously. Each executor receives: the designContextPacket, their specific reviewer mission, and the finding format requirement.",
|
|
154
|
-
"Reviewer family missions: (1) IA reviewer
|
|
154
|
+
"Reviewer family missions: (1) IA reviewer — evaluate content hierarchy, navigation paths, grouping logic, and information scent against user goals; cite specific IA decisions; (2) UX laws reviewer — check each relevant law: Hick's Law (decision count), Miller's Law (working memory), Jakob's Law (familiar patterns), Fitts's Law (target size and distance), Peak-End Rule (emotional journey), Tesler's Law (irreducible complexity), Von Restorff Effect (visual differentiation of important elements); cite specific violations or confirmations; (3) accessibility reviewer — check WCAG requirements: color contrast ratios (4.5:1 normal, 3:1 large text), keyboard navigation path, touch target sizes (44x44px minimum), screen reader labels, focus indicators, animation controls; produce specific requirements not 'follow WCAG'; (4) edge cases reviewer — for each interactive element, explicitly address: empty state (no data), error state (failed action), loading state, first-use/onboarding, offline or degraded state, destructive actions; flag any state not addressed in the current design; (5) content reviewer — evaluate every label, button copy, placeholder, error message, and helper text against clarity, user language vs. technical jargon, and actionability of error messages.",
|
|
155
155
|
"After receiving all executor outputs, synthesize explicitly: what was confirmed, what was new, what looks weak or generic, and what has citations vs. what is speculation.",
|
|
156
156
|
"Set evidenceWeakCount to the number of findings without specific citations."
|
|
157
157
|
],
|
|
@@ -249,13 +249,13 @@
|
|
|
249
249
|
"promptBlocks": {
|
|
250
250
|
"goal": "Verify all quality gates pass before writing the design spec.",
|
|
251
251
|
"constraints": [
|
|
252
|
-
"If any gate fails, fix the underlying issue before advancing
|
|
252
|
+
"If any gate fails, fix the underlying issue before advancing — do not write the spec over known gaps."
|
|
253
253
|
],
|
|
254
254
|
"procedure": [
|
|
255
|
-
"Gate 1
|
|
256
|
-
"Gate 2
|
|
257
|
-
"Gate 3
|
|
258
|
-
"Gate 4
|
|
255
|
+
"Gate 1 — Evidence citations: confirm every finding in reviewerFindings cites a specific design element from the context packet. Flag any finding that is generic advice without a specific reference and either improve it or mark it advisory-only.",
|
|
256
|
+
"Gate 2 — Reviewer coverage: confirm every declared reviewer family has at least one substantive finding. If a family has no findings, state explicitly why (e.g., 'IA reviewer found no issues — the single-screen design has no navigation structure to evaluate').",
|
|
257
|
+
"Gate 3 — Edge case coverage: confirm empty state, error state, loading state, and first-use are addressed for each interactive element in the selected design direction. List any that are not yet addressed.",
|
|
258
|
+
"Gate 4 — Accessibility specificity: confirm accessibility requirements are listed as specific constraints (color contrast ratios, touch target sizes, keyboard tab order), not as a generic 'follow WCAG' instruction."
|
|
259
259
|
],
|
|
260
260
|
"outputRequired": {
|
|
261
261
|
"notesMarkdown": "Gate check results: which passed, which failed, what was fixed.",
|
|
@@ -280,7 +280,7 @@
|
|
|
280
280
|
"Do not drift into implementation planning (specific component libraries, code) unless explicitly asked."
|
|
281
281
|
],
|
|
282
282
|
"procedure": [
|
|
283
|
-
"Write the design spec covering: (1) Design Decision
|
|
283
|
+
"Write the design spec covering: (1) Design Decision — which direction was chosen and the specific reason it was chosen over the others; (2) Information Architecture — content hierarchy, navigation structure, primary user path; (3) Interaction Design — how each interactive element works, what triggers what, what feedback the user gets; (4) States — for each element: default, hover/focus, loading, error, empty, first-use, disabled; (5) Accessibility Requirements — specific requirements (color contrast ratios, keyboard tab order, touch target sizes, screen reader labels); (6) Content — all copy, labels, error messages, placeholders, and onboarding text; (7) Reviewer Findings — per-dimension findings with citations that the design should address or has already addressed; (8) Open Questions — what still needs human input (visual design, usability testing, design system component availability).",
|
|
284
284
|
"Close the spec by naming: what visual review a human designer should perform, and what this workflow cannot verify (visual quality, usability, emotional feel)."
|
|
285
285
|
],
|
|
286
286
|
"outputRequired": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "workflow-for-workflows",
|
|
3
|
-
"name": "Workflow Authoring Workflow
|
|
3
|
+
"name": "Workflow Authoring Workflow",
|
|
4
4
|
"version": "2.4.0",
|
|
5
5
|
"description": "Use this to author or modernize a WorkRail workflow. Guides through understanding the task, defining effectiveness targets, designing architecture and quality gates, drafting, validating, assigning tags, and handing off.",
|
|
6
6
|
"about": "## Workflow Authoring Workflow\n\nThis is the standard WorkRail workflow for creating a new workflow from scratch or modernizing an existing one. It is the trust gate for all other workflows: a workflow is not considered production-ready until it has passed through here.\n\n**What it does:**\nThe workflow walks through the full authoring lifecycle: understanding the task, choosing the right baseline and archetype, designing the phase and quality-gate architecture, drafting the workflow JSON, running structural validators, auditing state fields for bloat, simulating execution against real scenarios, running an adversarial quality review, and producing a final trust handoff. For modernization tasks it builds a value inventory first to ensure enforcement mechanisms, domain knowledge, and behavioral rules are preserved or equivalently replaced.\n\n**When to use it:**\n- You want to author a new WorkRail workflow for a recurring task or problem\n- You have an existing workflow that is outdated, uses legacy patterns (pseudo-DSL, regex validation, satisfaction-score loops), or produces shallow results\n- You want a workflow that will pass the WorkRail quality bar and be trusted to run in production\n\n**What it produces:**\nA validated, tagged workflow JSON file with a `validatedAgainstSpecVersion` stamp. A final trust handoff with readiness verdict, known failure modes, residual weaknesses, and testing guidance.\n\n**How to get good results:**\nDescribe the recurring task the workflow should solve, who will run it, and what a satisfying result looks like. For modernization, point to the existing workflow file. The workflow reads the schema and authoring spec itself -- you do not need to know the JSON format in advance.",
|
|
@@ -210,7 +210,7 @@
|
|
|
210
210
|
],
|
|
211
211
|
"procedure": [
|
|
212
212
|
"Decide the phase list, one-line goal for each phase, and overall ordering.",
|
|
213
|
-
"Identify meaningful input classifications that require different workflow paths. For each variant dimension, decide the branching mechanism: `runCondition` on separate steps (diverging paths), `promptFragments` (additive behavior on a shared base), or a separate workflow entirely. For each captured variable that drives branching, define its closed set of valid values
|
|
213
|
+
"Identify meaningful input classifications that require different workflow paths. For each variant dimension, decide the branching mechanism: `runCondition` on separate steps (diverging paths), `promptFragments` (additive behavior on a shared base), or a separate workflow entirely. For each captured variable that drives branching, define its closed set of valid values — unexpected values are a common source of silent misbehavior.",
|
|
214
214
|
"Design loops with explicit exit rules, bounded maxIterations, and real reasons for another pass.",
|
|
215
215
|
"Decide confirmation gates, delegation vs template injection vs direct execution, promptFragments, references, artifacts, and metaGuidance.",
|
|
216
216
|
"If the authored workflow encodes domain knowledge tied to a specific version of an external system or codebase, decide how to handle staleness: prefer reading the codebase at runtime over hardcoding patterns, or explicitly document versioned assumptions so they surface as maintenance debt."
|
|
@@ -266,7 +266,7 @@
|
|
|
266
266
|
"procedure": [
|
|
267
267
|
"Decide whether the authored workflow needs a hypothesis step, neutral fact packet, reviewer or validator families, contradiction loop, final validation bundle, or explicit blind-spot handling.",
|
|
268
268
|
"Design the confidence model, blind-spot model, and state economy plan.",
|
|
269
|
-
"Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail. Choose the right enforcement mechanism for each gate: `assessments` + `assessmentRefs` + `assessmentConsequences` for bounded confidence judgments (each dimension captures a distinct orthogonal failure mode
|
|
269
|
+
"Decide the hard-gate dimensions that would make the authored workflow unsafe or unsatisfying if they fail. Choose the right enforcement mechanism for each gate: `assessments` + `assessmentRefs` + `assessmentConsequences` for bounded confidence judgments (each dimension captures a distinct orthogonal failure mode — see `mr-review-workflow.agentic.v2.json` and `bug-investigation.agentic.v2.json`); `validationCriteria` with context-aware conditions for completion-gating on structured checklists or required output content (the engine enforces that required content appears in the response before the step can complete, without a loop — conditions on individual rules can match the workflow's branching context); a re-verification loop for fix-and-verify cycles where the agent must act then prove the action worked. Do not default to a loop when `validationCriteria` is the right tool, or to `requireConfirmation` when a hard gate is needed.",
|
|
270
270
|
"Write the redesign triggers that should force architectural revision rather than cosmetic refinement."
|
|
271
271
|
],
|
|
272
272
|
"outputRequired": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "workflow-for-workflows",
|
|
3
|
-
"name": "Workflow Authoring Workflow
|
|
4
|
-
"version": "2.
|
|
3
|
+
"name": "Workflow Authoring Workflow",
|
|
4
|
+
"version": "2.5.0",
|
|
5
5
|
"description": "Use this to author or modernize a WorkRail workflow. Guides through understanding the task, defining effectiveness targets, designing architecture and quality gates, drafting, validating, assigning tags, and handing off.",
|
|
6
6
|
"about": "## Workflow Authoring Workflow\n\nThis is the standard WorkRail workflow for creating a new workflow from scratch or modernizing an existing one. It is the trust gate for all other workflows: a workflow is not considered production-ready until it has passed through here.\n\n**What it does:**\nThe workflow walks through the full authoring lifecycle: understanding the task, choosing the right baseline and archetype, designing the phase and quality-gate architecture, drafting the workflow JSON, running structural validators, auditing state fields for bloat, simulating execution against real scenarios, running an adversarial quality review, and producing a final trust handoff. For modernization tasks it builds a value inventory first to ensure enforcement mechanisms, domain knowledge, and behavioral rules are preserved or equivalently replaced.\n\n**When to use it:**\n- You want to author a new WorkRail workflow for a recurring task or problem\n- You have an existing workflow that is outdated, uses legacy patterns (pseudo-DSL, regex validation, satisfaction-score loops), or produces shallow results\n- You want a workflow that will pass the WorkRail quality bar and be trusted to run in production\n\n**What it produces:**\nA validated, tagged workflow JSON file with a `validatedAgainstSpecVersion` stamp. A final trust handoff with readiness verdict, known failure modes, residual weaknesses, and testing guidance.\n\n**How to get good results:**\nDescribe the recurring task the workflow should solve, who will run it, and what a satisfying result looks like. For modernization, point to the existing workflow file. The workflow reads the schema and authoring spec itself -- you do not need to know the JSON format in advance.",
|
|
7
7
|
"examples": [
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "wr.discovery",
|
|
3
|
-
"name": "Discovery Workflow
|
|
4
|
-
"version": "3.
|
|
3
|
+
"name": "Discovery Workflow",
|
|
4
|
+
"version": "3.2.0",
|
|
5
|
+
"validatedAgainstSpecVersion": 3,
|
|
5
6
|
"description": "Use this to explore and think through a problem end-to-end. Moves between landscape exploration, problem framing, candidate generation, adversarial challenge, and uncertainty resolution.",
|
|
6
|
-
"about": "## Discovery Workflow\n\nThis workflow is for structured thinking through an ambiguous problem, opportunity, or decision
|
|
7
|
+
"about": "## Discovery Workflow\n\nThis workflow is for structured thinking through an ambiguous problem, opportunity, or decision -- the kind where you are not sure of the right answer yet and jumping straight to solutions would be premature.\n\n**What it does:**\nBefore starting any research, the workflow challenges the stated goal: it determines whether you handed it a problem or a solution, surfaces hidden assumptions, and defines what success looks like in concrete observable terms. It then selects one of three emphasis paths based on your actual need: `landscape_first` for understanding the current state and comparing options, `full_spectrum` for important or ambiguous problems where both landscape grounding and reframing are needed, and `design_first` when the dominant risk is solving the wrong problem. The workflow moves through landscape research, stakeholder and problem framing, candidate direction generation, adversarial challenge, and an uncertainty-resolution stage that can close with a recommendation, a targeted research follow-up, or a prototype/test plan. 
A design document is maintained throughout as the human-facing artifact.\n\n**When to use it:**\n- You face a decision, architectural question, or design problem with no obvious right answer\n- You want to explore an opportunity space before committing to a direction\n- You suspect the stated problem might not be the real problem\n- You need a structured recommendation with explicit tradeoffs and alternatives rather than the first plausible answer\n- You want to make sure you are solving the right problem, not just the one you described\n\n**What it produces:**\nA design document covering: the reframed problem (if the original was solution-framed), the selected path and framing, landscape takeaways, chosen direction and why it won, the strongest alternative and why it lost, confidence band, residual risks, and next actions.\n\n**How to get good results:**\nDescribe the problem, opportunity, or decision you want help thinking through -- or describe the solution you are considering, and let the workflow figure out the underlying problem. State what outcome you want (a recommendation, a comparison, a research plan, a prototype direction). The more context you provide upfront about constraints and anti-goals, the sharper the framing will be.",
|
|
7
8
|
"examples": [
|
|
8
9
|
"Decide whether to build a custom notification system or adopt a third-party service",
|
|
9
10
|
"Explore what the right architecture is for moving our monolith to services",
|
|
@@ -53,6 +54,40 @@
|
|
|
53
54
|
}
|
|
54
55
|
],
|
|
55
56
|
"steps": [
|
|
57
|
+
{
|
|
58
|
+
"id": "phase-0-reframe",
|
|
59
|
+
"title": "Phase 0a: Reframe the Goal Before Jumping to Solutions",
|
|
60
|
+
"promptBlocks": {
|
|
61
|
+
"goal": "Challenge the stated goal before we start researching it. Figure out whether I handed you a problem or a solution, surface the assumptions baked into the framing, and define what success actually looks like.",
|
|
62
|
+
"constraints": [
|
|
63
|
+
[
|
|
64
|
+
{
|
|
65
|
+
"kind": "ref",
|
|
66
|
+
"refId": "wr.refs.notes_first_durability"
|
|
67
|
+
}
|
|
68
|
+
],
|
|
69
|
+
"Do not begin landscape research or candidate generation in this step.",
|
|
70
|
+
"If the goal is stated as a solution, find the problem behind it -- do not just accept the solution as given.",
|
|
71
|
+
"Challenge assumptions with specificity: name each assumption, state why it might be wrong, and say what evidence would confirm or refute it.",
|
|
72
|
+
"Define success in terms of outcomes and observable signals, not just delivery of the stated goal."
|
|
73
|
+
],
|
|
74
|
+
"procedure": [
|
|
75
|
+
"Classify the stated goal as a `solution_statement` (names a specific solution or approach) or a `problem_statement` (describes an outcome, pain, or gap without prescribing the fix). Record this as `goalType`.",
|
|
76
|
+
"If `goalType = solution_statement`: identify the underlying problem this solution is trying to solve, name at least one materially different way to solve that problem, and state explicitly whether the stated solution is the best match for the problem or just the most familiar one.",
|
|
77
|
+
"Challenge exactly 3 key assumptions embedded in the stated goal. For each assumption: state the assumption clearly, explain why it might be wrong, and identify what evidence would confirm or refute it. Record these as `challengedAssumptions`.",
|
|
78
|
+
"Define what success looks like before any candidates are generated. Success criteria must be concrete and observable -- not 'the problem is solved' but 'we can measure X, users do Y, the system behaves Z'. Record these as `successCriteria`.",
|
|
79
|
+
"Record a one-sentence `reframedProblem` that captures the real underlying problem, stripped of any solution bias from the original goal.",
|
|
80
|
+
"Set these keys in the next `continue_workflow` call's `context` object: `goalType`, `reframedProblem`, `challengedAssumptions`, `successCriteria`, `goalWasSolutionStatement`."
|
|
81
|
+
],
|
|
82
|
+
"verify": [
|
|
83
|
+
"Each of the 3 challenged assumptions is specific enough that someone could disagree with the challenge.",
|
|
84
|
+
"The success criteria would let a skeptic determine whether the work actually succeeded.",
|
|
85
|
+
"If the goal was a solution-statement, the underlying problem is stated independently of the proposed solution.",
|
|
86
|
+
"The reframed problem is meaningfully different from the original goal wording, or you have explicitly noted why the original framing was already problem-shaped."
|
|
87
|
+
]
|
|
88
|
+
},
|
|
89
|
+
"requireConfirmation": false
|
|
90
|
+
},
|
|
56
91
|
{
|
|
57
92
|
"id": "phase-0-select-path",
|
|
58
93
|
"title": "Phase 0: Understand, Classify, and Recommend a Path",
|
|
@@ -71,13 +106,15 @@
|
|
|
71
106
|
],
|
|
72
107
|
"procedure": [
|
|
73
108
|
"Capture: `problemStatement`, `desiredOutcome`, `coreConstraints`, `antiGoals`, `primaryUncertainty`, `knownApproaches`, `importantStakeholders`, `rigorMode`, `automationLevel`, `pathRecommendation`, `pathRationale`, `designDocPath`.",
|
|
74
|
-
"
|
|
109
|
+
"If `goalWasSolutionStatement = true`, set `problemStatement` from `reframedProblem` rather than from the stated goal. Record the original stated goal as `statedGoal` in the design doc so the distinction is visible.",
|
|
110
|
+
"Choose `landscape_first` when my dominant need is understanding the current landscape or comparing options. Choose `full_spectrum` when both landscape grounding and reframing are needed. Choose `design_first` when the dominant risk is solving the wrong problem or shaping the wrong concept. If `goalWasSolutionStatement = true`, bias toward `design_first` unless the stated solution is clearly the correct framing.",
|
|
75
111
|
"Create or update `designDocPath` with sections for Context / Ask, Path Recommendation, Constraints / Anti-goals, Landscape Packet, Problem Frame Packet, Candidate Directions, Challenge Notes, Resolution Notes, Decision Log, and Final Summary.",
|
|
76
112
|
"Set these keys in the next `continue_workflow` call's `context` object: `problemStatement`, `desiredOutcome`, `coreConstraints`, `antiGoals`, `primaryUncertainty`, `knownApproaches`, `importantStakeholders`, `rigorMode`, `automationLevel`, `pathRecommendation`, `pathRationale`, `designDocPath`.",
|
|
77
113
|
"Also set `goal` in the context object: one sentence describing what you are trying to accomplish. This populates the session title in the Workspace console immediately."
|
|
78
114
|
],
|
|
79
115
|
"verify": [
|
|
80
116
|
"The chosen path is justified against the other two, not just named.",
|
|
117
|
+
"If `goalWasSolutionStatement = true`, the `problemStatement` reflects the reframed problem, not the stated solution.",
|
|
81
118
|
"The design doc exists and the path recommendation is recorded there."
|
|
82
119
|
]
|
|
83
120
|
},
|
|
@@ -284,10 +321,11 @@
|
|
|
284
321
|
"Capture users or stakeholders, jobs or outcomes, pains or tensions, constraints that matter in lived use, success criteria, assumptions, and at least 2 reframes or HMW questions.",
|
|
285
322
|
"If `delegationAvailable = true`, decide whether parallel stakeholder lenses would actually sharpen the result. If yes, run them in parallel. If not, keep going yourself. In either case, you must synthesize the result yourself.",
|
|
286
323
|
"Update `designDocPath` using `problemFrameTemplate`.",
|
|
324
|
+
"Before finishing, name ONE specific concrete condition that would make the current framing wrong -- not a generic caveat, but a specific thing that if discovered to be true would change the path or direction. Record this as `primaryFramingRisk` in the design doc.",
|
|
287
325
|
"Set these keys in the next `continue_workflow` call's `context` object: `problemFrame`, `primaryUsers`, `tensionCount`, `successCriteriaCount`, `framingRiskCount`, `needsChallenge`, `retriageNeeded`."
|
|
288
326
|
],
|
|
289
327
|
"verify": [
|
|
290
|
-
"The framing names what could still be wrong.",
|
|
328
|
+
"The framing names what could still be wrong -- specifically, not generically.",
|
|
291
329
|
"The frame is strong enough to influence candidate generation and later selection."
|
|
292
330
|
]
|
|
293
331
|
},
|
|
@@ -319,10 +357,11 @@
|
|
|
319
357
|
"Capture users or stakeholders, jobs or outcomes, pains or tensions, constraints that matter in lived use, success criteria, assumptions, and at least 2 reframes or HMW questions.",
|
|
320
358
|
"If `delegationAvailable = true`, decide whether parallel stakeholder lenses would actually sharpen the result. If yes, run them in parallel. If not, keep going yourself. In either case, you must synthesize the result yourself.",
|
|
321
359
|
"Update `designDocPath` using `problemFrameTemplate`.",
|
|
360
|
+
"Before finishing, name ONE specific concrete condition that would make the current framing wrong -- not a generic caveat, but a specific thing that if discovered to be true would change the path or direction. Record this as `primaryFramingRisk` in the design doc.",
|
|
322
361
|
"Set these keys in the next `continue_workflow` call's `context` object: `problemFrame`, `primaryUsers`, `tensionCount`, `successCriteriaCount`, `framingRiskCount`, `needsChallenge`, `retriageNeeded`."
|
|
323
362
|
],
|
|
324
363
|
"verify": [
|
|
325
|
-
"The framing names what could still be wrong.",
|
|
364
|
+
"The framing names what could still be wrong -- specifically, not generically.",
|
|
326
365
|
"The framing depth is strong enough to justify a design-first path."
|
|
327
366
|
]
|
|
328
367
|
},
|
|
@@ -335,8 +374,20 @@
|
|
|
335
374
|
"id": "phase-1g-retriage",
|
|
336
375
|
"title": "Phase 1g: Re-Triage After Early Context",
|
|
337
376
|
"runCondition": {
|
|
338
|
-
"
|
|
339
|
-
|
|
377
|
+
"or": [
|
|
378
|
+
{
|
|
379
|
+
"var": "retriageNeeded",
|
|
380
|
+
"equals": true
|
|
381
|
+
},
|
|
382
|
+
{
|
|
383
|
+
"var": "pathRecommendation",
|
|
384
|
+
"equals": "design_first"
|
|
385
|
+
},
|
|
386
|
+
{
|
|
387
|
+
"var": "pathRecommendation",
|
|
388
|
+
"equals": "full_spectrum"
|
|
389
|
+
}
|
|
390
|
+
]
|
|
340
391
|
},
|
|
341
392
|
"promptBlocks": {
|
|
342
393
|
"goal": "Reassess the path now that you have real landscape and framing context instead of just my initial wording.",
|