@exaudeus/workrail 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/application/services/validation-engine.js +4 -9
  2. package/dist/application/services/workflow-compiler.js +4 -6
  3. package/dist/console/assets/index-BZYIjrzJ.js +28 -0
  4. package/dist/console/assets/index-OLCKbDdm.css +1 -0
  5. package/dist/console/index.html +2 -2
  6. package/dist/engine/engine-factory.js +2 -2
  7. package/dist/engine/types.d.ts +1 -1
  8. package/dist/manifest.json +63 -63
  9. package/dist/mcp/handlers/shared/request-workflow-reader.d.ts +5 -0
  10. package/dist/mcp/handlers/shared/request-workflow-reader.js +47 -2
  11. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.d.ts +1 -1
  12. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.js +4 -5
  13. package/dist/mcp/handlers/v2-advance-core/index.js +1 -1
  14. package/dist/mcp/handlers/v2-advance-core/outcome-blocked.js +1 -1
  15. package/dist/mcp/handlers/v2-execution/start.d.ts +1 -0
  16. package/dist/mcp/handlers/v2-execution/start.js +20 -1
  17. package/dist/mcp/handlers/v2-workflow.d.ts +23 -0
  18. package/dist/mcp/handlers/v2-workflow.js +177 -10
  19. package/dist/mcp/output-schemas.d.ts +202 -8
  20. package/dist/mcp/output-schemas.js +38 -11
  21. package/dist/mcp/server.js +48 -1
  22. package/dist/mcp/tool-descriptions.js +17 -9
  23. package/dist/mcp/v2/tools.d.ts +6 -0
  24. package/dist/mcp/v2/tools.js +2 -0
  25. package/dist/mcp/workflow-protocol-contracts.js +5 -1
  26. package/dist/types/workflow-definition.d.ts +2 -2
  27. package/dist/v2/infra/local/workspace-anchor/index.js +4 -1
  28. package/dist/v2/usecases/console-routes.js +49 -1
  29. package/dist/v2/usecases/console-service.d.ts +1 -0
  30. package/dist/v2/usecases/console-service.js +4 -1
  31. package/dist/v2/usecases/console-types.d.ts +12 -0
  32. package/dist/v2/usecases/worktree-service.js +55 -7
  33. package/package.json +3 -2
  34. package/spec/authoring-spec.json +91 -3
  35. package/spec/workflow-tags.json +132 -0
  36. package/spec/workflow.schema.json +411 -97
  37. package/workflows/adaptive-ticket-creation.json +40 -22
  38. package/workflows/architecture-scalability-audit.json +65 -31
  39. package/workflows/bug-investigation.agentic.v2.json +36 -14
  40. package/workflows/coding-task-workflow-agentic.json +50 -38
  41. package/workflows/coding-task-workflow-agentic.lean.v2.json +124 -37
  42. package/workflows/coding-task-workflow-agentic.v2.json +90 -30
  43. package/workflows/cross-platform-code-conversion.v2.json +168 -48
  44. package/workflows/document-creation-workflow.json +47 -17
  45. package/workflows/documentation-update-workflow.json +8 -8
  46. package/workflows/intelligent-test-case-generation.json +2 -2
  47. package/workflows/learner-centered-course-workflow.json +267 -267
  48. package/workflows/mr-review-workflow.agentic.v2.json +81 -14
  49. package/workflows/personal-learning-materials-creation-branched.json +175 -175
  50. package/workflows/presentation-creation.json +159 -159
  51. package/workflows/production-readiness-audit.json +54 -15
  52. package/workflows/relocation-workflow-us.json +44 -35
  53. package/workflows/routines/tension-driven-design.json +1 -1
  54. package/workflows/scoped-documentation-workflow.json +25 -25
  55. package/workflows/test-artifact-loop-control.json +1 -2
  56. package/workflows/ui-ux-design-workflow.json +327 -0
  57. package/workflows/workflow-diagnose-environment.json +1 -1
  58. package/workflows/workflow-for-workflows.json +507 -484
  59. package/workflows/workflow-for-workflows.v2.json +90 -18
  60. package/workflows/wr.discovery.json +112 -30
  61. package/dist/console/assets/index-DW78t31j.css +0 -1
  62. package/dist/console/assets/index-EsSXrC_a.js +0 -28
@@ -2,15 +2,15 @@
2
2
  "id": "adaptive-ticket-creation",
3
3
  "name": "Adaptive Ticket Creation Workflow",
4
4
  "version": "1.0.0",
5
- "description": "Create high-quality Jira tickets by automatically selecting the right complexity path (Simple, Standard, or Epic) based on request analysis. One polished ticket for simple requests; structured decomposition with estimates for epics.",
5
+ "description": "Use this to create high-quality Jira tickets for features, tasks, or epics. Automatically selects the right complexity path (Simple, Standard, or Epic) and generates properly structured tickets with acceptance criteria and estimates.",
6
6
  "preconditions": [
7
7
  "User has provided a description of the feature, task, or work to be ticketed.",
8
8
  "Agent has file system access for loading team preferences and persisting rules."
9
9
  ],
10
10
  "metaGuidance": [
11
- "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria not user-story paraphrases.",
11
+ "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria \u2014 not user-story paraphrases.",
12
12
  "EXPLORE FIRST: use tools to gather context before asking the user anything. Ask only for information you genuinely cannot determine with tools or from the request itself.",
13
- "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path complex sessions are where durable conventions emerge and where the investment pays off.",
13
+ "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path \u2014 complex sessions are where durable conventions emerge and where the investment pays off.",
14
14
  "AUTONOMOUS TRIAGE: decide pathComplexity (Simple / Standard / Epic) yourself from the request. Surface your reasoning, then wait for confirmation.",
15
15
  "QUALITY FLOOR: every ticket must have a context-rich description, checkbox-style acceptance criteria that are objectively testable, and an effort estimate."
16
16
  ],
@@ -21,7 +21,7 @@
21
21
  "promptBlocks": {
22
22
  "goal": "Analyze the request, gather available context, and select the right complexity path before doing any ticket work.",
23
23
  "constraints": [
24
- "Decide the path yourself do not ask the user to choose.",
24
+ "Decide the path yourself \u2014 do not ask the user to choose.",
25
25
  "Load ./.workflow_rules/ticket_creation.md if it exists and let it influence your triage. If the file does not exist, note this explicitly in your output so the user knows team conventions were not applied.",
26
26
  "Set pathComplexity to exactly one of: Simple, Standard, or Epic."
27
27
  ],
@@ -29,7 +29,7 @@
29
29
  "Read any attached documents, linked PRDs, or referenced specs.",
30
30
  "Identify complexity signals: scope breadth, number of distinct deliverables, cross-team dependencies, technical unknowns, and estimated ticket count.",
31
31
  "Apply the triage rubric: Simple = single ticket, clear requirements, no blocking unknowns, minimal dependencies. Standard = multiple related tickets, moderate scope, some analysis needed. Epic = complex feature requiring decomposition, multiple teams or significant unknowns, likely 6+ tickets.",
32
- "Upgrade triggers escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
32
+ "Upgrade triggers \u2014 escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
33
33
  "State your selected path and the top three reasons. Capture pathComplexity in context."
34
34
  ],
35
35
  "outputRequired": {
@@ -53,7 +53,7 @@
53
53
  "promptBlocks": {
54
54
  "goal": "Generate one complete, developer-ready Jira ticket for this request.",
55
55
  "constraints": [
56
- "Acceptance criteria must be phrased as observable, testable conditions not user-story restatements.",
56
+ "Acceptance criteria must be phrased as observable, testable conditions \u2014 not user-story restatements.",
57
57
  "Follow any team conventions from ./.workflow_rules/ticket_creation.md.",
58
58
  "Include all fields a developer needs to start work without asking follow-up questions."
59
59
  ],
@@ -81,8 +81,14 @@
81
81
  "title": "Path C, Phase 1: Gather Context and Surface Gaps",
82
82
  "runCondition": {
83
83
  "or": [
84
- { "var": "pathComplexity", "equals": "Standard" },
85
- { "var": "pathComplexity", "equals": "Epic" }
84
+ {
85
+ "var": "pathComplexity",
86
+ "equals": "Standard"
87
+ },
88
+ {
89
+ "var": "pathComplexity",
90
+ "equals": "Epic"
91
+ }
86
92
  ]
87
93
  },
88
94
  "promptBlocks": {
@@ -97,7 +103,7 @@
97
103
  "Load ./.workflow_rules/ticket_creation.md and note any relevant team conventions.",
98
104
  "Identify: key stakeholders, team dependencies, technical constraints, known risks, and any conflicting requirements.",
99
105
  "Classify each gap as: Critical (blocks planning), Important (affects scope), or Nice-to-have (can proceed without it).",
100
- "For Critical and Important gaps that tools cannot resolve, ask the user in a single consolidated question block, not one at a time.",
106
+ "For Critical and Important gaps that tools cannot resolve, ask the user \u2014 in a single consolidated question block, not one at a time.",
101
107
  "After receiving answers, check whether any response reveals scope that would change `pathComplexity` (e.g. the user confirms three teams are involved, or the feature is narrower than initially assessed). If so, state the new classification and reasoning, and ask the user to confirm before continuing to Phase 2."
102
108
  ],
103
109
  "outputRequired": {
@@ -116,23 +122,29 @@
116
122
  "title": "Path C, Phase 2: Create High-Level Plan",
117
123
  "runCondition": {
118
124
  "or": [
119
- { "var": "pathComplexity", "equals": "Standard" },
120
- { "var": "pathComplexity", "equals": "Epic" }
125
+ {
126
+ "var": "pathComplexity",
127
+ "equals": "Standard"
128
+ },
129
+ {
130
+ "var": "pathComplexity",
131
+ "equals": "Epic"
132
+ }
121
133
  ]
122
134
  },
123
135
  "promptBlocks": {
124
136
  "goal": "Produce a structured plan that will drive ticket generation. This plan is the source of truth for scope.",
125
137
  "constraints": [
126
- "Be explicit about scope boundaries ambiguous scope will produce ambiguous tickets.",
138
+ "Be explicit about scope boundaries \u2014 ambiguous scope will produce ambiguous tickets.",
127
139
  "Success criteria must be measurable, not just descriptive.",
128
140
  "For Standard path: this plan feeds directly into batch ticket generation."
129
141
  ],
130
142
  "procedure": [
131
143
  "Write: Project Summary (2-3 sentences, what is being built and why).",
132
144
  "Write: Key Deliverables (bulleted list of distinct components or features).",
133
- "Write: In-Scope (explicit list prevents scope creep).",
134
- "Write: Out-of-Scope (explicit exclusions prevents misunderstandings).",
135
- "Write: Success Criteria (measurable definition of done each item verifiable).",
145
+ "Write: In-Scope (explicit list \u2014 prevents scope creep).",
146
+ "Write: Out-of-Scope (explicit exclusions \u2014 prevents misunderstandings).",
147
+ "Write: Success Criteria (measurable definition of done \u2014 each item verifiable).",
136
148
  "Write: High-Level Timeline (phases or milestones with rough sizing).",
137
149
  "Review: does every deliverable map clearly to implementable work? Is anything in scope that should be out?"
138
150
  ],
@@ -158,7 +170,7 @@
158
170
  "goal": "Break the approved plan into a logical work hierarchy that development teams can execute.",
159
171
  "constraints": [
160
172
  "Every item in the plan's In-Scope list must map to at least one work item in the hierarchy.",
161
- "Dependencies must be explicit not implied by ordering alone.",
173
+ "Dependencies must be explicit \u2014 not implied by ordering alone.",
162
174
  "Oversized stories (more than one sprint of work) should be split."
163
175
  ],
164
176
  "procedure": [
@@ -190,7 +202,7 @@
190
202
  "promptBlocks": {
191
203
  "goal": "Add effort estimates, risk assessments, and team assignments to each story in the hierarchy.",
192
204
  "constraints": [
193
- "Conservative estimates are better than optimistic ones note uncertainty explicitly.",
205
+ "Conservative estimates are better than optimistic ones \u2014 note uncertainty explicitly.",
194
206
  "Justify each estimate with one sentence of reasoning.",
195
207
  "Flag stories on the critical path."
196
208
  ],
@@ -200,7 +212,7 @@
200
212
  "Assign priority: must-have for MVP, should-have, nice-to-have.",
201
213
  "Note suggested team or skill area for each story.",
202
214
  "Identify critical path: which stories block the most downstream work? Surface these explicitly.",
203
- "Flag any stories whose estimates feel uncertain surface the unknowns rather than hiding them in a range."
215
+ "Flag any stories whose estimates feel uncertain \u2014 surface the unknowns rather than hiding them in a range."
204
216
  ],
205
217
  "outputRequired": {
206
218
  "notesMarkdown": "Total story point estimate, critical path items, high-risk stories."
@@ -218,8 +230,14 @@
218
230
  "title": "Path C/E, Phase 5: Batch Ticket Generation",
219
231
  "runCondition": {
220
232
  "or": [
221
- { "var": "pathComplexity", "equals": "Standard" },
222
- { "var": "pathComplexity", "equals": "Epic" }
233
+ {
234
+ "var": "pathComplexity",
235
+ "equals": "Standard"
236
+ },
237
+ {
238
+ "var": "pathComplexity",
239
+ "equals": "Epic"
240
+ }
223
241
  ]
224
242
  },
225
243
  "promptBlocks": {
@@ -259,7 +277,7 @@
259
277
  "promptBlocks": {
260
278
  "goal": "Extract actionable team preferences from this session and persist them so future runs use them automatically.",
261
279
  "constraints": [
262
- "Only write rules that are genuinely reusable across future tickets skip one-off project specifics.",
280
+ "Only write rules that are genuinely reusable across future tickets \u2014 skip one-off project specifics.",
263
281
  "Keep rules concise and actionable, not narrative.",
264
282
  "Append to ./.workflow_rules/ticket_creation.md rather than replacing it."
265
283
  ],
@@ -267,7 +285,7 @@
267
285
  "Review what conventions, preferences, or requirements emerged during this session.",
268
286
  "Identify patterns worth preserving: naming conventions, field usage, AC format preferences, estimation approach, labeling rules.",
269
287
  "Draft new rules as short, imperative statements (e.g., 'Use T-shirt sizing not Fibonacci', 'Always include a Figma link in design tickets').",
270
- "Check against existing rules avoid duplicates or contradictions.",
288
+ "Check against existing rules \u2014 avoid duplicates or contradictions.",
271
289
  "Append new rules to ./.workflow_rules/ticket_creation.md, creating the file if it does not exist."
272
290
  ],
273
291
  "outputRequired": {
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "architecture-scalability-audit",
3
- "name": "Architecture Scalability Audit (v1 Evidence-Driven Dimension-Scoped rigorMode-Adaptive)",
3
+ "name": "Architecture Scalability Audit (v1 \u2022 Evidence-Driven \u2022 Dimension-Scoped \u2022 rigorMode-Adaptive)",
4
4
  "version": "0.1.0",
5
- "description": "Audit a bounded codebase scope for architecture scalability. The user declares which scalability dimensions matter (load, data volume, team/org, feature extensibility, operational); the workflow audits only those dimensions and produces per-dimension verdicts grounded in actual code, not generic advice.",
5
+ "description": "Use this to audit a bounded codebase scope for architecture scalability. Declare which scalability dimensions matter (load, data volume, team size, feature extensibility, operational); the workflow investigates each and produces evidence-grounded findings.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
8
8
  "recommendedRiskPolicy": "conservative"
@@ -20,7 +20,7 @@
20
20
  "DEFAULT BEHAVIOR: self-execute with tools. Ask only for true scope or dimension decisions you cannot resolve yourself.",
21
21
  "V2 DURABILITY: keep workflow truth in output.notesMarkdown and explicit context fields. Human-facing markdown artifacts are optional companions only.",
22
22
  "OWNERSHIP: the main agent owns the fact packet, synthesis, verdict calibration, and final handoff. Delegated dimension audits are evidence, not authority.",
23
- "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant surface them as advisory notes instead.",
23
+ "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant \u2014 surface them as advisory notes instead.",
24
24
  "EVIDENCE FIRST: every risk or will_break finding must cite a specific file, class, method, or pattern in the codebase. Technology name alone is not evidence.",
25
25
  "GROWTH SCENARIO: every concern must name a growth scenario (e.g. 10x traffic, 100x records, 3x team size). Generic 'won't scale' findings are not acceptable.",
26
26
  "VERDICT TIERS: use will_break / risk / fine. Do not force a cleaner answer than the evidence supports.",
@@ -33,16 +33,21 @@
33
33
  "promptBlocks": {
34
34
  "goal": "Establish a precise bounded scope and confirm which scalability dimensions this audit will cover.",
35
35
  "constraints": [
36
- [{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }],
36
+ [
37
+ {
38
+ "kind": "ref",
39
+ "refId": "wr.refs.notes_first_durability"
40
+ }
41
+ ],
37
42
  "Scope must be bounded before investigation begins. Unbounded scope produces generic findings.",
38
43
  "Dimension selection is the user's decision. Explore the codebase to inform the conversation, but the user declares which dimensions matter."
39
44
  ],
40
45
  "procedure": [
41
46
  "Read the codebase to understand the architecture: key components, entry points, data flows, and main patterns within the declared scope.",
42
- "Present the five scalability dimensions and ask the user to select which apply: (1) load handles more requests, users, or throughput; (2) data_volume handles more records, storage, or query size; (3) team_org more teams or developers working on this scope; (4) feature_extensibility more features added without rearchitecting; (5) operational more deployments, environments, or operational complexity.",
43
- "Ask the user to confirm the scope boundary what is explicitly in and explicitly out.",
44
- "Classify audit complexity: Simple (1–2 dimensions, small scope), Medium (2–3 dimensions, moderate scope), Complex (4–5 dimensions or large scope).",
45
- "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1–3. If any score is 1, gather more context before advancing."
47
+ "Present the five scalability dimensions and ask the user to select which apply: (1) load \u2014 handles more requests, users, or throughput; (2) data_volume \u2014 handles more records, storage, or query size; (3) team_org \u2014 more teams or developers working on this scope; (4) feature_extensibility \u2014 more features added without rearchitecting; (5) operational \u2014 more deployments, environments, or operational complexity.",
48
+ "Ask the user to confirm the scope boundary \u2014 what is explicitly in and explicitly out.",
49
+ "Classify audit complexity: Simple (1\u20132 dimensions, small scope), Medium (2\u20133 dimensions, moderate scope), Complex (4\u20135 dimensions or large scope).",
50
+ "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1\u20133. If any score is 1, gather more context before advancing."
46
51
  ],
47
52
  "outputRequired": {
48
53
  "notesMarkdown": "Scope boundary (in and out), declared dimensions with rationale, audit complexity classification, and any open boundary questions.",
@@ -87,7 +92,12 @@
87
92
  "promptBlocks": {
88
93
  "goal": "Freeze a neutral scalability fact packet and assign one reviewer family per declared dimension.",
89
94
  "constraints": [
90
- [{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }],
95
+ [
96
+ {
97
+ "kind": "ref",
98
+ "refId": "wr.refs.notes_first_durability"
99
+ }
100
+ ],
91
101
  "The fact packet is the primary truth for all dimension reviewer families.",
92
102
  "Keep the scalability hypothesis as a reference to challenge, not a frame to defend.",
93
103
  "One reviewer family per declared dimension only. Do not add families for undeclared dimensions."
@@ -95,7 +105,7 @@
95
105
  "procedure": [
96
106
  "Create a neutral `scalabilityFactPacket` containing: scope boundary (in and out), declared dimensions, key architectural patterns found, main components and their roles, data flow and storage patterns, concurrency and state management approach, dependency boundaries and coupling, deployment and runtime assumptions, and explicit open unknowns.",
97
107
  "Include realism signals: code that looks scalable at a glance but may have hidden limits (e.g. in-memory state, synchronous choke points, missing pagination, tight coupling between components).",
98
- "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
108
+ "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness \u2014 check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability \u2014 check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction \u2014 specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added \u2014 specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs \u2014 specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
99
109
  "Set selectedReviewerFamilies to the list of assigned families (one per declared dimension). Set contradictionCount and blindSpotCount to 0."
100
110
  ],
101
111
  "outputRequired": {
@@ -110,8 +120,11 @@
110
120
  "promptFragments": [
111
121
  {
112
122
  "id": "phase-2-quick",
113
- "when": { "var": "auditComplexity", "equals": "Simple" },
114
- "text": "For a Simple audit, keep the fact packet compact — scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
123
+ "when": {
124
+ "var": "auditComplexity",
125
+ "equals": "Simple"
126
+ },
127
+ "text": "For a Simple audit, keep the fact packet compact \u2014 scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
115
128
  }
116
129
  ],
117
130
  "requireConfirmation": false
@@ -122,15 +135,25 @@
122
135
  "promptBlocks": {
123
136
  "goal": "Run one reviewer family per declared dimension in parallel, then synthesize their findings as evidence rather than verdicts.",
124
137
  "constraints": [
125
- [{ "kind": "ref", "refId": "wr.refs.notes_first_durability" }],
126
- [{ "kind": "ref", "refId": "wr.refs.synthesis_under_disagreement" }],
138
+ [
139
+ {
140
+ "kind": "ref",
141
+ "refId": "wr.refs.notes_first_durability"
142
+ }
143
+ ],
144
+ [
145
+ {
146
+ "kind": "ref",
147
+ "refId": "wr.refs.synthesis_under_disagreement"
148
+ }
149
+ ],
127
150
  "Each reviewer family uses scalabilityFactPacket as primary truth.",
128
151
  "Reviewer-family outputs are raw evidence. The main agent owns synthesis and verdict assignment.",
129
- "Each reviewer family audits only its declared dimension no cross-dimension scope creep."
152
+ "Each reviewer family audits only its declared dimension \u2014 no cross-dimension scope creep."
130
153
  ],
131
154
  "procedure": [
132
155
  "Before investigating, restate your scalabilityHypothesis and name which dimension is most likely to challenge it.",
133
- "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
156
+ "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references \u2014 not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
134
157
  "After completing all dimension investigations, synthesize explicitly: what was confirmed, what was genuinely new, what looks weak or overstated, and what changed your current hypothesis.",
135
158
  "Build dimensionFindings keyed by dimension containing: findings list, verdict summary, evidence quality assessment, and open questions.",
136
159
  "Identify cross-cutting concerns: architectural patterns or components that appear in findings from multiple dimensions."
@@ -148,12 +171,18 @@
148
171
  "promptFragments": [
149
172
  {
150
173
  "id": "phase-3-quick",
151
- "when": { "var": "auditComplexity", "equals": "Simple" },
174
+ "when": {
175
+ "var": "auditComplexity",
176
+ "equals": "Simple"
177
+ },
152
178
  "text": "For a Simple audit, self-execute each dimension investigation directly without spawning WorkRail Executors. One dimension at a time, using tools to inspect the codebase. This keeps the audit proportionate to the scope."
153
179
  },
154
180
  {
155
181
  "id": "phase-3-thorough",
156
- "when": { "var": "auditComplexity", "equals": "Complex" },
182
+ "when": {
183
+ "var": "auditComplexity",
184
+ "equals": "Complex"
185
+ },
157
186
  "text": "For a Complex audit, spawn all dimension executors simultaneously, then after synthesis run routine-hypothesis-challenge against any will_break finding before closing this phase. This adds an adversarial check on the most serious findings."
158
187
  }
159
188
  ],
@@ -179,7 +208,12 @@
179
208
  "promptBlocks": {
180
209
  "goal": "Resolve contradictions between dimension findings and sharpen cross-cutting concerns.",
181
210
  "constraints": [
182
- [{ "kind": "ref", "refId": "wr.refs.parallelize_cognition_serialize_synthesis" }],
211
+ [
212
+ {
213
+ "kind": "ref",
214
+ "refId": "wr.refs.parallelize_cognition_serialize_synthesis"
215
+ }
216
+ ],
183
217
  "Contradiction resolution is main-agent work. Do not delegate synthesis.",
184
218
  "A cross-cutting concern that spans multiple dimensions is its own finding."
185
219
  ],
@@ -210,10 +244,10 @@
210
244
  "This is a structured four-item check, not a free-form review."
211
245
  ],
212
246
  "procedure": [
213
- "Check 1 Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
214
- "Check 2 Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
215
- "Check 3 Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
216
- "Check 4 Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
247
+ "Check 1 \u2014 Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
248
+ "Check 2 \u2014 Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
249
+ "Check 3 \u2014 Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
250
+ "Check 4 \u2014 Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
217
251
  "Set blindSpotCount to the number of blind spots found across all four checks."
218
252
  ],
219
253
  "outputRequired": {
@@ -265,11 +299,11 @@
265
299
  "Do not advance to handoff with known hard gate failures."
266
300
  ],
267
301
  "procedure": [
268
- "Verdict aggregation — derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
269
- "Hard gate 1 — Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or downgrading to risk with an evidence-needed note.",
270
- "Hard gate 2 — Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
271
- "Hard gate 3 — Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
272
- "Hard gate 4 — Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
302
+ "Verdict aggregation \u2014 derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
303
+ "Hard gate 1 \u2014 Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or downgrading to risk with an evidence-needed note.",
304
+ "Hard gate 2 \u2014 Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
305
+ "Hard gate 3 \u2014 Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
306
+ "Hard gate 4 \u2014 Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
273
307
  "Set hardGatesPassed = true only when the verdict aggregation and all four gates pass. Set hardGateFailures to the list of any that needed fixing."
274
308
  ],
275
309
  "outputRequired": {
@@ -293,13 +327,13 @@
293
327
  "Do not drift into implementation planning or remediation design unless the user explicitly asks."
294
328
  ],
295
329
  "procedure": [
296
- "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale — name the specific dimension and finding that drove it.",
330
+ "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale \u2014 name the specific dimension and finding that drove it.",
297
331
  "For each declared dimension, give: dimension name, verdict tier (will_break / risk / fine), top finding with specific code reference, growth scenario, and severity.",
298
332
  "List cross-cutting concerns: patterns that create scalability risk across multiple dimensions.",
299
333
  "Revisit scalabilityHypothesis from Phase 1: was it confirmed or revised? What evidence changed your view?",
300
334
  "Give a prioritized concern list ordered by: (1) will_break findings first, (2) risk findings by severity, (3) cross-cutting concerns, (4) fine findings worth noting as already solid.",
301
335
  "Surface any advisory notes for undeclared dimensions that may be worth considering.",
302
- "State what is already well-designed for scale — not everything should be a concern."
336
+ "State what is already well-designed for scale \u2014 not everything should be a concern."
303
337
  ],
304
338
  "outputRequired": {
305
339
  "notesMarkdown": "Decision-ready scalability handoff: overall verdict, per-dimension summary with code references, prioritized concerns, cross-cutting concerns, hypothesis outcome, and what is already solid."
@@ -308,7 +342,7 @@
308
342
  "The handoff is verdict-first and evidence-grounded.",
309
343
  "Every concern is tied to a specific code reference and growth scenario.",
310
344
  "The hypothesis from Phase 1 is explicitly addressed.",
311
- "What is already well-designed is stated — not just the concerns."
345
+ "What is already well-designed is stated \u2014 not just the concerns."
312
346
  ]
313
347
  },
314
348
  "requireConfirmation": false
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "bug-investigation-agentic",
3
- "name": "Bug Investigation (v2 • Notes-First • WorkRail Executor)",
3
+ "name": "Bug Investigation (v2 \u2022 Notes-First \u2022 WorkRail Executor)",
4
4
  "version": "2.0.0",
5
- "description": "A v2-first bug investigation workflow focused on moving from theory to proof with notes-first durability, explicit trigger fields, de-anchored fresh-eye review, and investigation-only handoff boundaries.",
5
+ "description": "Use this to diagnose a bug or unexpected behavior in code. Builds a hypothesis, gathers evidence, and proves or disproves the root cause before concluding.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
8
8
  "recommendedRiskPolicy": "conservative"
@@ -39,7 +39,10 @@
39
39
  {
40
40
  "id": "confidence",
41
41
  "purpose": "How confident the agent is that the diagnosis is ready for final handoff.",
42
- "levels": ["low", "high"]
42
+ "levels": [
43
+ "low",
44
+ "high"
45
+ ]
43
46
  }
44
47
  ]
45
48
  }
@@ -47,7 +50,7 @@
47
50
  "steps": [
48
51
  {
49
52
  "id": "phase-0-triage-and-intake",
50
- "title": "Phase 0: Triage (Bug Intake • Risk • Mode)",
53
+ "title": "Phase 0: Triage (Bug Intake \u2022 Risk \u2022 Mode)",
51
54
  "prompt": "Understand the bug report and choose the right rigor.\n\nCapture:\n- `bugSummary`: concise statement of the issue\n- `reproSummary`: repro steps, symptoms, expected behavior, environment notes\n- `investigationComplexity`: Small / Medium / Large\n- `riskLevel`: Low / Medium / High\n- `rigorMode`: QUICK / STANDARD / THOROUGH\n- `automationLevel`: High / Medium / Low\n- `maxParallelism`: 0 / 2 / 3\n\nDecision guidance:\n- QUICK: clear repro, narrow surface area, low ambiguity\n- STANDARD: moderate ambiguity, moderate system breadth, or meaningful risk\n- THOROUGH: high ambiguity, high-risk production impact, broad surface area, or multiple plausible causes\n\nSet context variables:\n- `bugSummary`\n- `reproSummary`\n- `investigationComplexity`\n- `riskLevel`\n- `rigorMode`\n- `automationLevel`\n- `maxParallelism`\n- `reproducibilityConfidence` (High / Medium / Low)\n\nAsk for confirmation only if the chosen rigor materially affects expectations or if critical repro details are still missing.",
52
55
  "requireConfirmation": true
53
56
  },
@@ -57,8 +60,14 @@
57
60
  "prompt": "If critical inputs are missing, ask only for the minimum needed to investigate.\n\nPossible asks:\n- missing repro steps or failing test command\n- missing expected behavior\n- missing environment constraints or permissions\n- missing logs or stack traces when the codebase alone cannot answer the gap\n\nDo NOT ask for information you can discover with tools.",
58
61
  "requireConfirmation": {
59
62
  "or": [
60
- { "var": "automationLevel", "equals": "Low" },
61
- { "var": "automationLevel", "equals": "Medium" }
63
+ {
64
+ "var": "automationLevel",
65
+ "equals": "Low"
66
+ },
67
+ {
68
+ "var": "automationLevel",
69
+ "equals": "Medium"
70
+ }
62
71
  ]
63
72
  }
64
73
  },
@@ -78,8 +87,14 @@
78
87
  "prompt": "Reassess investigation scope after real context is known.\n\nReview:\n- `contextUnknownCount`\n- `executionPathCount`\n- `suspiciousPointCount`\n- actual systems/components involved\n- whether risk or ambiguity is larger than originally assessed\n\nDo:\n- confirm or adjust `investigationComplexity`\n- confirm or adjust `riskLevel`\n- confirm or adjust `rigorMode`\n- confirm or adjust `maxParallelism`\n\nSet context variables:\n- `investigationComplexity`\n- `riskLevel`\n- `rigorMode`\n- `maxParallelism`\n- `retriageChanged`\n\nRule:\n- upgrade rigor when the real investigation surface is broader or riskier than expected",
79
88
  "requireConfirmation": {
80
89
  "or": [
81
- { "var": "retriageChanged", "equals": true },
82
- { "var": "automationLevel", "equals": "Low" }
90
+ {
91
+ "var": "retriageChanged",
92
+ "equals": true
93
+ },
94
+ {
95
+ "var": "automationLevel",
96
+ "equals": "Low"
97
+ }
83
98
  ]
84
99
  }
85
100
  },
@@ -118,7 +133,7 @@
118
133
  {
119
134
  "id": "phase-4b-loop-decision",
120
135
  "title": "Evidence Loop Decision",
121
- "prompt": "Decide whether the evidence loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` → continue\n- else if `unresolvedEvidenceGapCount > 0` → continue\n- else if `hasStrongAlternative = true` and the alternative is not meaningfully weaker → continue\n- else if `diagnosisType = inconclusive_but_narrowed` and further evidence is not realistically available → stop with bounded uncertainty\n- else → stop\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
136
+ "prompt": "Decide whether the evidence loop should continue.\n\nDecision rules:\n- if `contradictionCount > 0` \u2192 continue\n- else if `unresolvedEvidenceGapCount > 0` \u2192 continue\n- else if `hasStrongAlternative = true` and the alternative is not meaningfully weaker \u2192 continue\n- else if `diagnosisType = inconclusive_but_narrowed` and further evidence is not realistically available \u2192 stop with bounded uncertainty\n- else \u2192 stop\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
122
137
  "requireConfirmation": true,
123
138
  "outputContract": {
124
139
  "contractRef": "wr.contracts.loop_control"
@@ -130,12 +145,13 @@
130
145
  "id": "phase-5-diagnosis-validation",
131
146
  "title": "Phase 5: Diagnosis Validation Bundle",
132
147
  "prompt": "Stress-test the current diagnosis before handoff.\n\nSet `diagnosisConfidenceBand` using these rules:\n- High = all symptoms explained, no material contradictions, no unresolved evidence gaps\n- Medium = likely diagnosis, but one bounded uncertainty remains\n- Low = multiple viable explanations remain or contradictions are unresolved\n\nMode-adaptive validation:\n- QUICK: self-challenge; if `diagnosisConfidenceBand != High` or contradictions remain, optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge`\n- STANDARD: if delegation is available, spawn TWO WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge` and `routine-execution-simulation`\n- THOROUGH: if delegation is available, spawn THREE WorkRail Executors SIMULTANEOUSLY running `routine-hypothesis-challenge`, `routine-execution-simulation`, and an additional `routine-hypothesis-challenge` pass focused on breaking the current diagnosis from a different angle\n\nParallel-output synthesis rules:\n- if 2+ validators raise serious concerns, reopen evidence or shortlist work\n- if exactly one validator raises a concern, investigate it before escalating\n- if no validator can materially break the diagnosis and `contradictionCount = 0`, proceed to handoff\n\nAfter synthesizing the validation result, assess whether the diagnosis is ready for final handoff.\n\nSet context variables:\n- `diagnosisConfidenceBand`\n- `validationFindingsCountBySeverity`\n- `validationSummary`\n\nBoundary rule:\n- allowed: high-level fix direction, likely files involved, verification recommendations\n- not allowed: implementation plan, patch sequencing, PR plan, or code-writing momentum",
133
- "assessmentRefs": ["diagnosis_readiness_gate"],
148
+ "assessmentRefs": [
149
+ "diagnosis_readiness_gate"
150
+ ],
134
151
  "assessmentConsequences": [
135
152
  {
136
153
  "when": {
137
- "dimensionId": "confidence",
138
- "equalsLevel": "low"
154
+ "anyEqualsLevel": "low"
139
155
  },
140
156
  "effect": {
141
157
  "kind": "require_followup",
@@ -145,8 +161,14 @@
145
161
  ],
146
162
  "requireConfirmation": {
147
163
  "or": [
148
- { "var": "diagnosisConfidenceBand", "equals": "Low" },
149
- { "var": "contradictionCount", "not_equals": 0 }
164
+ {
165
+ "var": "diagnosisConfidenceBand",
166
+ "equals": "Low"
167
+ },
168
+ {
169
+ "var": "contradictionCount",
170
+ "not_equals": 0
171
+ }
150
172
  ]
151
173
  }
152
174
  },