@exaudeus/workrail 3.15.0 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/services/workflow-service.d.ts +2 -0
- package/dist/application/services/workflow-service.js +3 -0
- package/dist/console/assets/index-BE5PAgPO.js +28 -0
- package/dist/console/assets/index-BZNM03t1.css +1 -0
- package/dist/console/index.html +2 -2
- package/dist/env-flags.d.ts +1 -0
- package/dist/env-flags.js +4 -0
- package/dist/infrastructure/session/HttpServer.d.ts +3 -3
- package/dist/infrastructure/session/HttpServer.js +68 -74
- package/dist/infrastructure/storage/caching-workflow-storage.d.ts +2 -0
- package/dist/infrastructure/storage/caching-workflow-storage.js +15 -6
- package/dist/infrastructure/storage/file-workflow-storage.js +3 -4
- package/dist/infrastructure/storage/schema-validating-workflow-storage.js +9 -8
- package/dist/manifest.json +257 -193
- package/dist/mcp/assert-output.d.ts +37 -0
- package/dist/mcp/assert-output.js +52 -0
- package/dist/mcp/boundary-coercion.d.ts +1 -0
- package/dist/mcp/boundary-coercion.js +44 -0
- package/dist/mcp/dev-mode.d.ts +1 -0
- package/dist/mcp/dev-mode.js +4 -0
- package/dist/mcp/handler-factory.js +12 -9
- package/dist/mcp/handlers/session.js +8 -9
- package/dist/mcp/handlers/v2-advance-core/event-builders.d.ts +2 -0
- package/dist/mcp/handlers/v2-advance-core/event-builders.js +6 -6
- package/dist/mcp/handlers/v2-advance-core/index.d.ts +2 -0
- package/dist/mcp/handlers/v2-advance-core/index.js +4 -3
- package/dist/mcp/handlers/v2-advance-core/input-validation.d.ts +2 -0
- package/dist/mcp/handlers/v2-advance-core/input-validation.js +32 -9
- package/dist/mcp/handlers/v2-advance-core/outcome-blocked.d.ts +2 -0
- package/dist/mcp/handlers/v2-advance-core/outcome-blocked.js +1 -1
- package/dist/mcp/handlers/v2-advance-core/outcome-success.d.ts +2 -0
- package/dist/mcp/handlers/v2-advance-core/outcome-success.js +1 -1
- package/dist/mcp/handlers/v2-checkpoint.d.ts +1 -1
- package/dist/mcp/handlers/v2-checkpoint.js +5 -6
- package/dist/mcp/handlers/v2-execution/advance.d.ts +4 -2
- package/dist/mcp/handlers/v2-execution/advance.js +5 -7
- package/dist/mcp/handlers/v2-execution/continue-advance.js +56 -26
- package/dist/mcp/handlers/v2-execution/continue-rehydrate.d.ts +1 -1
- package/dist/mcp/handlers/v2-execution/continue-rehydrate.js +9 -9
- package/dist/mcp/handlers/v2-execution/replay.d.ts +6 -4
- package/dist/mcp/handlers/v2-execution/replay.js +47 -30
- package/dist/mcp/handlers/v2-execution/start.d.ts +2 -3
- package/dist/mcp/handlers/v2-execution/start.js +11 -11
- package/dist/mcp/handlers/v2-execution/workflow-object-cache.d.ts +5 -0
- package/dist/mcp/handlers/v2-execution/workflow-object-cache.js +19 -0
- package/dist/mcp/handlers/v2-execution-helpers.d.ts +1 -0
- package/dist/mcp/handlers/v2-execution-helpers.js +23 -7
- package/dist/mcp/handlers/v2-resume.d.ts +1 -1
- package/dist/mcp/handlers/v2-resume.js +3 -4
- package/dist/mcp/handlers/v2-state-conversion.js +5 -1
- package/dist/mcp/handlers/v2-workflow.d.ts +80 -0
- package/dist/mcp/handlers/v2-workflow.js +36 -21
- package/dist/mcp/handlers/workflow.d.ts +2 -5
- package/dist/mcp/handlers/workflow.js +15 -12
- package/dist/mcp/output-schemas.d.ts +20 -27
- package/dist/mcp/output-schemas.js +5 -7
- package/dist/mcp/server.js +22 -4
- package/dist/mcp/tool-call-timing.d.ts +24 -0
- package/dist/mcp/tool-call-timing.js +85 -0
- package/dist/mcp/transports/http-entry.js +3 -2
- package/dist/mcp/transports/http-listener.d.ts +1 -0
- package/dist/mcp/transports/http-listener.js +25 -0
- package/dist/mcp/transports/shutdown-hooks.d.ts +4 -1
- package/dist/mcp/transports/shutdown-hooks.js +3 -2
- package/dist/mcp/transports/stdio-entry.js +6 -28
- package/dist/mcp/v2-response-formatter.js +2 -4
- package/dist/mcp/validation/schema-introspection.d.ts +1 -0
- package/dist/mcp/validation/schema-introspection.js +15 -5
- package/dist/mcp/validation/suggestion-generator.js +2 -2
- package/dist/runtime/adapters/node-process-signals.d.ts +1 -0
- package/dist/runtime/adapters/node-process-signals.js +5 -0
- package/dist/runtime/adapters/noop-process-signals.d.ts +1 -0
- package/dist/runtime/adapters/noop-process-signals.js +2 -0
- package/dist/runtime/ports/process-signals.d.ts +1 -0
- package/dist/types/workflow-definition.d.ts +2 -0
- package/dist/types/workflow.d.ts +3 -0
- package/dist/types/workflow.js +35 -26
- package/dist/v2/durable-core/domain/context-template-resolver.js +2 -2
- package/dist/v2/durable-core/domain/function-definition-expander.js +2 -17
- package/dist/v2/durable-core/domain/prompt-renderer.d.ts +1 -0
- package/dist/v2/durable-core/domain/prompt-renderer.js +23 -18
- package/dist/v2/durable-core/domain/recap-recovery.js +23 -16
- package/dist/v2/durable-core/domain/retrieval-contract.js +13 -7
- package/dist/v2/durable-core/session-index.d.ts +22 -0
- package/dist/v2/durable-core/session-index.js +58 -0
- package/dist/v2/durable-core/sorted-event-log.d.ts +6 -0
- package/dist/v2/durable-core/sorted-event-log.js +15 -0
- package/dist/v2/infra/local/fs/index.js +8 -8
- package/dist/v2/infra/local/session-store/index.d.ts +1 -1
- package/dist/v2/infra/local/session-store/index.js +71 -61
- package/dist/v2/infra/local/session-summary-provider/index.js +9 -4
- package/dist/v2/infra/local/snapshot-store/index.js +2 -1
- package/dist/v2/ports/session-event-log-store.port.d.ts +1 -1
- package/dist/v2/projections/assessment-consequences.d.ts +2 -1
- package/dist/v2/projections/assessment-consequences.js +0 -5
- package/dist/v2/projections/assessments.d.ts +2 -1
- package/dist/v2/projections/assessments.js +2 -4
- package/dist/v2/projections/gaps.d.ts +2 -1
- package/dist/v2/projections/gaps.js +0 -5
- package/dist/v2/projections/preferences.d.ts +2 -1
- package/dist/v2/projections/preferences.js +0 -5
- package/dist/v2/projections/run-context.d.ts +2 -2
- package/dist/v2/projections/run-context.js +0 -5
- package/dist/v2/projections/run-dag.js +7 -1
- package/dist/v2/projections/run-execution-trace.d.ts +8 -0
- package/dist/v2/projections/run-execution-trace.js +124 -0
- package/dist/v2/projections/run-status-signals.d.ts +2 -2
- package/dist/v2/usecases/console-routes.d.ts +3 -1
- package/dist/v2/usecases/console-routes.js +123 -25
- package/dist/v2/usecases/console-service.d.ts +1 -0
- package/dist/v2/usecases/console-service.js +83 -25
- package/dist/v2/usecases/console-types.d.ts +53 -0
- package/dist/v2/usecases/worktree-service.js +32 -1
- package/package.json +6 -5
- package/spec/workflow.schema.json +18 -0
- package/workflows/adaptive-ticket-creation.json +23 -16
- package/workflows/architecture-scalability-audit.json +29 -22
- package/workflows/bug-investigation.agentic.v2.json +7 -0
- package/workflows/coding-task-workflow-agentic.json +7 -0
- package/workflows/coding-task-workflow-agentic.lean.v2.json +16 -8
- package/workflows/coding-task-workflow-agentic.v2.json +7 -0
- package/workflows/cross-platform-code-conversion.v2.json +7 -0
- package/workflows/document-creation-workflow.json +15 -8
- package/workflows/documentation-update-workflow.json +15 -8
- package/workflows/intelligent-test-case-generation.json +7 -0
- package/workflows/learner-centered-course-workflow.json +9 -2
- package/workflows/mr-review-workflow.agentic.v2.json +7 -0
- package/workflows/personal-learning-materials-creation-branched.json +15 -8
- package/workflows/presentation-creation.json +12 -5
- package/workflows/production-readiness-audit.json +7 -0
- package/workflows/relocation-workflow-us.json +39 -32
- package/workflows/scoped-documentation-workflow.json +33 -26
- package/workflows/ui-ux-design-workflow.json +7 -0
- package/workflows/workflow-diagnose-environment.json +6 -0
- package/workflows/workflow-for-workflows.json +7 -0
- package/workflows/workflow-for-workflows.v2.json +23 -11
- package/workflows/wr.discovery.json +8 -1
- package/dist/console/assets/index-BZYIjrzJ.js +0 -28
- package/dist/console/assets/index-OLCKbDdm.css +0 -1
|
@@ -31,6 +31,10 @@ export interface ConsoleDagNode {
|
|
|
31
31
|
readonly isPreferredTip: boolean;
|
|
32
32
|
readonly isTip: boolean;
|
|
33
33
|
readonly stepLabel: string | null;
|
|
34
|
+
readonly hasRecap: boolean;
|
|
35
|
+
readonly hasFailedValidations: boolean;
|
|
36
|
+
readonly hasGaps: boolean;
|
|
37
|
+
readonly hasArtifacts: boolean;
|
|
34
38
|
}
|
|
35
39
|
export interface ConsoleDagEdge {
|
|
36
40
|
readonly edgeKind: 'acked_step' | 'checkpoint';
|
|
@@ -38,6 +42,25 @@ export interface ConsoleDagEdge {
|
|
|
38
42
|
readonly toNodeId: string;
|
|
39
43
|
readonly createdAtEventIndex: number;
|
|
40
44
|
}
|
|
45
|
+
export type ConsoleExecutionTraceItemKind = 'selected_next_step' | 'evaluated_condition' | 'entered_loop' | 'exited_loop' | 'detected_non_tip_advance' | 'context_fact' | 'divergence';
|
|
46
|
+
export interface ConsoleExecutionTraceRef {
|
|
47
|
+
readonly kind: 'node_id' | 'step_id' | 'loop_id' | 'condition_id';
|
|
48
|
+
readonly value: string;
|
|
49
|
+
}
|
|
50
|
+
export interface ConsoleExecutionTraceItem {
|
|
51
|
+
readonly kind: ConsoleExecutionTraceItemKind;
|
|
52
|
+
readonly summary: string;
|
|
53
|
+
readonly recordedAtEventIndex: number;
|
|
54
|
+
readonly refs: readonly ConsoleExecutionTraceRef[];
|
|
55
|
+
}
|
|
56
|
+
export interface ConsoleExecutionTraceFact {
|
|
57
|
+
readonly key: string;
|
|
58
|
+
readonly value: string;
|
|
59
|
+
}
|
|
60
|
+
export interface ConsoleExecutionTraceSummary {
|
|
61
|
+
readonly items: readonly ConsoleExecutionTraceItem[];
|
|
62
|
+
readonly contextFacts: readonly ConsoleExecutionTraceFact[];
|
|
63
|
+
}
|
|
41
64
|
export interface ConsoleDagRun {
|
|
42
65
|
readonly runId: string;
|
|
43
66
|
readonly workflowId: string | null;
|
|
@@ -49,6 +72,7 @@ export interface ConsoleDagRun {
|
|
|
49
72
|
readonly tipNodeIds: readonly string[];
|
|
50
73
|
readonly status: ConsoleRunStatus;
|
|
51
74
|
readonly hasUnresolvedCriticalGaps: boolean;
|
|
75
|
+
readonly executionTraceSummary: ConsoleExecutionTraceSummary | null;
|
|
52
76
|
}
|
|
53
77
|
export interface ConsoleSessionDetail {
|
|
54
78
|
readonly sessionId: string;
|
|
@@ -128,3 +152,32 @@ export interface ConsoleNodeDetail {
|
|
|
128
152
|
readonly validations: readonly ConsoleValidationResult[];
|
|
129
153
|
readonly gaps: readonly ConsoleNodeGap[];
|
|
130
154
|
}
|
|
155
|
+
export interface ConsoleWorkflowSourceInfo {
|
|
156
|
+
readonly kind: 'bundled' | 'user' | 'project' | 'custom' | 'git' | 'remote' | 'plugin';
|
|
157
|
+
readonly displayName: string;
|
|
158
|
+
}
|
|
159
|
+
export interface ConsoleWorkflowSummary {
|
|
160
|
+
readonly id: string;
|
|
161
|
+
readonly name: string;
|
|
162
|
+
readonly description: string;
|
|
163
|
+
readonly version: string;
|
|
164
|
+
readonly tags: readonly string[];
|
|
165
|
+
readonly source: ConsoleWorkflowSourceInfo;
|
|
166
|
+
readonly about?: string;
|
|
167
|
+
readonly examples?: readonly string[];
|
|
168
|
+
}
|
|
169
|
+
export interface ConsoleWorkflowListResponse {
|
|
170
|
+
readonly workflows: readonly ConsoleWorkflowSummary[];
|
|
171
|
+
}
|
|
172
|
+
export interface ConsoleWorkflowDetail {
|
|
173
|
+
readonly id: string;
|
|
174
|
+
readonly name: string;
|
|
175
|
+
readonly description: string;
|
|
176
|
+
readonly version: string;
|
|
177
|
+
readonly tags: readonly string[];
|
|
178
|
+
readonly source: ConsoleWorkflowSourceInfo;
|
|
179
|
+
readonly stepCount: number;
|
|
180
|
+
readonly about?: string;
|
|
181
|
+
readonly examples?: readonly string[];
|
|
182
|
+
readonly preconditions?: readonly string[];
|
|
183
|
+
}
|
|
@@ -44,6 +44,29 @@ function parseWorktreePorcelain(raw) {
|
|
|
44
44
|
}
|
|
45
45
|
return entries;
|
|
46
46
|
}
|
|
47
|
+
/** Upper bound on the number of enrichment slots that may be held at the same time. */
const MAX_CONCURRENT_ENRICHMENTS = 8;
/**
 * Number of enrichment slots currently held.
 * Invariant: 0 <= activeEnrichments <= MAX_CONCURRENT_ENRICHMENTS.
 */
let activeEnrichments = 0;
/** FIFO of resolver callbacks belonging to callers that are waiting for a slot. */
const enrichmentQueue = [];
/**
 * Acquires one enrichment slot.
 *
 * @returns {Promise<void>} Resolves immediately when a slot is free; otherwise
 *   resolves once releaseEnrichmentSlot() hands a slot over to this caller.
 */
function acquireEnrichmentSlot() {
    return new Promise((resolve) => {
        if (activeEnrichments < MAX_CONCURRENT_ENRICHMENTS) {
            activeEnrichments++;
            resolve();
        }
        else {
            // A queued caller inherits the slot of whoever releases next, so the
            // counter must NOT be incremented again when it is woken up.
            enrichmentQueue.push(resolve);
        }
    });
}
/**
 * Releases one enrichment slot previously obtained via acquireEnrichmentSlot().
 *
 * If a caller is waiting, the slot is transferred directly to the oldest waiter
 * and the held-slot count stays the same. Otherwise the count is decremented.
 * Previously the handoff path incremented the counter without a matching
 * decrement, so every handoff leaked one slot until no caller could ever
 * acquire again.
 */
function releaseEnrichmentSlot() {
    const next = enrichmentQueue.shift();
    if (next) {
        // Direct handoff: one holder leaves, one enters, count is unchanged.
        next();
    }
    else {
        activeEnrichments--;
    }
}
|
|
47
70
|
function parseFileStatus(xy) {
|
|
48
71
|
if (xy === '??')
|
|
49
72
|
return 'untracked';
|
|
@@ -113,7 +136,15 @@ async function enrichRepo(repoRoot, activeSessions) {
|
|
|
113
136
|
if (porcelain === null)
|
|
114
137
|
return null;
|
|
115
138
|
const rawWorktrees = parseWorktreePorcelain(porcelain);
|
|
116
|
-
const results = await Promise.allSettled(rawWorktrees.map(wt =>
|
|
139
|
+
const results = await Promise.allSettled(rawWorktrees.map(async (wt) => {
|
|
140
|
+
await acquireEnrichmentSlot();
|
|
141
|
+
try {
|
|
142
|
+
return await enrichWorktree(wt);
|
|
143
|
+
}
|
|
144
|
+
finally {
|
|
145
|
+
releaseEnrichmentSlot();
|
|
146
|
+
}
|
|
147
|
+
}));
|
|
117
148
|
const worktrees = rawWorktrees.flatMap((wt, i) => {
|
|
118
149
|
const result = results[i];
|
|
119
150
|
if (result.status === 'rejected') {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exaudeus/workrail",
|
|
3
|
-
"version": "3.15.0",
|
|
3
|
+
"version": "3.16.0",
|
|
4
4
|
"description": "Step-by-step workflow enforcement for AI agents via MCP",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -77,7 +77,8 @@
|
|
|
77
77
|
"codemod:v2-contexts": "npx ts-node scripts/codemods/run.ts --mod v2-contexts --tsconfig tsconfig.test.json --write",
|
|
78
78
|
"codemod:v2-prune": "npx ts-node scripts/codemods/run.ts --mod v2-prune --tsconfig tsconfig.test.json --write",
|
|
79
79
|
"codemod:guard": "npx ts-node scripts/codemods/run.ts --mod guard --tsconfig tsconfig.test.json",
|
|
80
|
-
"codemod:test-platform-guard": "npx ts-node scripts/codemods/run.ts --mod test-platform-guard --tsconfig tsconfig.test.json"
|
|
80
|
+
"codemod:test-platform-guard": "npx ts-node scripts/codemods/run.ts --mod test-platform-guard --tsconfig tsconfig.test.json",
|
|
81
|
+
"prepare": "bash scripts/setup-hooks.sh"
|
|
81
82
|
},
|
|
82
83
|
"dependencies": {
|
|
83
84
|
"@modelcontextprotocol/sdk": "^1.24.0",
|
|
@@ -89,7 +90,7 @@
|
|
|
89
90
|
"dotenv": "^17.2.0",
|
|
90
91
|
"express": "^5.1.0",
|
|
91
92
|
"neverthrow": "^8.2.0",
|
|
92
|
-
"open": "^
|
|
93
|
+
"open": "^11.0.0",
|
|
93
94
|
"reflect-metadata": "^0.2.0",
|
|
94
95
|
"semver": "^7.7.2",
|
|
95
96
|
"tsconfig-paths": "^4.2.0",
|
|
@@ -121,8 +122,8 @@
|
|
|
121
122
|
"happy-dom": "^20.0.11",
|
|
122
123
|
"jsdom": "^27.0.0",
|
|
123
124
|
"lit": "^3.3.1",
|
|
124
|
-
"node-fetch": "^
|
|
125
|
-
"semantic-release": "^
|
|
125
|
+
"node-fetch": "^3.3.2",
|
|
126
|
+
"semantic-release": "^25.0.3",
|
|
126
127
|
"ts-morph": "^27.0.2",
|
|
127
128
|
"typescript": "^5.9.3",
|
|
128
129
|
"vite": "^7.1.9",
|
|
@@ -182,6 +182,24 @@
|
|
|
182
182
|
"type": "integer",
|
|
183
183
|
"minimum": 1,
|
|
184
184
|
"description": "The authoring spec version this workflow was last validated against."
|
|
185
|
+
},
|
|
186
|
+
"about": {
|
|
187
|
+
"type": "string",
|
|
188
|
+
"description": "Human-readable overview for display in the console and other UIs. Markdown is supported. Write for a user deciding whether to use this workflow: what it does, when to use it, what it produces, and how to get good results. User-facing surface -- not an agent instruction (use metaGuidance for that).",
|
|
189
|
+
"minLength": 1,
|
|
190
|
+
"maxLength": 4096
|
|
191
|
+
},
|
|
192
|
+
"examples": {
|
|
193
|
+
"type": "array",
|
|
194
|
+
"description": "Short illustrative goal strings showing what this workflow is used for. Write for humans browsing the catalog and for agents selecting the right workflow. Each item should be concrete and specific enough to be informative.",
|
|
195
|
+
"items": {
|
|
196
|
+
"type": "string",
|
|
197
|
+
"minLength": 10,
|
|
198
|
+
"maxLength": 120
|
|
199
|
+
},
|
|
200
|
+
"minItems": 1,
|
|
201
|
+
"maxItems": 6,
|
|
202
|
+
"uniqueItems": true
|
|
185
203
|
}
|
|
186
204
|
},
|
|
187
205
|
"required": [
|
|
@@ -3,14 +3,21 @@
|
|
|
3
3
|
"name": "Adaptive Ticket Creation Workflow",
|
|
4
4
|
"version": "1.0.0",
|
|
5
5
|
"description": "Use this to create high-quality Jira tickets for features, tasks, or epics. Automatically selects the right complexity path (Simple, Standard, or Epic) and generates properly structured tickets with acceptance criteria and estimates.",
|
|
6
|
+
"about": "## Adaptive Ticket Creation Workflow\n\nUse this to create well-structured Jira tickets for features, tasks, or epics. The workflow automatically selects the right complexity path (Simple, Standard, or Epic) based on the request, so you don't have to decide upfront how much process you need.\n\n### What it produces\n\n- **Simple path**: one complete, developer-ready Jira ticket with a context-rich description, checkbox-style acceptance criteria, and an effort estimate.\n- **Standard path**: a high-level plan plus a batch of related tickets covering all deliverables.\n- **Epic path**: everything in Standard, plus full epic decomposition, per-story estimates with risk ratings, dependency mapping, and a reusable team rules file at `.workflow_rules/ticket_creation.md` that future runs load automatically.\n\n### When to use it\n\n- You need to create one or more Jira tickets and want them to be genuinely developer-ready.\n- You have a feature request, bug, task, or epic that needs to be broken down and estimated.\n- Your team has specific ticket conventions (naming, sizing, labels) -- the workflow learns and stores these on the Epic path.\n\n### How to get good results\n\n- Provide as much context as you have: PRD links, design files, existing related tickets, and any known constraints.\n- If your team has a `.workflow_rules/ticket_creation.md` file, the workflow loads it automatically and applies your conventions.\n- On the Epic path, the workflow asks you to approve the high-level plan and the decomposition before generating tickets. Use these checkpoints to catch scope issues early.\n- Acceptance criteria are written as checkbox-style observable conditions, not restatements of requirements. If your team has a specific AC format, describe it in the rules file.",
|
|
7
|
+
"examples": [
|
|
8
|
+
"Create a Jira ticket for adding biometric authentication to the mobile login screen",
|
|
9
|
+
"Break down the new real-time notifications feature into an epic with stories and estimates",
|
|
10
|
+
"Write tickets for all backend work needed to support the v2 search API",
|
|
11
|
+
"Create a single bug ticket for the checkout crash when applying a promo code on iOS 17"
|
|
12
|
+
],
|
|
6
13
|
"preconditions": [
|
|
7
14
|
"User has provided a description of the feature, task, or work to be ticketed.",
|
|
8
15
|
"Agent has file system access for loading team preferences and persisting rules."
|
|
9
16
|
],
|
|
10
17
|
"metaGuidance": [
|
|
11
|
-
"ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria
|
|
18
|
+
"ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria — not user-story paraphrases.",
|
|
12
19
|
"EXPLORE FIRST: use tools to gather context before asking the user anything. Ask only for information you genuinely cannot determine with tools or from the request itself.",
|
|
13
|
-
"TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path
|
|
20
|
+
"TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path — complex sessions are where durable conventions emerge and where the investment pays off.",
|
|
14
21
|
"AUTONOMOUS TRIAGE: decide pathComplexity (Simple / Standard / Epic) yourself from the request. Surface your reasoning, then wait for confirmation.",
|
|
15
22
|
"QUALITY FLOOR: every ticket must have a context-rich description, checkbox-style acceptance criteria that are objectively testable, and an effort estimate."
|
|
16
23
|
],
|
|
@@ -21,7 +28,7 @@
|
|
|
21
28
|
"promptBlocks": {
|
|
22
29
|
"goal": "Analyze the request, gather available context, and select the right complexity path before doing any ticket work.",
|
|
23
30
|
"constraints": [
|
|
24
|
-
"Decide the path yourself
|
|
31
|
+
"Decide the path yourself — do not ask the user to choose.",
|
|
25
32
|
"Load ./.workflow_rules/ticket_creation.md if it exists and let it influence your triage. If the file does not exist, note this explicitly in your output so the user knows team conventions were not applied.",
|
|
26
33
|
"Set pathComplexity to exactly one of: Simple, Standard, or Epic."
|
|
27
34
|
],
|
|
@@ -29,7 +36,7 @@
|
|
|
29
36
|
"Read any attached documents, linked PRDs, or referenced specs.",
|
|
30
37
|
"Identify complexity signals: scope breadth, number of distinct deliverables, cross-team dependencies, technical unknowns, and estimated ticket count.",
|
|
31
38
|
"Apply the triage rubric: Simple = single ticket, clear requirements, no blocking unknowns, minimal dependencies. Standard = multiple related tickets, moderate scope, some analysis needed. Epic = complex feature requiring decomposition, multiple teams or significant unknowns, likely 6+ tickets.",
|
|
32
|
-
"Upgrade triggers
|
|
39
|
+
"Upgrade triggers — escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
|
|
33
40
|
"State your selected path and the top three reasons. Capture pathComplexity in context."
|
|
34
41
|
],
|
|
35
42
|
"outputRequired": {
|
|
@@ -53,7 +60,7 @@
|
|
|
53
60
|
"promptBlocks": {
|
|
54
61
|
"goal": "Generate one complete, developer-ready Jira ticket for this request.",
|
|
55
62
|
"constraints": [
|
|
56
|
-
"Acceptance criteria must be phrased as observable, testable conditions
|
|
63
|
+
"Acceptance criteria must be phrased as observable, testable conditions — not user-story restatements.",
|
|
57
64
|
"Follow any team conventions from ./.workflow_rules/ticket_creation.md.",
|
|
58
65
|
"Include all fields a developer needs to start work without asking follow-up questions."
|
|
59
66
|
],
|
|
@@ -103,7 +110,7 @@
|
|
|
103
110
|
"Load ./.workflow_rules/ticket_creation.md and note any relevant team conventions.",
|
|
104
111
|
"Identify: key stakeholders, team dependencies, technical constraints, known risks, and any conflicting requirements.",
|
|
105
112
|
"Classify each gap as: Critical (blocks planning), Important (affects scope), or Nice-to-have (can proceed without it).",
|
|
106
|
-
"For Critical and Important gaps that tools cannot resolve, ask the user
|
|
113
|
+
"For Critical and Important gaps that tools cannot resolve, ask the user — in a single consolidated question block, not one at a time.",
|
|
107
114
|
"After receiving answers, check whether any response reveals scope that would change `pathComplexity` (e.g. the user confirms three teams are involved, or the feature is narrower than initially assessed). If so, state the new classification and reasoning, and ask the user to confirm before continuing to Phase 2."
|
|
108
115
|
],
|
|
109
116
|
"outputRequired": {
|
|
@@ -135,16 +142,16 @@
|
|
|
135
142
|
"promptBlocks": {
|
|
136
143
|
"goal": "Produce a structured plan that will drive ticket generation. This plan is the source of truth for scope.",
|
|
137
144
|
"constraints": [
|
|
138
|
-
"Be explicit about scope boundaries
|
|
145
|
+
"Be explicit about scope boundaries — ambiguous scope will produce ambiguous tickets.",
|
|
139
146
|
"Success criteria must be measurable, not just descriptive.",
|
|
140
147
|
"For Standard path: this plan feeds directly into batch ticket generation."
|
|
141
148
|
],
|
|
142
149
|
"procedure": [
|
|
143
150
|
"Write: Project Summary (2-3 sentences, what is being built and why).",
|
|
144
151
|
"Write: Key Deliverables (bulleted list of distinct components or features).",
|
|
145
|
-
"Write: In-Scope (explicit list
|
|
146
|
-
"Write: Out-of-Scope (explicit exclusions
|
|
147
|
-
"Write: Success Criteria (measurable definition of done
|
|
152
|
+
"Write: In-Scope (explicit list — prevents scope creep).",
|
|
153
|
+
"Write: Out-of-Scope (explicit exclusions — prevents misunderstandings).",
|
|
154
|
+
"Write: Success Criteria (measurable definition of done — each item verifiable).",
|
|
148
155
|
"Write: High-Level Timeline (phases or milestones with rough sizing).",
|
|
149
156
|
"Review: does every deliverable map clearly to implementable work? Is anything in scope that should be out?"
|
|
150
157
|
],
|
|
@@ -170,7 +177,7 @@
|
|
|
170
177
|
"goal": "Break the approved plan into a logical work hierarchy that development teams can execute.",
|
|
171
178
|
"constraints": [
|
|
172
179
|
"Every item in the plan's In-Scope list must map to at least one work item in the hierarchy.",
|
|
173
|
-
"Dependencies must be explicit
|
|
180
|
+
"Dependencies must be explicit — not implied by ordering alone.",
|
|
174
181
|
"Oversized stories (more than one sprint of work) should be split."
|
|
175
182
|
],
|
|
176
183
|
"procedure": [
|
|
@@ -202,7 +209,7 @@
|
|
|
202
209
|
"promptBlocks": {
|
|
203
210
|
"goal": "Add effort estimates, risk assessments, and team assignments to each story in the hierarchy.",
|
|
204
211
|
"constraints": [
|
|
205
|
-
"Conservative estimates are better than optimistic ones
|
|
212
|
+
"Conservative estimates are better than optimistic ones — note uncertainty explicitly.",
|
|
206
213
|
"Justify each estimate with one sentence of reasoning.",
|
|
207
214
|
"Flag stories on the critical path."
|
|
208
215
|
],
|
|
@@ -212,7 +219,7 @@
|
|
|
212
219
|
"Assign priority: must-have for MVP, should-have, nice-to-have.",
|
|
213
220
|
"Note suggested team or skill area for each story.",
|
|
214
221
|
"Identify critical path: which stories block the most downstream work? Surface these explicitly.",
|
|
215
|
-
"Flag any stories whose estimates feel uncertain
|
|
222
|
+
"Flag any stories whose estimates feel uncertain — surface the unknowns rather than hiding them in a range."
|
|
216
223
|
],
|
|
217
224
|
"outputRequired": {
|
|
218
225
|
"notesMarkdown": "Total story point estimate, critical path items, high-risk stories."
|
|
@@ -277,7 +284,7 @@
|
|
|
277
284
|
"promptBlocks": {
|
|
278
285
|
"goal": "Extract actionable team preferences from this session and persist them so future runs use them automatically.",
|
|
279
286
|
"constraints": [
|
|
280
|
-
"Only write rules that are genuinely reusable across future tickets
|
|
287
|
+
"Only write rules that are genuinely reusable across future tickets — skip one-off project specifics.",
|
|
281
288
|
"Keep rules concise and actionable, not narrative.",
|
|
282
289
|
"Append to ./.workflow_rules/ticket_creation.md rather than replacing it."
|
|
283
290
|
],
|
|
@@ -285,7 +292,7 @@
|
|
|
285
292
|
"Review what conventions, preferences, or requirements emerged during this session.",
|
|
286
293
|
"Identify patterns worth preserving: naming conventions, field usage, AC format preferences, estimation approach, labeling rules.",
|
|
287
294
|
"Draft new rules as short, imperative statements (e.g., 'Use T-shirt sizing not Fibonacci', 'Always include a Figma link in design tickets').",
|
|
288
|
-
"Check against existing rules
|
|
295
|
+
"Check against existing rules — avoid duplicates or contradictions.",
|
|
289
296
|
"Append new rules to ./.workflow_rules/ticket_creation.md, creating the file if it does not exist."
|
|
290
297
|
],
|
|
291
298
|
"outputRequired": {
|
|
@@ -300,4 +307,4 @@
|
|
|
300
307
|
"requireConfirmation": false
|
|
301
308
|
}
|
|
302
309
|
]
|
|
303
|
-
}
|
|
310
|
+
}
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "architecture-scalability-audit",
|
|
3
|
-
"name": "Architecture Scalability Audit (v1
|
|
3
|
+
"name": "Architecture Scalability Audit (v1 • Evidence-Driven • Dimension-Scoped • rigorMode-Adaptive)",
|
|
4
4
|
"version": "0.1.0",
|
|
5
5
|
"description": "Use this to audit a bounded codebase scope for architecture scalability. Declare which scalability dimensions matter (load, data volume, team size, feature extensibility, operational); the workflow investigates each and produces evidence-grounded findings.",
|
|
6
|
+
"about": "## Architecture Scalability Audit\n\nThis workflow audits a bounded codebase scope for scalability across the dimensions you care about. It does not produce generic \"won't scale\" warnings -- every finding must cite a specific file, class, method, or pattern, and every concern must name a concrete growth scenario (e.g. 10x traffic, 100x records, 3x team size).\n\n**What it does:**\nYou declare the scope boundary and the scalability dimensions that matter for your context. The workflow reads the codebase to understand the architecture, assigns one dedicated reviewer family per dimension, runs them in parallel from a shared fact packet, reconciles contradictions and blind spots through a synthesis loop, and delivers a per-dimension verdict (will_break / risk / fine) with an overall scalability readiness verdict.\n\n**The five scalability dimensions you can select:**\n- **load** -- handles more requests, users, or throughput\n- **data_volume** -- handles more records, storage, or query size\n- **team_org** -- more teams or developers working on this scope without friction\n- **feature_extensibility** -- more features added without rearchitecting\n- **operational** -- more deployments, environments, or operational complexity\n\n**When to use it:**\n- Before investing significantly in a component you expect to grow\n- When planning capacity for a new traffic tier or data volume increase\n- When evaluating a codebase acquired through a merger, partnership, or open-source adoption\n- When a team is growing and you want to know if the architecture will hold under parallel development\n\n**What it produces:**\nAn overall scalability verdict, per-dimension findings with specific code references and growth scenarios, cross-cutting concerns that span multiple dimensions, a prioritized concern list, and explicit callouts of what is already well-designed for scale.\n\n**How to get good results:**\nBe specific about the scope boundary -- name the service, module, or 
feature explicitly and say what is out of scope. Choose the dimensions relevant to your actual growth pressures; the workflow will not add dimensions you did not select. If you know a specific growth target (e.g. \"we expect 50x user growth in 18 months\"), mention it.",
|
|
7
|
+
"examples": [
|
|
8
|
+
"Audit the search service for load and data_volume scalability before the Black Friday traffic ramp",
|
|
9
|
+
"Check the analytics pipeline for data_volume and operational scalability -- we are moving from 1M to 100M events/day",
|
|
10
|
+
"Scalability audit of the user management module for team_org and feature_extensibility as we split into three squads",
|
|
11
|
+
"Audit the cart and checkout services for load scalability -- scope is /cart and /checkout only"
|
|
12
|
+
],
|
|
6
13
|
"recommendedPreferences": {
|
|
7
14
|
"recommendedAutonomy": "guided",
|
|
8
15
|
"recommendedRiskPolicy": "conservative"
|
|
@@ -20,7 +27,7 @@
|
|
|
20
27
|
"DEFAULT BEHAVIOR: self-execute with tools. Ask only for true scope or dimension decisions you cannot resolve yourself.",
|
|
21
28
|
"V2 DURABILITY: keep workflow truth in output.notesMarkdown and explicit context fields. Human-facing markdown artifacts are optional companions only.",
|
|
22
29
|
"OWNERSHIP: the main agent owns the fact packet, synthesis, verdict calibration, and final handoff. Delegated dimension audits are evidence, not authority.",
|
|
23
|
-
"DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant
|
|
30
|
+
"DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant — surface them as advisory notes instead.",
|
|
24
31
|
"EVIDENCE FIRST: every risk or will_break finding must cite a specific file, class, method, or pattern in the codebase. Technology name alone is not evidence.",
|
|
25
32
|
"GROWTH SCENARIO: every concern must name a growth scenario (e.g. 10x traffic, 100x records, 3x team size). Generic 'won't scale' findings are not acceptable.",
|
|
26
33
|
"VERDICT TIERS: use will_break / risk / fine. Do not force a cleaner answer than the evidence supports.",
|
|
@@ -44,10 +51,10 @@
|
|
|
44
51
|
],
|
|
45
52
|
"procedure": [
|
|
46
53
|
"Read the codebase to understand the architecture: key components, entry points, data flows, and main patterns within the declared scope.",
|
|
47
|
-
"Present the five scalability dimensions and ask the user to select which apply: (1) load
|
|
48
|
-
"Ask the user to confirm the scope boundary
|
|
49
|
-
"Classify audit complexity: Simple (1
|
|
50
|
-
"Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1
|
|
54
|
+
"Present the five scalability dimensions and ask the user to select which apply: (1) load — handles more requests, users, or throughput; (2) data_volume — handles more records, storage, or query size; (3) team_org — more teams or developers working on this scope; (4) feature_extensibility — more features added without rearchitecting; (5) operational — more deployments, environments, or operational complexity.",
|
|
55
|
+
"Ask the user to confirm the scope boundary — what is explicitly in and explicitly out.",
|
|
56
|
+
"Classify audit complexity: Simple (1–2 dimensions, small scope), Medium (2–3 dimensions, moderate scope), Complex (4–5 dimensions or large scope).",
|
|
57
|
+
"Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1–3. If any score is 1, gather more context before advancing."
|
|
51
58
|
],
|
|
52
59
|
"outputRequired": {
|
|
53
60
|
"notesMarkdown": "Scope boundary (in and out), declared dimensions with rationale, audit complexity classification, and any open boundary questions.",
|
|
@@ -105,7 +112,7 @@
|
|
|
105
112
|
"procedure": [
|
|
106
113
|
"Create a neutral `scalabilityFactPacket` containing: scope boundary (in and out), declared dimensions, key architectural patterns found, main components and their roles, data flow and storage patterns, concurrency and state management approach, dependency boundaries and coupling, deployment and runtime assumptions, and explicit open unknowns.",
|
|
107
114
|
"Include realism signals: code that looks scalable at a glance but may have hidden limits (e.g. in-memory state, synchronous choke points, missing pagination, tight coupling between components).",
|
|
108
|
-
"For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness
|
|
115
|
+
"For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness — check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability — check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction — specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added — specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs — specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
|
|
109
116
|
"Set selectedReviewerFamilies to the list of assigned families (one per declared dimension). Set contradictionCount and blindSpotCount to 0."
|
|
110
117
|
],
|
|
111
118
|
"outputRequired": {
|
|
@@ -124,7 +131,7 @@
|
|
|
124
131
|
"var": "auditComplexity",
|
|
125
132
|
"equals": "Simple"
|
|
126
133
|
},
|
|
127
|
-
"text": "For a Simple audit, keep the fact packet compact
|
|
134
|
+
"text": "For a Simple audit, keep the fact packet compact — scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
|
|
128
135
|
}
|
|
129
136
|
],
|
|
130
137
|
"requireConfirmation": false
|
|
@@ -149,11 +156,11 @@
|
|
|
149
156
|
],
|
|
150
157
|
"Each reviewer family uses scalabilityFactPacket as primary truth.",
|
|
151
158
|
"Reviewer-family outputs are raw evidence. The main agent owns synthesis and verdict assignment.",
|
|
152
|
-
"Each reviewer family audits only its declared dimension
|
|
159
|
+
"Each reviewer family audits only its declared dimension — no cross-dimension scope creep."
|
|
153
160
|
],
|
|
154
161
|
"procedure": [
|
|
155
162
|
"Before investigating, restate your scalabilityHypothesis and name which dimension is most likely to challenge it.",
|
|
156
|
-
"Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references
|
|
163
|
+
"Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references — not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
|
|
157
164
|
"After completing all dimension investigations, synthesize explicitly: what was confirmed, what was genuinely new, what looks weak or overstated, and what changed your current hypothesis.",
|
|
158
165
|
"Build dimensionFindings keyed by dimension containing: findings list, verdict summary, evidence quality assessment, and open questions.",
|
|
159
166
|
"Identify cross-cutting concerns: architectural patterns or components that appear in findings from multiple dimensions."
|
|
@@ -244,10 +251,10 @@
|
|
|
244
251
|
"This is a structured four-item check, not a free-form review."
|
|
245
252
|
],
|
|
246
253
|
"procedure": [
|
|
247
|
-
"Check 1
|
|
248
|
-
"Check 2
|
|
249
|
-
"Check 3
|
|
250
|
-
"Check 4
|
|
254
|
+
"Check 1 — Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
|
|
255
|
+
"Check 2 — Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
|
|
256
|
+
"Check 3 — Undeclared relevant dimensions: does the codebase have patterns suggesting a dimension the user did not declare actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
|
|
257
|
+
"Check 4 — Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
|
|
251
258
|
"Set blindSpotCount to the number of blind spots found across all four checks."
|
|
252
259
|
],
|
|
253
260
|
"outputRequired": {
|
|
@@ -299,11 +306,11 @@
|
|
|
299
306
|
"Do not advance to handoff with known hard gate failures."
|
|
300
307
|
],
|
|
301
308
|
"procedure": [
|
|
302
|
-
"Verdict aggregation
|
|
303
|
-
"Hard gate 1
|
|
304
|
-
"Hard gate 2
|
|
305
|
-
"Hard gate 3
|
|
306
|
-
"Hard gate 4
|
|
309
|
+
"Verdict aggregation — derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
|
|
310
|
+
"Hard gate 1 — Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence; if none can be found, downgrade a will_break finding to risk and attach an evidence-needed note to the finding.",
|
|
311
|
+
"Hard gate 2 — Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
|
|
312
|
+
"Hard gate 3 — Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
|
|
313
|
+
"Hard gate 4 — Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
|
|
307
314
|
"Set hardGatesPassed = true only when the verdict aggregation and all four gates pass. Set hardGateFailures to the list of any that needed fixing."
|
|
308
315
|
],
|
|
309
316
|
"outputRequired": {
|
|
@@ -327,13 +334,13 @@
|
|
|
327
334
|
"Do not drift into implementation planning or remediation design unless the user explicitly asks."
|
|
328
335
|
],
|
|
329
336
|
"procedure": [
|
|
330
|
-
"Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale
|
|
337
|
+
"Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale — name the specific dimension and finding that drove it.",
|
|
331
338
|
"For each declared dimension, give: dimension name, verdict tier (will_break / risk / fine), top finding with specific code reference, growth scenario, and severity.",
|
|
332
339
|
"List cross-cutting concerns: patterns that create scalability risk across multiple dimensions.",
|
|
333
340
|
"Revisit scalabilityHypothesis from Phase 1: was it confirmed or revised? What evidence changed your view?",
|
|
334
341
|
"Give a prioritized concern list ordered by: (1) will_break findings first, (2) risk findings by severity, (3) cross-cutting concerns, (4) fine findings worth noting as already solid.",
|
|
335
342
|
"Surface any advisory notes for undeclared dimensions that may be worth considering.",
|
|
336
|
-
"State what is already well-designed for scale
|
|
343
|
+
"State what is already well-designed for scale — not everything should be a concern."
|
|
337
344
|
],
|
|
338
345
|
"outputRequired": {
|
|
339
346
|
"notesMarkdown": "Decision-ready scalability handoff: overall verdict, per-dimension summary with code references, prioritized concerns, cross-cutting concerns, hypothesis outcome, and what is already solid."
|
|
@@ -342,7 +349,7 @@
|
|
|
342
349
|
"The handoff is verdict-first and evidence-grounded.",
|
|
343
350
|
"Every concern is tied to a specific code reference and growth scenario.",
|
|
344
351
|
"The hypothesis from Phase 1 is explicitly addressed.",
|
|
345
|
-
"What is already well-designed is stated
|
|
352
|
+
"What is already well-designed is stated — not just the concerns."
|
|
346
353
|
]
|
|
347
354
|
},
|
|
348
355
|
"requireConfirmation": false
|
|
@@ -3,6 +3,13 @@
|
|
|
3
3
|
"name": "Bug Investigation (v2 \u2022 Notes-First \u2022 WorkRail Executor)",
|
|
4
4
|
"version": "2.0.0",
|
|
5
5
|
"description": "Use this to diagnose a bug or unexpected behavior in code. Builds a hypothesis, gathers evidence, and proves or disproves the root cause before concluding.",
|
|
6
|
+
"about": "## Bug Investigation Workflow\n\nThis workflow guides an AI agent through a rigorous, evidence-driven investigation of a bug or unexpected behavior. It is designed to prevent the most common failure mode in AI debugging: jumping to a plausible-sounding conclusion without sufficient proof.\n\n**What it does:**\nThe workflow moves through triage, context gathering, hypothesis generation, evidence planning, iterative evidence collection, diagnosis validation, and a final handoff. It explicitly distinguishes between theories (formed by reading code) and proof (confirmed by running tests or reproducing the failure). The final output is a diagnosis with a confidence rating, the strongest alternative explanations that were ruled out, and a high-level fix direction -- not a patch.\n\n**When to use it:**\n- You have a specific bug report, failing test, or production incident to investigate\n- The root cause is not immediately obvious and multiple explanations are plausible\n- You want a trustworthy diagnosis before spending time writing a fix\n- The bug carries enough risk that you need to be confident before changing code\n\n**What it produces:**\nA structured investigation handoff covering: root cause type (single cause, multi-factor, working as designed, etc.), proof summary, ruled-out alternatives, residual uncertainty, likely files involved, and verification steps for whoever implements the fix.\n\n**How to get good results:**\nProvide repro steps, observed symptoms, and expected behavior upfront. Include any relevant logs, failing test commands, or environment details you already have. The more concrete the repro, the faster the workflow can gather real evidence rather than theorizing. If the bug is intermittent, say so -- the workflow adapts its rigor based on reproducibility confidence.",
|
|
7
|
+
"examples": [
|
|
8
|
+
"Investigate why the payments API returns 500 after deploying the rate limiter",
|
|
9
|
+
"Debug why the mobile app crashes on logout when a background sync is in progress",
|
|
10
|
+
"Find out why search results are missing items added in the last 10 minutes",
|
|
11
|
+
"Diagnose why the integration test passes locally but fails on the build server"
|
|
12
|
+
],
|
|
6
13
|
"recommendedPreferences": {
|
|
7
14
|
"recommendedAutonomy": "guided",
|
|
8
15
|
"recommendedRiskPolicy": "conservative"
|
|
@@ -3,6 +3,13 @@
|
|
|
3
3
|
"name": "Agentic Task Dev Workflow (Invariants \u2022 Architecture \u2022 Vertical Slices \u2022 PR Sizing \u2022 Audits \u2022 Resumable)",
|
|
4
4
|
"version": "1.5.0",
|
|
5
5
|
"description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
|
|
6
|
+
"about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
|
|
7
|
+
"examples": [
|
|
8
|
+
"Implement JWT refresh token rotation in the auth service",
|
|
9
|
+
"Fix the race condition in the cache invalidation path when concurrent writes occur",
|
|
10
|
+
"Refactor the payment flow to use a Result type instead of throwing exceptions",
|
|
11
|
+
"Add pagination support to the messaging inbox API endpoint"
|
|
12
|
+
],
|
|
6
13
|
"recommendedPreferences": {
|
|
7
14
|
"recommendedAutonomy": "guided",
|
|
8
15
|
"recommendedRiskPolicy": "conservative"
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "coding-task-workflow-agentic",
|
|
3
|
-
"name": "Agentic Task Dev Workflow (Lean
|
|
3
|
+
"name": "Agentic Task Dev Workflow (Lean • Notes-First • WorkRail Executor)",
|
|
4
4
|
"version": "1.0.0",
|
|
5
5
|
"description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
|
|
6
|
+
"about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. 
It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
|
|
7
|
+
"examples": [
|
|
8
|
+
"Implement JWT refresh token rotation in the auth service",
|
|
9
|
+
"Fix the race condition in the cache invalidation path when concurrent writes occur",
|
|
10
|
+
"Refactor the payment flow to use a Result type instead of throwing exceptions",
|
|
11
|
+
"Add pagination support to the messaging inbox API endpoint"
|
|
12
|
+
],
|
|
6
13
|
"recommendedPreferences": {
|
|
7
14
|
"recommendedAutonomy": "guided",
|
|
8
15
|
"recommendedRiskPolicy": "conservative"
|
|
@@ -21,9 +28,10 @@
|
|
|
21
28
|
"SUBAGENT SYNTHESIS: treat subagent output as evidence, not conclusions. State your hypothesis before delegating, then interrogate what came back: what was missed, wrong, or new? Say what changed your mind or what you still reject, and why.",
|
|
22
29
|
"PARALLELISM: when reads, audits, or delegations are independent, run them in parallel inside the phase. Parallelize cognition; serialize synthesis and canonical writes.",
|
|
23
30
|
"PHILOSOPHY LENS: apply the user's coding philosophy (from active session rules) as the evaluation lens. Flag violations by principle name, not as generic feedback. If principles conflict, surface the tension explicitly instead of silently choosing.",
|
|
24
|
-
"VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness
|
|
31
|
+
"VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness — in that order of reliability.",
|
|
25
32
|
"DRIFT HANDLING: when reality diverges from the plan, update the plan artifact and re-audit deliberately rather than accumulating undocumented drift.",
|
|
26
|
-
"NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS."
|
|
33
|
+
"NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS.",
|
|
34
|
+
"SLICE DISCIPLINE: Phase 6 is a loop -- implement ONE slice per iteration. Do not implement multiple slices at once. The verification loop exists to catch drift per slice, not retroactively."
|
|
27
35
|
],
|
|
28
36
|
"references": [
|
|
29
37
|
{
|
|
@@ -107,7 +115,7 @@
|
|
|
107
115
|
},
|
|
108
116
|
{
|
|
109
117
|
"id": "phase-1b-design-deep",
|
|
110
|
-
"title": "Phase 1b: Design Generation (Injected Routine
|
|
118
|
+
"title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
|
|
111
119
|
"runCondition": {
|
|
112
120
|
"and": [
|
|
113
121
|
{
|
|
@@ -134,7 +142,7 @@
|
|
|
134
142
|
"var": "taskComplexity",
|
|
135
143
|
"not_equals": "Small"
|
|
136
144
|
},
|
|
137
|
-
"prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach`
|
|
145
|
+
"prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` — chosen design with rationale tied to tensions\n- `runnerUpApproach` — next-best option and why it lost\n- `architectureRationale` — tensions resolved vs accepted\n- `pivotTriggers` — conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
|
|
138
146
|
"promptFragments": [
|
|
139
147
|
{
|
|
140
148
|
"id": "phase-1c-challenge-standard",
|
|
@@ -242,7 +250,7 @@
|
|
|
242
250
|
"var": "taskComplexity",
|
|
243
251
|
"not_equals": "Small"
|
|
244
252
|
},
|
|
245
|
-
"prompt": "Turn the decision into a plan someone else could execute without guessing.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount`
|
|
253
|
+
"prompt": "Turn the decision into a plan someone else could execute without guessing.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` — count of open issues that would materially affect implementation quality\n- `planConfidenceBand` — Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
|
|
246
254
|
"requireConfirmation": false
|
|
247
255
|
},
|
|
248
256
|
{
|
|
@@ -332,7 +340,7 @@
|
|
|
332
340
|
{
|
|
333
341
|
"id": "phase-4b-loop-decision",
|
|
334
342
|
"title": "Loop Exit Decision",
|
|
335
|
-
"prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop
|
|
343
|
+
"prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop — but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
|
|
336
344
|
"requireConfirmation": true,
|
|
337
345
|
"outputContract": {
|
|
338
346
|
"contractRef": "wr.contracts.loop_control"
|
|
@@ -369,7 +377,7 @@
|
|
|
369
377
|
{
|
|
370
378
|
"id": "phase-6a-implement-slice",
|
|
371
379
|
"title": "Implement Slice",
|
|
372
|
-
"prompt": "Implement
|
|
380
|
+
"prompt": "Implement the current slice: `{{currentSlice.name}}`.\n\nBefore writing a single line of code, declare your scope:\n- List the exact files and symbols this slice touches\n- Confirm none of them belong to a later slice\n- If you have already edited files from this or any other slice in a previous step, stop and report it\n\nHard scope rule: you may only modify what is described in `{{currentSlice.name}}`. Anything outside that boundary is out of scope for this iteration -- not \"do it early\", not \"while I'm here\". If you discover you need to touch something outside this slice to make it compile or integrate, set `unexpectedScopeChange = true` and do the minimum necessary to stay green, then stop.\n\nImplement incrementally. Run tests and build to prove the slice works before advancing.\n\nTrack:\n- `specialCaseIntroduced` -- did this slice require a new special-case?\n- `unplannedAbstractionIntroduced` -- did this slice introduce an abstraction not in the plan?\n- `unexpectedScopeChange` -- did this slice touch files outside its planned scope?\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `prStrategy = MultiPR`\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nCapture: `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, `unexpectedScopeChange`, `verifyNeeded`",
|
|
373
381
|
"requireConfirmation": false
|
|
374
382
|
},
|
|
375
383
|
{
|