npm - @exaudeus/workrail - Versions diffs - 3.15.0 → 3.16.0 - Mend

@exaudeus/workrail 3.15.0 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139)

package/dist/application/services/workflow-service.d.ts +2 -0
package/dist/application/services/workflow-service.js +3 -0
package/dist/console/assets/index-BE5PAgPO.js +28 -0
package/dist/console/assets/index-BZNM03t1.css +1 -0
package/dist/console/index.html +2 -2
package/dist/env-flags.d.ts +1 -0
package/dist/env-flags.js +4 -0
package/dist/infrastructure/session/HttpServer.d.ts +3 -3
package/dist/infrastructure/session/HttpServer.js +68 -74
package/dist/infrastructure/storage/caching-workflow-storage.d.ts +2 -0
package/dist/infrastructure/storage/caching-workflow-storage.js +15 -6
package/dist/infrastructure/storage/file-workflow-storage.js +3 -4
package/dist/infrastructure/storage/schema-validating-workflow-storage.js +9 -8
package/dist/manifest.json +257 -193
package/dist/mcp/assert-output.d.ts +37 -0
package/dist/mcp/assert-output.js +52 -0
package/dist/mcp/boundary-coercion.d.ts +1 -0
package/dist/mcp/boundary-coercion.js +44 -0
package/dist/mcp/dev-mode.d.ts +1 -0
package/dist/mcp/dev-mode.js +4 -0
package/dist/mcp/handler-factory.js +12 -9
package/dist/mcp/handlers/session.js +8 -9
package/dist/mcp/handlers/v2-advance-core/event-builders.d.ts +2 -0
package/dist/mcp/handlers/v2-advance-core/event-builders.js +6 -6
package/dist/mcp/handlers/v2-advance-core/index.d.ts +2 -0
package/dist/mcp/handlers/v2-advance-core/index.js +4 -3
package/dist/mcp/handlers/v2-advance-core/input-validation.d.ts +2 -0
package/dist/mcp/handlers/v2-advance-core/input-validation.js +32 -9
package/dist/mcp/handlers/v2-advance-core/outcome-blocked.d.ts +2 -0
package/dist/mcp/handlers/v2-advance-core/outcome-blocked.js +1 -1
package/dist/mcp/handlers/v2-advance-core/outcome-success.d.ts +2 -0
package/dist/mcp/handlers/v2-advance-core/outcome-success.js +1 -1
package/dist/mcp/handlers/v2-checkpoint.d.ts +1 -1
package/dist/mcp/handlers/v2-checkpoint.js +5 -6
package/dist/mcp/handlers/v2-execution/advance.d.ts +4 -2
package/dist/mcp/handlers/v2-execution/advance.js +5 -7
package/dist/mcp/handlers/v2-execution/continue-advance.js +56 -26
package/dist/mcp/handlers/v2-execution/continue-rehydrate.d.ts +1 -1
package/dist/mcp/handlers/v2-execution/continue-rehydrate.js +9 -9
package/dist/mcp/handlers/v2-execution/replay.d.ts +6 -4
package/dist/mcp/handlers/v2-execution/replay.js +47 -30
package/dist/mcp/handlers/v2-execution/start.d.ts +2 -3
package/dist/mcp/handlers/v2-execution/start.js +11 -11
package/dist/mcp/handlers/v2-execution/workflow-object-cache.d.ts +5 -0
package/dist/mcp/handlers/v2-execution/workflow-object-cache.js +19 -0
package/dist/mcp/handlers/v2-execution-helpers.d.ts +1 -0
package/dist/mcp/handlers/v2-execution-helpers.js +23 -7
package/dist/mcp/handlers/v2-resume.d.ts +1 -1
package/dist/mcp/handlers/v2-resume.js +3 -4
package/dist/mcp/handlers/v2-state-conversion.js +5 -1
package/dist/mcp/handlers/v2-workflow.d.ts +80 -0
package/dist/mcp/handlers/v2-workflow.js +36 -21
package/dist/mcp/handlers/workflow.d.ts +2 -5
package/dist/mcp/handlers/workflow.js +15 -12
package/dist/mcp/output-schemas.d.ts +20 -27
package/dist/mcp/output-schemas.js +5 -7
package/dist/mcp/server.js +22 -4
package/dist/mcp/tool-call-timing.d.ts +24 -0
package/dist/mcp/tool-call-timing.js +85 -0
package/dist/mcp/transports/http-entry.js +3 -2
package/dist/mcp/transports/http-listener.d.ts +1 -0
package/dist/mcp/transports/http-listener.js +25 -0
package/dist/mcp/transports/shutdown-hooks.d.ts +4 -1
package/dist/mcp/transports/shutdown-hooks.js +3 -2
package/dist/mcp/transports/stdio-entry.js +6 -28
package/dist/mcp/v2-response-formatter.js +2 -4
package/dist/mcp/validation/schema-introspection.d.ts +1 -0
package/dist/mcp/validation/schema-introspection.js +15 -5
package/dist/mcp/validation/suggestion-generator.js +2 -2
package/dist/runtime/adapters/node-process-signals.d.ts +1 -0
package/dist/runtime/adapters/node-process-signals.js +5 -0
package/dist/runtime/adapters/noop-process-signals.d.ts +1 -0
package/dist/runtime/adapters/noop-process-signals.js +2 -0
package/dist/runtime/ports/process-signals.d.ts +1 -0
package/dist/types/workflow-definition.d.ts +2 -0
package/dist/types/workflow.d.ts +3 -0
package/dist/types/workflow.js +35 -26
package/dist/v2/durable-core/domain/context-template-resolver.js +2 -2
package/dist/v2/durable-core/domain/function-definition-expander.js +2 -17
package/dist/v2/durable-core/domain/prompt-renderer.d.ts +1 -0
package/dist/v2/durable-core/domain/prompt-renderer.js +23 -18
package/dist/v2/durable-core/domain/recap-recovery.js +23 -16
package/dist/v2/durable-core/domain/retrieval-contract.js +13 -7
package/dist/v2/durable-core/session-index.d.ts +22 -0
package/dist/v2/durable-core/session-index.js +58 -0
package/dist/v2/durable-core/sorted-event-log.d.ts +6 -0
package/dist/v2/durable-core/sorted-event-log.js +15 -0
package/dist/v2/infra/local/fs/index.js +8 -8
package/dist/v2/infra/local/session-store/index.d.ts +1 -1
package/dist/v2/infra/local/session-store/index.js +71 -61
package/dist/v2/infra/local/session-summary-provider/index.js +9 -4
package/dist/v2/infra/local/snapshot-store/index.js +2 -1
package/dist/v2/ports/session-event-log-store.port.d.ts +1 -1
package/dist/v2/projections/assessment-consequences.d.ts +2 -1
package/dist/v2/projections/assessment-consequences.js +0 -5
package/dist/v2/projections/assessments.d.ts +2 -1
package/dist/v2/projections/assessments.js +2 -4
package/dist/v2/projections/gaps.d.ts +2 -1
package/dist/v2/projections/gaps.js +0 -5
package/dist/v2/projections/preferences.d.ts +2 -1
package/dist/v2/projections/preferences.js +0 -5
package/dist/v2/projections/run-context.d.ts +2 -2
package/dist/v2/projections/run-context.js +0 -5
package/dist/v2/projections/run-dag.js +7 -1
package/dist/v2/projections/run-execution-trace.d.ts +8 -0
package/dist/v2/projections/run-execution-trace.js +124 -0
package/dist/v2/projections/run-status-signals.d.ts +2 -2
package/dist/v2/usecases/console-routes.d.ts +3 -1
package/dist/v2/usecases/console-routes.js +123 -25
package/dist/v2/usecases/console-service.d.ts +1 -0
package/dist/v2/usecases/console-service.js +83 -25
package/dist/v2/usecases/console-types.d.ts +53 -0
package/dist/v2/usecases/worktree-service.js +32 -1
package/package.json +6 -5
package/spec/workflow.schema.json +18 -0
package/workflows/adaptive-ticket-creation.json +23 -16
package/workflows/architecture-scalability-audit.json +29 -22
package/workflows/bug-investigation.agentic.v2.json +7 -0
package/workflows/coding-task-workflow-agentic.json +7 -0
package/workflows/coding-task-workflow-agentic.lean.v2.json +16 -8
package/workflows/coding-task-workflow-agentic.v2.json +7 -0
package/workflows/cross-platform-code-conversion.v2.json +7 -0
package/workflows/document-creation-workflow.json +15 -8
package/workflows/documentation-update-workflow.json +15 -8
package/workflows/intelligent-test-case-generation.json +7 -0
package/workflows/learner-centered-course-workflow.json +9 -2
package/workflows/mr-review-workflow.agentic.v2.json +7 -0
package/workflows/personal-learning-materials-creation-branched.json +15 -8
package/workflows/presentation-creation.json +12 -5
package/workflows/production-readiness-audit.json +7 -0
package/workflows/relocation-workflow-us.json +39 -32
package/workflows/scoped-documentation-workflow.json +33 -26
package/workflows/ui-ux-design-workflow.json +7 -0
package/workflows/workflow-diagnose-environment.json +6 -0
package/workflows/workflow-for-workflows.json +7 -0
package/workflows/workflow-for-workflows.v2.json +23 -11
package/workflows/wr.discovery.json +8 -1
package/dist/console/assets/index-BZYIjrzJ.js +0 -28
package/dist/console/assets/index-OLCKbDdm.css +0 -1

package/dist/v2/usecases/console-types.d.ts CHANGED Viewed

@@ -31,6 +31,10 @@ export interface ConsoleDagNode {
     readonly isPreferredTip: boolean;
     readonly isTip: boolean;
     readonly stepLabel: string | null;
+    readonly hasRecap: boolean;
+    readonly hasFailedValidations: boolean;
+    readonly hasGaps: boolean;
+    readonly hasArtifacts: boolean;
 }
 export interface ConsoleDagEdge {
     readonly edgeKind: 'acked_step' | 'checkpoint';
@@ -38,6 +42,25 @@ export interface ConsoleDagEdge {
     readonly toNodeId: string;
     readonly createdAtEventIndex: number;
 }
+export type ConsoleExecutionTraceItemKind = 'selected_next_step' | 'evaluated_condition' | 'entered_loop' | 'exited_loop' | 'detected_non_tip_advance' | 'context_fact' | 'divergence';
+export interface ConsoleExecutionTraceRef {
+    readonly kind: 'node_id' | 'step_id' | 'loop_id' | 'condition_id';
+    readonly value: string;
+}
+export interface ConsoleExecutionTraceItem {
+    readonly kind: ConsoleExecutionTraceItemKind;
+    readonly summary: string;
+    readonly recordedAtEventIndex: number;
+    readonly refs: readonly ConsoleExecutionTraceRef[];
+}
+export interface ConsoleExecutionTraceFact {
+    readonly key: string;
+    readonly value: string;
+}
+export interface ConsoleExecutionTraceSummary {
+    readonly items: readonly ConsoleExecutionTraceItem[];
+    readonly contextFacts: readonly ConsoleExecutionTraceFact[];
+}
 export interface ConsoleDagRun {
     readonly runId: string;
     readonly workflowId: string | null;
@@ -49,6 +72,7 @@ export interface ConsoleDagRun {
     readonly tipNodeIds: readonly string[];
     readonly status: ConsoleRunStatus;
     readonly hasUnresolvedCriticalGaps: boolean;
+    readonly executionTraceSummary: ConsoleExecutionTraceSummary | null;
 }
 export interface ConsoleSessionDetail {
     readonly sessionId: string;
@@ -128,3 +152,32 @@ export interface ConsoleNodeDetail {
     readonly validations: readonly ConsoleValidationResult[];
     readonly gaps: readonly ConsoleNodeGap[];
 }
+export interface ConsoleWorkflowSourceInfo {
+    readonly kind: 'bundled' | 'user' | 'project' | 'custom' | 'git' | 'remote' | 'plugin';
+    readonly displayName: string;
+}
+export interface ConsoleWorkflowSummary {
+    readonly id: string;
+    readonly name: string;
+    readonly description: string;
+    readonly version: string;
+    readonly tags: readonly string[];
+    readonly source: ConsoleWorkflowSourceInfo;
+    readonly about?: string;
+    readonly examples?: readonly string[];
+}
+export interface ConsoleWorkflowListResponse {
+    readonly workflows: readonly ConsoleWorkflowSummary[];
+}
+export interface ConsoleWorkflowDetail {
+    readonly id: string;
+    readonly name: string;
+    readonly description: string;
+    readonly version: string;
+    readonly tags: readonly string[];
+    readonly source: ConsoleWorkflowSourceInfo;
+    readonly stepCount: number;
+    readonly about?: string;
+    readonly examples?: readonly string[];
+    readonly preconditions?: readonly string[];
+}

package/dist/v2/usecases/worktree-service.js CHANGED Viewed

@@ -44,6 +44,29 @@ function parseWorktreePorcelain(raw) {
     }
     return entries;
 }
+const MAX_CONCURRENT_ENRICHMENTS = 8;
+let activeEnrichments = 0;
+const enrichmentQueue = [];
+function acquireEnrichmentSlot() {
+    return new Promise((resolve) => {
+        if (activeEnrichments < MAX_CONCURRENT_ENRICHMENTS) {
+            activeEnrichments++;
+            resolve();
+        }
+        else {
+            enrichmentQueue.push(() => { activeEnrichments++; resolve(); });
+        }
+    });
+}
+function releaseEnrichmentSlot() {
+    const next = enrichmentQueue.shift();
+    if (next) {
+        next();
+    }
+    else {
+        activeEnrichments--;
+    }
+}
 function parseFileStatus(xy) {
     if (xy === '??')
         return 'untracked';
@@ -113,7 +136,15 @@ async function enrichRepo(repoRoot, activeSessions) {
     if (porcelain === null)
         return null;
     const rawWorktrees = parseWorktreePorcelain(porcelain);
-    const results = await Promise.allSettled(rawWorktrees.map(wt => enrichWorktree(wt)));
+    const results = await Promise.allSettled(rawWorktrees.map(async (wt) => {
+        await acquireEnrichmentSlot();
+        try {
+            return await enrichWorktree(wt);
+        }
+        finally {
+            releaseEnrichmentSlot();
+        }
+    }));
     const worktrees = rawWorktrees.flatMap((wt, i) => {
         const result = results[i];
         if (result.status === 'rejected') {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@exaudeus/workrail",
-  "version": "3.15.0",
+  "version": "3.16.0",
   "description": "Step-by-step workflow enforcement for AI agents via MCP",
   "license": "MIT",
   "repository": {
@@ -77,7 +77,8 @@
     "codemod:v2-contexts": "npx ts-node scripts/codemods/run.ts --mod v2-contexts --tsconfig tsconfig.test.json --write",
     "codemod:v2-prune": "npx ts-node scripts/codemods/run.ts --mod v2-prune --tsconfig tsconfig.test.json --write",
     "codemod:guard": "npx ts-node scripts/codemods/run.ts --mod guard --tsconfig tsconfig.test.json",
-    "codemod:test-platform-guard": "npx ts-node scripts/codemods/run.ts --mod test-platform-guard --tsconfig tsconfig.test.json"
+    "codemod:test-platform-guard": "npx ts-node scripts/codemods/run.ts --mod test-platform-guard --tsconfig tsconfig.test.json",
+    "prepare": "bash scripts/setup-hooks.sh"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.24.0",
@@ -89,7 +90,7 @@
     "dotenv": "^17.2.0",
     "express": "^5.1.0",
     "neverthrow": "^8.2.0",
-    "open": "^10.2.0",
+    "open": "^11.0.0",
     "reflect-metadata": "^0.2.0",
     "semver": "^7.7.2",
     "tsconfig-paths": "^4.2.0",
@@ -121,8 +122,8 @@
     "happy-dom": "^20.0.11",
     "jsdom": "^27.0.0",
     "lit": "^3.3.1",
-    "node-fetch": "^2.7.0",
-    "semantic-release": "^24.2.0",
+    "node-fetch": "^3.3.2",
+    "semantic-release": "^25.0.3",
     "ts-morph": "^27.0.2",
     "typescript": "^5.9.3",
     "vite": "^7.1.9",

package/spec/workflow.schema.json CHANGED Viewed

@@ -182,6 +182,24 @@
       "type": "integer",
       "minimum": 1,
       "description": "The authoring spec version this workflow was last validated against."
+    },
+    "about": {
+      "type": "string",
+      "description": "Human-readable overview for display in the console and other UIs. Markdown is supported. Write for a user deciding whether to use this workflow: what it does, when to use it, what it produces, and how to get good results. User-facing surface -- not an agent instruction (use metaGuidance for that).",
+      "minLength": 1,
+      "maxLength": 4096
+    },
+    "examples": {
+      "type": "array",
+      "description": "Short illustrative goal strings showing what this workflow is used for. Write for humans browsing the catalog and for agents selecting the right workflow. Each item should be concrete and specific enough to be informative.",
+      "items": {
+        "type": "string",
+        "minLength": 10,
+        "maxLength": 120
+      },
+      "minItems": 1,
+      "maxItems": 6,
+      "uniqueItems": true
     }
   },
   "required": [

package/workflows/adaptive-ticket-creation.json CHANGED Viewed

@@ -3,14 +3,21 @@
   "name": "Adaptive Ticket Creation Workflow",
   "version": "1.0.0",
   "description": "Use this to create high-quality Jira tickets for features, tasks, or epics. Automatically selects the right complexity path (Simple, Standard, or Epic) and generates properly structured tickets with acceptance criteria and estimates.",
+  "about": "## Adaptive Ticket Creation Workflow\n\nUse this to create well-structured Jira tickets for features, tasks, or epics. The workflow automatically selects the right complexity path (Simple, Standard, or Epic) based on the request, so you don't have to decide upfront how much process you need.\n\n### What it produces\n\n- **Simple path**: one complete, developer-ready Jira ticket with a context-rich description, checkbox-style acceptance criteria, and an effort estimate.\n- **Standard path**: a high-level plan plus a batch of related tickets covering all deliverables.\n- **Epic path**: everything in Standard, plus full epic decomposition, per-story estimates with risk ratings, dependency mapping, and a reusable team rules file at `.workflow_rules/ticket_creation.md` that future runs load automatically.\n\n### When to use it\n\n- You need to create one or more Jira tickets and want them to be genuinely developer-ready.\n- You have a feature request, bug, task, or epic that needs to be broken down and estimated.\n- Your team has specific ticket conventions (naming, sizing, labels) -- the workflow learns and stores these on the Epic path.\n\n### How to get good results\n\n- Provide as much context as you have: PRD links, design files, existing related tickets, and any known constraints.\n- If your team has a `.workflow_rules/ticket_creation.md` file, the workflow loads it automatically and applies your conventions.\n- On the Epic path, the workflow asks you to approve the high-level plan and the decomposition before generating tickets. Use these checkpoints to catch scope issues early.\n- Acceptance criteria are written as checkbox-style observable conditions, not restatements of requirements. If your team has a specific AC format, describe it in the rules file.",
+  "examples": [
+    "Create a Jira ticket for adding biometric authentication to the mobile login screen",
+    "Break down the new real-time notifications feature into an epic with stories and estimates",
+    "Write tickets for all backend work needed to support the v2 search API",
+    "Create a single bug ticket for the checkout crash when applying a promo code on iOS 17"
+  ],
   "preconditions": [
     "User has provided a description of the feature, task, or work to be ticketed.",
     "Agent has file system access for loading team preferences and persisting rules."
   ],
   "metaGuidance": [
-    "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria \u2014 not user-story paraphrases.",
+    "ROLE: expert Product Manager and Mobile Tech Lead. Triage autonomously, write developer-ready tickets with full context, and produce objectively testable acceptance criteria — not user-story paraphrases.",
     "EXPLORE FIRST: use tools to gather context before asking the user anything. Ask only for information you genuinely cannot determine with tools or from the request itself.",
-    "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path \u2014 complex sessions are where durable conventions emerge and where the investment pays off.",
+    "TEAM RULES: load and follow ./.workflow_rules/ticket_creation.md when it exists. Preferences there override your defaults. Rules are captured only on the Epic path — complex sessions are where durable conventions emerge and where the investment pays off.",
     "AUTONOMOUS TRIAGE: decide pathComplexity (Simple / Standard / Epic) yourself from the request. Surface your reasoning, then wait for confirmation.",
     "QUALITY FLOOR: every ticket must have a context-rich description, checkbox-style acceptance criteria that are objectively testable, and an effort estimate."
   ],
@@ -21,7 +28,7 @@
       "promptBlocks": {
         "goal": "Analyze the request, gather available context, and select the right complexity path before doing any ticket work.",
         "constraints": [
-          "Decide the path yourself \u2014 do not ask the user to choose.",
+          "Decide the path yourself — do not ask the user to choose.",
           "Load ./.workflow_rules/ticket_creation.md if it exists and let it influence your triage. If the file does not exist, note this explicitly in your output so the user knows team conventions were not applied.",
           "Set pathComplexity to exactly one of: Simple, Standard, or Epic."
         ],
@@ -29,7 +36,7 @@
           "Read any attached documents, linked PRDs, or referenced specs.",
           "Identify complexity signals: scope breadth, number of distinct deliverables, cross-team dependencies, technical unknowns, and estimated ticket count.",
           "Apply the triage rubric: Simple = single ticket, clear requirements, no blocking unknowns, minimal dependencies. Standard = multiple related tickets, moderate scope, some analysis needed. Epic = complex feature requiring decomposition, multiple teams or significant unknowns, likely 6+ tickets.",
-          "Upgrade triggers \u2014 escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
+          "Upgrade triggers — escalate to Standard if: request implies more than one clearly separate work item. Escalate to Epic if: multiple teams are involved, architecture decisions are unresolved, or you estimate more than five tickets.",
           "State your selected path and the top three reasons. Capture pathComplexity in context."
         ],
         "outputRequired": {
@@ -53,7 +60,7 @@
       "promptBlocks": {
         "goal": "Generate one complete, developer-ready Jira ticket for this request.",
         "constraints": [
-          "Acceptance criteria must be phrased as observable, testable conditions \u2014 not user-story restatements.",
+          "Acceptance criteria must be phrased as observable, testable conditions — not user-story restatements.",
           "Follow any team conventions from ./.workflow_rules/ticket_creation.md.",
           "Include all fields a developer needs to start work without asking follow-up questions."
         ],
@@ -103,7 +110,7 @@
           "Load ./.workflow_rules/ticket_creation.md and note any relevant team conventions.",
           "Identify: key stakeholders, team dependencies, technical constraints, known risks, and any conflicting requirements.",
           "Classify each gap as: Critical (blocks planning), Important (affects scope), or Nice-to-have (can proceed without it).",
-          "For Critical and Important gaps that tools cannot resolve, ask the user \u2014 in a single consolidated question block, not one at a time.",
+          "For Critical and Important gaps that tools cannot resolve, ask the user — in a single consolidated question block, not one at a time.",
           "After receiving answers, check whether any response reveals scope that would change `pathComplexity` (e.g. the user confirms three teams are involved, or the feature is narrower than initially assessed). If so, state the new classification and reasoning, and ask the user to confirm before continuing to Phase 2."
         ],
         "outputRequired": {
@@ -135,16 +142,16 @@
       "promptBlocks": {
         "goal": "Produce a structured plan that will drive ticket generation. This plan is the source of truth for scope.",
         "constraints": [
-          "Be explicit about scope boundaries \u2014 ambiguous scope will produce ambiguous tickets.",
+          "Be explicit about scope boundaries — ambiguous scope will produce ambiguous tickets.",
           "Success criteria must be measurable, not just descriptive.",
           "For Standard path: this plan feeds directly into batch ticket generation."
         ],
         "procedure": [
           "Write: Project Summary (2-3 sentences, what is being built and why).",
           "Write: Key Deliverables (bulleted list of distinct components or features).",
-          "Write: In-Scope (explicit list \u2014 prevents scope creep).",
-          "Write: Out-of-Scope (explicit exclusions \u2014 prevents misunderstandings).",
-          "Write: Success Criteria (measurable definition of done \u2014 each item verifiable).",
+          "Write: In-Scope (explicit list — prevents scope creep).",
+          "Write: Out-of-Scope (explicit exclusions — prevents misunderstandings).",
+          "Write: Success Criteria (measurable definition of done — each item verifiable).",
           "Write: High-Level Timeline (phases or milestones with rough sizing).",
           "Review: does every deliverable map clearly to implementable work? Is anything in scope that should be out?"
         ],
@@ -170,7 +177,7 @@
         "goal": "Break the approved plan into a logical work hierarchy that development teams can execute.",
         "constraints": [
           "Every item in the plan's In-Scope list must map to at least one work item in the hierarchy.",
-          "Dependencies must be explicit \u2014 not implied by ordering alone.",
+          "Dependencies must be explicit — not implied by ordering alone.",
           "Oversized stories (more than one sprint of work) should be split."
         ],
         "procedure": [
@@ -202,7 +209,7 @@
       "promptBlocks": {
         "goal": "Add effort estimates, risk assessments, and team assignments to each story in the hierarchy.",
         "constraints": [
-          "Conservative estimates are better than optimistic ones \u2014 note uncertainty explicitly.",
+          "Conservative estimates are better than optimistic ones — note uncertainty explicitly.",
           "Justify each estimate with one sentence of reasoning.",
           "Flag stories on the critical path."
         ],
@@ -212,7 +219,7 @@
           "Assign priority: must-have for MVP, should-have, nice-to-have.",
           "Note suggested team or skill area for each story.",
           "Identify critical path: which stories block the most downstream work? Surface these explicitly.",
-          "Flag any stories whose estimates feel uncertain \u2014 surface the unknowns rather than hiding them in a range."
+          "Flag any stories whose estimates feel uncertain — surface the unknowns rather than hiding them in a range."
         ],
         "outputRequired": {
           "notesMarkdown": "Total story point estimate, critical path items, high-risk stories."
@@ -277,7 +284,7 @@
       "promptBlocks": {
         "goal": "Extract actionable team preferences from this session and persist them so future runs use them automatically.",
         "constraints": [
-          "Only write rules that are genuinely reusable across future tickets \u2014 skip one-off project specifics.",
+          "Only write rules that are genuinely reusable across future tickets — skip one-off project specifics.",
           "Keep rules concise and actionable, not narrative.",
           "Append to ./.workflow_rules/ticket_creation.md rather than replacing it."
         ],
@@ -285,7 +292,7 @@
           "Review what conventions, preferences, or requirements emerged during this session.",
           "Identify patterns worth preserving: naming conventions, field usage, AC format preferences, estimation approach, labeling rules.",
           "Draft new rules as short, imperative statements (e.g., 'Use T-shirt sizing not Fibonacci', 'Always include a Figma link in design tickets').",
-          "Check against existing rules \u2014 avoid duplicates or contradictions.",
+          "Check against existing rules — avoid duplicates or contradictions.",
           "Append new rules to ./.workflow_rules/ticket_creation.md, creating the file if it does not exist."
         ],
         "outputRequired": {
@@ -300,4 +307,4 @@
       "requireConfirmation": false
     }
   ]
-}
+}

package/workflows/architecture-scalability-audit.json CHANGED Viewed

@@ -1,8 +1,15 @@
 {
   "id": "architecture-scalability-audit",
-  "name": "Architecture Scalability Audit (v1 \u2022 Evidence-Driven \u2022 Dimension-Scoped \u2022 rigorMode-Adaptive)",
+  "name": "Architecture Scalability Audit (v1 • Evidence-Driven • Dimension-Scoped • rigorMode-Adaptive)",
   "version": "0.1.0",
   "description": "Use this to audit a bounded codebase scope for architecture scalability. Declare which scalability dimensions matter (load, data volume, team size, feature extensibility, operational); the workflow investigates each and produces evidence-grounded findings.",
+  "about": "## Architecture Scalability Audit\n\nThis workflow audits a bounded codebase scope for scalability across the dimensions you care about. It does not produce generic \"won't scale\" warnings -- every finding must cite a specific file, class, method, or pattern, and every concern must name a concrete growth scenario (e.g. 10x traffic, 100x records, 3x team size).\n\n**What it does:**\nYou declare the scope boundary and the scalability dimensions that matter for your context. The workflow reads the codebase to understand the architecture, assigns one dedicated reviewer family per dimension, runs them in parallel from a shared fact packet, reconciles contradictions and blind spots through a synthesis loop, and delivers a per-dimension verdict (will_break / risk / fine) with an overall scalability readiness verdict.\n\n**The five scalability dimensions you can select:**\n- **load** -- handles more requests, users, or throughput\n- **data_volume** -- handles more records, storage, or query size\n- **team_org** -- more teams or developers working on this scope without friction\n- **feature_extensibility** -- more features added without rearchitecting\n- **operational** -- more deployments, environments, or operational complexity\n\n**When to use it:**\n- Before investing significantly in a component you expect to grow\n- When planning capacity for a new traffic tier or data volume increase\n- When evaluating a codebase acquired through a merger, partnership, or open-source adoption\n- When a team is growing and you want to know if the architecture will hold under parallel development\n\n**What it produces:**\nAn overall scalability verdict, per-dimension findings with specific code references and growth scenarios, cross-cutting concerns that span multiple dimensions, a prioritized concern list, and explicit callouts of what is already well-designed for scale.\n\n**How to get good results:**\nBe specific about the scope boundary -- name the service, module, or feature explicitly and say what is out of scope. Choose the dimensions relevant to your actual growth pressures; the workflow will not add dimensions you did not select. If you know a specific growth target (e.g. \"we expect 50x user growth in 18 months\"), mention it.",
+  "examples": [
+    "Audit the search service for load and data_volume scalability before the Black Friday traffic ramp",
+    "Check the analytics pipeline for data_volume and operational scalability -- we are moving from 1M to 100M events/day",
+    "Scalability audit of the user management module for team_org and feature_extensibility as we split into three squads",
+    "Audit the cart and checkout services for load scalability -- scope is /cart and /checkout only"
+  ],
   "recommendedPreferences": {
     "recommendedAutonomy": "guided",
     "recommendedRiskPolicy": "conservative"
@@ -20,7 +27,7 @@
     "DEFAULT BEHAVIOR: self-execute with tools. Ask only for true scope or dimension decisions you cannot resolve yourself.",
     "V2 DURABILITY: keep workflow truth in output.notesMarkdown and explicit context fields. Human-facing markdown artifacts are optional companions only.",
     "OWNERSHIP: the main agent owns the fact packet, synthesis, verdict calibration, and final handoff. Delegated dimension audits are evidence, not authority.",
-    "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant \u2014 surface them as advisory notes instead.",
+    "DIMENSION DISCIPLINE: audit only the dimensions the user declared. Do not add dimensions the user did not select, even if they look relevant — surface them as advisory notes instead.",
     "EVIDENCE FIRST: every risk or will_break finding must cite a specific file, class, method, or pattern in the codebase. Technology name alone is not evidence.",
     "GROWTH SCENARIO: every concern must name a growth scenario (e.g. 10x traffic, 100x records, 3x team size). Generic 'won't scale' findings are not acceptable.",
     "VERDICT TIERS: use will_break / risk / fine. Do not force a cleaner answer than the evidence supports.",
@@ -44,10 +51,10 @@
         ],
         "procedure": [
           "Read the codebase to understand the architecture: key components, entry points, data flows, and main patterns within the declared scope.",
-          "Present the five scalability dimensions and ask the user to select which apply: (1) load \u2014 handles more requests, users, or throughput; (2) data_volume \u2014 handles more records, storage, or query size; (3) team_org \u2014 more teams or developers working on this scope; (4) feature_extensibility \u2014 more features added without rearchitecting; (5) operational \u2014 more deployments, environments, or operational complexity.",
-          "Ask the user to confirm the scope boundary \u2014 what is explicitly in and explicitly out.",
-          "Classify audit complexity: Simple (1\u20132 dimensions, small scope), Medium (2\u20133 dimensions, moderate scope), Complex (4\u20135 dimensions or large scope).",
-          "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1\u20133. If any score is 1, gather more context before advancing."
+          "Present the five scalability dimensions and ask the user to select which apply: (1) load — handles more requests, users, or throughput; (2) data_volume — handles more records, storage, or query size; (3) team_org — more teams or developers working on this scope; (4) feature_extensibility — more features added without rearchitecting; (5) operational — more deployments, environments, or operational complexity.",
+          "Ask the user to confirm the scope boundary — what is explicitly in and explicitly out.",
+          "Classify audit complexity: Simple (1–2 dimensions, small scope), Medium (2–3 dimensions, moderate scope), Complex (4–5 dimensions or large scope).",
+          "Run a context-clarity check: score boundary_clarity, dimension_clarity, and codebase_familiarity 1–3. If any score is 1, gather more context before advancing."
         ],
         "outputRequired": {
           "notesMarkdown": "Scope boundary (in and out), declared dimensions with rationale, audit complexity classification, and any open boundary questions.",
@@ -105,7 +112,7 @@
         "procedure": [
           "Create a neutral `scalabilityFactPacket` containing: scope boundary (in and out), declared dimensions, key architectural patterns found, main components and their roles, data flow and storage patterns, concurrency and state management approach, dependency boundaries and coupling, deployment and runtime assumptions, and explicit open unknowns.",
           "Include realism signals: code that looks scalable at a glance but may have hidden limits (e.g. in-memory state, synchronous choke points, missing pagination, tight coupling between components).",
-          "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness \u2014 check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability \u2014 check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction \u2014 specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added \u2014 specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs \u2014 specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
+          "For each declared dimension, assign a reviewer family mission: load = examine request handling, concurrency, session/state management, caching, connection pools, and horizontal scaling readiness — check whether session state is in-memory or distributed, whether connection pools are bounded, whether synchronous bottlenecks exist in hot paths; data_volume = examine query patterns, pagination, indexing, result set bounds, storage growth, and data access layer scalability — check for unbounded queries (missing LIMIT/pagination), missing indexes on filtered columns, N+1 patterns in repository/service layers, and data structures that grow unboundedly; team_org = examine module coupling, shared state, and parallel development friction — specifically check import graphs for cross-module dependencies that would cause merge conflicts, identify shared mutable singletons or global state, look for test setup that requires spinning up adjacent modules, and check whether public interfaces change frequently or are stable; feature_extensibility = examine how much code changes when a new variant of a core concept is added — specifically look for switch/when/if-else chains on type discriminators that would need a new branch per feature, hardcoded business-rule constants, direct concrete dependencies instead of interfaces or abstractions, and files that are edited for every new feature; operational = examine deployment complexity, environment-specific behavior, observability, configuration surface, and operational runbook needs — specifically check for environment-specific code paths (if/switch on env vars that create different behavior per environment), configuration that must be updated in multiple places per deployment, whether logs and metrics cover the main operational failure modes, and whether a new deployment of this scope would require manual steps beyond a standard deploy.",
           "Set selectedReviewerFamilies to the list of assigned families (one per declared dimension). Set contradictionCount and blindSpotCount to 0."
         ],
         "outputRequired": {
@@ -124,7 +131,7 @@
             "var": "auditComplexity",
             "equals": "Simple"
           },
-          "text": "For a Simple audit, keep the fact packet compact \u2014 scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
+          "text": "For a Simple audit, keep the fact packet compact — scope summary, key patterns, and declared dimensions only. Skip exhaustive realism signal enumeration."
         }
       ],
       "requireConfirmation": false
@@ -149,11 +156,11 @@
           ],
           "Each reviewer family uses scalabilityFactPacket as primary truth.",
           "Reviewer-family outputs are raw evidence. The main agent owns synthesis and verdict assignment.",
-          "Each reviewer family audits only its declared dimension \u2014 no cross-dimension scope creep."
+          "Each reviewer family audits only its declared dimension — no cross-dimension scope creep."
         ],
         "procedure": [
           "Before investigating, restate your scalabilityHypothesis and name which dimension is most likely to challenge it.",
-          "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references \u2014 not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
+          "Run one investigation per declared dimension. For each dimension, the investigation must return: top findings, evidence for each finding (specific file, class, method, or pattern references — not just technology names), verdict tier per finding (will_break / risk / fine), growth scenario for each concern (e.g. 10x traffic, 100x records, 3x team size), biggest uncertainty, and likely false-confidence vector for this dimension.",
           "After completing all dimension investigations, synthesize explicitly: what was confirmed, what was genuinely new, what looks weak or overstated, and what changed your current hypothesis.",
           "Build dimensionFindings keyed by dimension containing: findings list, verdict summary, evidence quality assessment, and open questions.",
           "Identify cross-cutting concerns: architectural patterns or components that appear in findings from multiple dimensions."
@@ -244,10 +251,10 @@
               "This is a structured four-item check, not a free-form review."
             ],
             "procedure": [
-              "Check 1 \u2014 Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
-              "Check 2 \u2014 Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
-              "Check 3 \u2014 Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
-              "Check 4 \u2014 Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
+              "Check 1 — Technology-vs-usage: did any reviewer identify a scalable technology without checking actual usage patterns in the code? (e.g. Postgres was identified as the DB, but were N+1 queries, missing indexes, or unbounded result sets actually checked?) Fix any instances found.",
+              "Check 2 — Scope drift: did any reviewer audit components outside the declared scope boundary? Remove out-of-scope findings.",
+              "Check 3 — Undeclared relevant dimensions: does the codebase have patterns suggesting a declared-out dimension actually matters for this scope? If so, surface it as an advisory note without adding it to the audit verdict.",
+              "Check 4 — Growth scenario vagueness: does every concern name a specific growth scenario? If not, assign one now based on the most realistic growth pattern for this scope.",
               "Set blindSpotCount to the number of blind spots found across all four checks."
             ],
             "outputRequired": {
@@ -299,11 +306,11 @@
           "Do not advance to handoff with known hard gate failures."
         ],
         "procedure": [
-          "Verdict aggregation \u2014 derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
-          "Hard gate 1 \u2014 Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or downgrading to risk with an evidence-needed note.",
-          "Hard gate 2 \u2014 Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
-          "Hard gate 3 \u2014 Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
-          "Hard gate 4 \u2014 Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
+          "Verdict aggregation — derive scalabilityVerdict from dimensionFindings using these explicit rules: (1) at_risk if any declared dimension has a will_break finding; (2) conditional if no will_break findings exist but at least one dimension has a risk finding; (3) ready_to_scale if all declared dimensions have only fine findings; (4) inconclusive if any dimension still has evidenceWeak = true after the synthesis loop, making a reliable verdict impossible. Capture verdictRationale naming the specific dimension and finding that drove the verdict.",
+          "Hard gate 1 — Evidence grounding: for every will_break and risk finding in dimensionFindings, confirm it cites a specific file, class, method, or code pattern. Technology name alone fails this gate. Fix by locating the code evidence or downgrading to risk with an evidence-needed note.",
+          "Hard gate 2 — Dimension coverage: confirm every declared dimension has at least one substantive finding. A verdict of fine with supporting evidence counts. A dimension with no findings at all fails this gate.",
+          "Hard gate 3 — Hypothesis revisited: confirm that scalabilityHypothesis from Phase 1 is either confirmed or explicitly revised in synthesis notes. If it was never addressed, address it now.",
+          "Hard gate 4 — Growth scenario specificity: confirm every concern in dimensionFindings names a growth scenario. If any do not, assign one now.",
           "Set hardGatesPassed = true only when the verdict aggregation and all four gates pass. Set hardGateFailures to the list of any that needed fixing."
         ],
         "outputRequired": {
@@ -327,13 +334,13 @@
           "Do not drift into implementation planning or remediation design unless the user explicitly asks."
         ],
         "procedure": [
-          "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale \u2014 name the specific dimension and finding that drove it.",
+          "Open with the overall scalability readiness verdict (ready_to_scale / conditional / at_risk / inconclusive) and the verdictRationale — name the specific dimension and finding that drove it.",
           "For each declared dimension, give: dimension name, verdict tier (will_break / risk / fine), top finding with specific code reference, growth scenario, and severity.",
           "List cross-cutting concerns: patterns that create scalability risk across multiple dimensions.",
           "Revisit scalabilityHypothesis from Phase 1: was it confirmed or revised? What evidence changed your view?",
           "Give a prioritized concern list ordered by: (1) will_break findings first, (2) risk findings by severity, (3) cross-cutting concerns, (4) fine findings worth noting as already solid.",
           "Surface any advisory notes for undeclared dimensions that may be worth considering.",
-          "State what is already well-designed for scale \u2014 not everything should be a concern."
+          "State what is already well-designed for scale — not everything should be a concern."
         ],
         "outputRequired": {
           "notesMarkdown": "Decision-ready scalability handoff: overall verdict, per-dimension summary with code references, prioritized concerns, cross-cutting concerns, hypothesis outcome, and what is already solid."
@@ -342,7 +349,7 @@
           "The handoff is verdict-first and evidence-grounded.",
           "Every concern is tied to a specific code reference and growth scenario.",
           "The hypothesis from Phase 1 is explicitly addressed.",
-          "What is already well-designed is stated \u2014 not just the concerns."
+          "What is already well-designed is stated — not just the concerns."
         ]
       },
       "requireConfirmation": false

package/workflows/bug-investigation.agentic.v2.json CHANGED Viewed

@@ -3,6 +3,13 @@
   "name": "Bug Investigation (v2 \u2022 Notes-First \u2022 WorkRail Executor)",
   "version": "2.0.0",
   "description": "Use this to diagnose a bug or unexpected behavior in code. Builds a hypothesis, gathers evidence, and proves or disproves the root cause before concluding.",
+  "about": "## Bug Investigation Workflow\n\nThis workflow guides an AI agent through a rigorous, evidence-driven investigation of a bug or unexpected behavior. It is designed to prevent the most common failure mode in AI debugging: jumping to a plausible-sounding conclusion without sufficient proof.\n\n**What it does:**\nThe workflow moves through triage, context gathering, hypothesis generation, evidence planning, iterative evidence collection, diagnosis validation, and a final handoff. It explicitly distinguishes between theories (formed by reading code) and proof (confirmed by running tests or reproducing the failure). The final output is a diagnosis with a confidence rating, the strongest alternative explanations that were ruled out, and a high-level fix direction -- not a patch.\n\n**When to use it:**\n- You have a specific bug report, failing test, or production incident to investigate\n- The root cause is not immediately obvious and multiple explanations are plausible\n- You want a trustworthy diagnosis before spending time writing a fix\n- The bug carries enough risk that you need to be confident before changing code\n\n**What it produces:**\nA structured investigation handoff covering: root cause type (single cause, multi-factor, working as designed, etc.), proof summary, ruled-out alternatives, residual uncertainty, likely files involved, and verification steps for whoever implements the fix.\n\n**How to get good results:**\nProvide repro steps, observed symptoms, and expected behavior upfront. Include any relevant logs, failing test commands, or environment details you already have. The more concrete the repro, the faster the workflow can gather real evidence rather than theorizing. If the bug is intermittent, say so -- the workflow adapts its rigor based on reproducibility confidence.",
+  "examples": [
+    "Investigate why the payments API returns 500 after deploying the rate limiter",
+    "Debug why the mobile app crashes on logout when a background sync is in progress",
+    "Find out why search results are missing items added in the last 10 minutes",
+    "Diagnose why CI passes locally but the integration test fails on the build server"
+  ],
   "recommendedPreferences": {
     "recommendedAutonomy": "guided",
     "recommendedRiskPolicy": "conservative"

package/workflows/coding-task-workflow-agentic.json CHANGED Viewed

@@ -3,6 +3,13 @@
   "name": "Agentic Task Dev Workflow (Invariants \u2022 Architecture \u2022 Vertical Slices \u2022 PR Sizing \u2022 Audits \u2022 Resumable)",
   "version": "1.5.0",
   "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
+  "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
+  "examples": [
+    "Implement JWT refresh token rotation in the auth service",
+    "Fix the race condition in the cache invalidation path when concurrent writes occur",
+    "Refactor the payment flow to use a Result type instead of throwing exceptions",
+    "Add pagination support to the messaging inbox API endpoint"
+  ],
   "recommendedPreferences": {
     "recommendedAutonomy": "guided",
     "recommendedRiskPolicy": "conservative"

package/workflows/coding-task-workflow-agentic.lean.v2.json CHANGED Viewed

@@ -1,8 +1,15 @@
 {
   "id": "coding-task-workflow-agentic",
-  "name": "Agentic Task Dev Workflow (Lean \u2022 Notes-First \u2022 WorkRail Executor)",
+  "name": "Agentic Task Dev Workflow (Lean • Notes-First • WorkRail Executor)",
   "version": "1.0.0",
   "description": "Use this to implement a software feature or task. Follows a plan-then-execute approach with architecture decisions, invariant tracking, and final verification.",
+  "about": "## Agentic Coding Task Workflow\n\nThis workflow structures the full lifecycle of a software implementation task: from understanding and classifying the work, through architecture decisions and incremental implementation, to final verification and handoff.\n\n### What it does\n\nThe workflow guides an AI agent through a disciplined plan-then-execute process. It begins by analyzing the task to determine complexity, risk, and the right level of rigor (QUICK, STANDARD, or THOROUGH). For non-trivial tasks, it then gathers codebase context, surfaces invariants and non-goals, generates competing design candidates, and selects an approach before writing a single line of code. Implementation proceeds slice by slice, with built-in verification gates after each slice. A final integration verification pass confirms acceptance criteria are met before handoff.\n\n### When to use it\n\nUse this workflow whenever you are implementing a feature, fixing a non-trivial bug, or making an architectural change in a real codebase. It is especially valuable when:\n- The task touches multiple files or systems\n- There is meaningful risk of regressions or invariant violations\n- You want the agent to surface trade-offs and commit to a reasoned design decision rather than guessing\n- You need a resumable, auditable record of what was decided and why\n\nFor quick one-liner fixes or very small changes, the workflow includes a fast path that skips heavyweight planning.\n\n### What it produces\n\n- An `implementation_plan.md` artifact covering the selected approach, vertical slices, test design, and philosophy alignment\n- A `spec.md` for large or high-risk tasks, capturing observable behavior and acceptance criteria\n- Step-level notes in WorkRail that serve as a durable execution log\n- A PR-ready handoff summary with acceptance criteria status, invariant proofs, and follow-up tickets\n\n### How to get good results\n\n- Provide a clear task description and at least partial acceptance criteria before starting\n- If you have coding philosophy or project conventions configured in session rules or Memory MCP, the workflow will apply them automatically as a design lens\n- Let the workflow classify complexity and rigor itself; override only if the classification is clearly wrong\n- For large or high-risk tasks, review the architecture decision step before implementation begins",
+  "examples": [
+    "Implement JWT refresh token rotation in the auth service",
+    "Fix the race condition in the cache invalidation path when concurrent writes occur",
+    "Refactor the payment flow to use a Result type instead of throwing exceptions",
+    "Add pagination support to the messaging inbox API endpoint"
+  ],
   "recommendedPreferences": {
     "recommendedAutonomy": "guided",
     "recommendedRiskPolicy": "conservative"
@@ -21,9 +28,10 @@
     "SUBAGENT SYNTHESIS: treat subagent output as evidence, not conclusions. State your hypothesis before delegating, then interrogate what came back: what was missed, wrong, or new? Say what changed your mind or what you still reject, and why.",
     "PARALLELISM: when reads, audits, or delegations are independent, run them in parallel inside the phase. Parallelize cognition; serialize synthesis and canonical writes.",
     "PHILOSOPHY LENS: apply the user's coding philosophy (from active session rules) as the evaluation lens. Flag violations by principle name, not as generic feedback. If principles conflict, surface the tension explicitly instead of silently choosing.",
-    "VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness \u2014 in that order of reliability.",
+    "VALIDATION: prefer static/compile-time safety over runtime checks. Use build, type-checking, and tests as the primary proof of correctness — in that order of reliability.",
     "DRIFT HANDLING: when reality diverges from the plan, update the plan artifact and re-audit deliberately rather than accumulating undocumented drift.",
-    "NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS."
+    "NEVER COMMIT MARKDOWN FILES UNLESS USER EXPLICITLY ASKS.",
+    "SLICE DISCIPLINE: Phase 6 is a loop -- implement ONE slice per iteration. Do not implement multiple slices at once. The verification loop exists to catch drift per slice, not retroactively."
   ],
   "references": [
     {
@@ -107,7 +115,7 @@
     },
     {
       "id": "phase-1b-design-deep",
-      "title": "Phase 1b: Design Generation (Injected Routine \u2014 Tension-Driven Design)",
+      "title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
       "runCondition": {
         "and": [
           {
@@ -134,7 +142,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` \u2014 chosen design with rationale tied to tensions\n- `runnerUpApproach` \u2014 next-best option and why it lost\n- `architectureRationale` \u2014 tensions resolved vs accepted\n- `pivotTriggers` \u2014 conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` \u2014 failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
+      "prompt": "Read `design-candidates.md`, compare it to your original guess, and make the call.\n\nBe explicit about three things:\n- what the design work confirmed\n- what changed your mind\n- what you missed the first time\n\nThen pressure-test the leading option:\n- what's the strongest case against it?\n- what assumption breaks it?\n\nAfter the challenge batch, say:\n- what changed your mind\n- what didn't\n- which findings you reject and why\n\nPick the approach yourself. Don't hide behind the artifact. If the simplest thing works, prefer it. If the front-runner stops looking right after challenge, switch.\n\nCapture:\n- `selectedApproach` — chosen design with rationale tied to tensions\n- `runnerUpApproach` — next-best option and why it lost\n- `architectureRationale` — tensions resolved vs accepted\n- `pivotTriggers` — conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — failure mode of the selected approach\n- `acceptedTradeoffs`\n- `identifiedFailureModes`",
       "promptFragments": [
         {
           "id": "phase-1c-challenge-standard",
@@ -242,7 +250,7 @@
         "var": "taskComplexity",
         "not_equals": "Small"
       },
-      "prompt": "Turn the decision into a plan someone else could execute without guessing.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n   - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` \u2014 count of open issues that would materially affect implementation quality\n- `planConfidenceBand` \u2014 Low / Medium / High",
+      "prompt": "Turn the decision into a plan someone else could execute without guessing.\n\nUpdate `implementation_plan.md`.\n\nIt should cover:\n1. Problem statement\n2. Acceptance criteria (mirror `spec.md` if it exists; `spec.md` owns observable behavior)\n3. Non-goals\n4. Philosophy-driven constraints\n5. Invariants\n6. Selected approach + rationale + runner-up\n7. Vertical slices\n8. Work packages only if they actually help\n9. Test design\n10. Risk register\n11. PR packaging strategy\n12. Philosophy alignment per slice:\n   - [principle] -> [satisfied / tension / violated + 1-line why]\n\nCapture:\n- `implementationPlan`\n- `slices`\n- `testDesign`\n- `estimatedPRCount`\n- `followUpTickets` (initialize if needed)\n- `unresolvedUnknownCount` — count of open issues that would materially affect implementation quality\n- `planConfidenceBand` — Low / Medium / High\n\nThe plan is the deliverable for this step. Do not implement anything -- not a \"quick win\", not a file read that bleeds into edits, nothing. Execution begins in Phase 6, one slice at a time. If you find yourself writing code or editing source files right now, stop immediately.",
       "requireConfirmation": false
     },
     {
@@ -332,7 +340,7 @@
         {
           "id": "phase-4b-loop-decision",
           "title": "Loop Exit Decision",
-          "prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop \u2014 but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```",
+          "prompt": "Decide whether the plan needs another pass.\n\nIf `planFindings` is non-empty, keep going.\nIf it's empty, stop — but say what you checked so the clean pass means something.\nIf you've hit the limit, stop and record what still bothers you.\n\nThen emit the required loop-control artifact in this shape (`decision` must be `continue` or `stop`):\n```json\n{\n  \"artifacts\": [{\n    \"kind\": \"wr.loop_control\",\n    \"decision\": \"continue\"\n  }]\n}\n```",
           "requireConfirmation": true,
           "outputContract": {
             "contractRef": "wr.contracts.loop_control"
@@ -369,7 +377,7 @@
         {
           "id": "phase-6a-implement-slice",
           "title": "Implement Slice",
-          "prompt": "Implement only the current slice: `{{currentSlice.name}}`.\n\nBefore you code, check whether the plan is still valid:\n- if the pivot triggers fired or the assumptions went stale, stop and go back to planning\n- if the target files or symbols no longer match, stop and re-plan\n\nStay in this slice.\n- don't do the rest of the plan early\n- only pull forward later-slice work if you absolutely need it to make this slice compile or integrate, and count that as `unexpectedScopeChange = true`\n- keep the changes incremental\n- run tests and build to prove the slice works\n\nTrack whether this slice required:\n- a new special-case (`specialCaseIntroduced`)\n- an unplanned abstraction (`unplannedAbstractionIntroduced`)\n- unexpected file changes outside planned scope (`unexpectedScopeChange`)\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `prStrategy = MultiPR`\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nCapture:\n- `specialCaseIntroduced`\n- `unplannedAbstractionIntroduced`\n- `unexpectedScopeChange`\n- `verifyNeeded`",
+          "prompt": "Implement the current slice: `{{currentSlice.name}}`.\n\nBefore writing a single line of code, declare your scope:\n- List the exact files and symbols this slice touches\n- Confirm none of them belong to a later slice\n- If you have already edited files from this or any other slice in a previous step, stop and report it\n\nHard scope rule: you may only modify what is described in `{{currentSlice.name}}`. Anything outside that boundary is out of scope for this iteration -- not \"do it early\", not \"while I'm here\". If you discover you need to touch something outside this slice to make it compile or integrate, set `unexpectedScopeChange = true` and do the minimum necessary to stay green, then stop.\n\nImplement incrementally. Run tests and build to prove the slice works before advancing.\n\nTrack:\n- `specialCaseIntroduced` -- did this slice require a new special-case?\n- `unplannedAbstractionIntroduced` -- did this slice introduce an abstraction not in the plan?\n- `unexpectedScopeChange` -- did this slice touch files outside its planned scope?\n\nSet `verifyNeeded` to true if ANY of:\n- `sliceIndex` is odd (verify every 2 slices)\n- `prStrategy = MultiPR`\n- `specialCaseIntroduced = true`\n- `unplannedAbstractionIntroduced = true`\n- `unexpectedScopeChange = true`\n- tests or build failed\n\nCapture: `specialCaseIntroduced`, `unplannedAbstractionIntroduced`, `unexpectedScopeChange`, `verifyNeeded`",
           "requireConfirmation": false
         },
         {