@exaudeus/workrail 3.64.0 → 3.66.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/console-ui/assets/{index-DGM664Gr.js → index-BynU38Vu.js} +9 -9
  2. package/dist/console-ui/assets/{index-DGj8EsFR.css → index-DHrKiMCf.css} +1 -1
  3. package/dist/console-ui/index.html +2 -2
  4. package/dist/manifest.json +44 -36
  5. package/dist/mcp/handlers/v2-advance-core/outcome-success.js +65 -0
  6. package/dist/mcp/handlers/v2-advance-events.d.ts +11 -0
  7. package/dist/mcp/handlers/v2-advance-events.js +19 -0
  8. package/dist/mcp/output-schemas.d.ts +4 -4
  9. package/dist/types/workflow-definition.d.ts +1 -0
  10. package/dist/v2/durable-core/constants.d.ts +1 -0
  11. package/dist/v2/durable-core/constants.js +1 -0
  12. package/dist/v2/durable-core/domain/prompt-renderer.d.ts +1 -0
  13. package/dist/v2/durable-core/domain/prompt-renderer.js +40 -0
  14. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +294 -0
  15. package/dist/v2/durable-core/schemas/session/events.d.ts +90 -0
  16. package/dist/v2/durable-core/schemas/session/events.js +16 -0
  17. package/dist/v2/projections/session-metrics.d.ts +15 -0
  18. package/dist/v2/projections/session-metrics.js +102 -0
  19. package/dist/v2/usecases/console-routes.js +59 -0
  20. package/dist/v2/usecases/console-service.js +18 -4
  21. package/dist/v2/usecases/console-types.d.ts +5 -0
  22. package/docs/authoring-v2.md +30 -7
  23. package/docs/authoring.md +28 -0
  24. package/package.json +1 -1
  25. package/spec/authoring-spec.json +37 -0
  26. package/spec/workflow.schema.json +5 -0
  27. package/workflows/adaptive-ticket-creation.json +2 -1
  28. package/workflows/architecture-scalability-audit.json +1 -0
  29. package/workflows/bug-investigation.agentic.v2.json +1 -0
  30. package/workflows/classify-task-workflow.json +1 -0
  31. package/workflows/coding-task-workflow-agentic.json +1 -0
  32. package/workflows/cross-platform-code-conversion.v2.json +8 -7
  33. package/workflows/document-creation-workflow.json +2 -1
  34. package/workflows/documentation-update-workflow.json +2 -1
  35. package/workflows/intelligent-test-case-generation.json +2 -1
  36. package/workflows/learner-centered-course-workflow.json +2 -1
  37. package/workflows/mr-review-workflow.agentic.v2.json +1 -0
  38. package/workflows/personal-learning-materials-creation-branched.json +1 -0
  39. package/workflows/presentation-creation.json +2 -1
  40. package/workflows/production-readiness-audit.json +1 -0
  41. package/workflows/relocation-workflow-us.json +1 -0
  42. package/workflows/routines/context-gathering.json +2 -1
  43. package/workflows/routines/design-review.json +1 -0
  44. package/workflows/routines/execution-simulation.json +2 -1
  45. package/workflows/routines/feature-implementation.json +4 -3
  46. package/workflows/routines/final-verification.json +1 -0
  47. package/workflows/routines/hypothesis-challenge.json +13 -3
  48. package/workflows/routines/ideation.json +1 -1
  49. package/workflows/routines/parallel-work-partitioning.json +1 -0
  50. package/workflows/routines/philosophy-alignment.json +2 -1
  51. package/workflows/routines/plan-analysis.json +2 -1
  52. package/workflows/routines/plan-generation.json +2 -1
  53. package/workflows/routines/tension-driven-design.json +1 -0
  54. package/workflows/scoped-documentation-workflow.json +2 -1
  55. package/workflows/test-artifact-loop-control.json +8 -2
  56. package/workflows/test-session-persistence.json +1 -0
  57. package/workflows/ui-ux-design-workflow.json +1 -0
  58. package/workflows/workflow-diagnose-environment.json +1 -0
  59. package/workflows/workflow-for-workflows.json +1 -0
  60. package/workflows/workflow-for-workflows.v2.json +28 -0
  61. package/workflows/wr.discovery.json +1 -0
  62. package/workflows/wr.shaping.json +21 -6
@@ -0,0 +1,102 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.projectSessionMetricsV2 = projectSessionMetricsV2;
const constants_js_1 = require("../durable-core/constants.js");
/**
 * Returns only the string entries of `value` when it is an array; otherwise [].
 * Shared by the run_completed payload and metrics_* context validation paths.
 */
function stringArrayOf(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value.filter((item) => typeof item === 'string');
}
/**
 * Returns `value` when it is a finite number; otherwise `fallback`.
 */
function finiteNumberOr(value, fallback) {
    return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
}
/**
 * Projects session-level metrics from a v2 event log.
 *
 * Scans for the first `run_completed` event (the source of git attribution
 * data) and merges in any `metrics_*` keys from `context_set` events that
 * belong to the same run. Returns null when no run has completed.
 *
 * All payload fields are validated defensively: wrong-typed values degrade
 * to null / empty rather than throwing.
 */
function projectSessionMetricsV2(events) {
    let runCompletedData = null;
    let runCompletedRunId = null;
    for (const e of events) {
        const asUnknown = e;
        if (asUnknown.kind === 'run_completed') {
            runCompletedData = asUnknown.data;
            runCompletedRunId = asUnknown.scope?.runId ?? null;
            break;
        }
    }
    if (runCompletedData === null) {
        return null;
    }
    // Shallow-merge metrics_* keys in event order; a later context_set
    // replaces an earlier value for the same key.
    const metricsContext = {};
    for (const e of events) {
        if (e.kind !== constants_js_1.EVENT_KIND.CONTEXT_SET)
            continue;
        // Only consider context scoped to the completed run (when scoped).
        if (runCompletedRunId !== null && e.scope?.runId !== runCompletedRunId)
            continue;
        const ctx = e.data.context;
        if (!ctx || typeof ctx !== 'object' || Array.isArray(ctx))
            continue;
        for (const [key, value] of Object.entries(ctx)) {
            if (key.startsWith('metrics_')) {
                metricsContext[key] = value;
            }
        }
    }
    const d = runCompletedData;
    const startGitSha = typeof d.startGitSha === 'string' ? d.startGitSha : null;
    const endGitSha = typeof d.endGitSha === 'string' ? d.endGitSha : null;
    const gitBranch = typeof d.gitBranch === 'string' ? d.gitBranch : null;
    const agentCommitShas = stringArrayOf(d.agentCommitShas);
    // Unknown or absent confidence degrades to 'none' rather than failing.
    const captureConfidenceRaw = d.captureConfidence;
    const captureConfidence = captureConfidenceRaw === 'high' || captureConfidenceRaw === 'medium' || captureConfidenceRaw === 'none'
        ? captureConfidenceRaw
        : 'none';
    const durationMs = finiteNumberOr(d.durationMs, undefined);
    const outcomeRaw = metricsContext['metrics_outcome'];
    const outcome = outcomeRaw === 'success' || outcomeRaw === 'partial' || outcomeRaw === 'abandoned' || outcomeRaw === 'error'
        ? outcomeRaw
        : null;
    const prNumbersRaw = metricsContext['metrics_pr_numbers'];
    const prNumbers = Array.isArray(prNumbersRaw)
        ? prNumbersRaw.filter((n) => typeof n === 'number' && Number.isFinite(n))
        : [];
    // Agent-reported SHAs from context take precedence over the
    // run_completed payload when both are present.
    const metricCommitShas = stringArrayOf(metricsContext['metrics_commit_shas']);
    const finalAgentCommitShas = metricCommitShas.length > 0 ? metricCommitShas : agentCommitShas;
    const filesChanged = finiteNumberOr(metricsContext['metrics_files_changed'], null);
    const linesAdded = finiteNumberOr(metricsContext['metrics_lines_added'], null);
    const linesRemoved = finiteNumberOr(metricsContext['metrics_lines_removed'], null);
    return {
        startGitSha,
        endGitSha,
        gitBranch,
        agentCommitShas: finalAgentCommitShas,
        captureConfidence,
        durationMs,
        outcome,
        prNumbers,
        filesChanged,
        linesAdded,
        linesRemoved,
    };
}
@@ -41,6 +41,8 @@ const express_1 = __importDefault(require("express"));
41
41
  const path_1 = __importDefault(require("path"));
42
42
  const fs_1 = __importDefault(require("fs"));
43
43
  const os_1 = __importDefault(require("os"));
44
+ const child_process_1 = require("child_process");
45
+ const util_1 = require("util");
44
46
  const worktree_service_js_1 = require("./worktree-service.js");
45
47
  const workflow_js_1 = require("../../types/workflow.js");
46
48
  const dev_mode_js_1 = require("../../mcp/dev-mode.js");
@@ -467,6 +469,63 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
467
469
  res.status(status).json({ success: false, error: error.message });
468
470
  });
469
471
  });
472
+ const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
473
+ const DIFF_GIT_TIMEOUT_MS = 10000;
474
// Decides whether a caught value is a child_process execFile failure
// (timeout kill or spawn-level error) as opposed to an unrelated exception.
function isDiffExecError(e) {
    // Non-Error values can never carry exec metadata.
    if (!(e instanceof Error)) {
        return false;
    }
    // execFile errors always expose a `killed` property (true on timeout).
    const hasKilledFlag = 'killed' in e;
    // Spawn-level failures (e.g. ENOENT for a missing git binary) report a
    // syscall of the form "spawn git".
    const syscall = e.syscall ?? '';
    return hasKilledFlag || syscall.startsWith('spawn');
}
482
// GET /api/v2/sessions/:sessionId/diff-summary
// Computes a git diff --shortstat summary between the session's recorded
// start and end SHAs. 404/500 for session load failures, 422 when metrics
// or SHAs are missing, 503 when git itself fails or times out.
app.get('/api/v2/sessions/:sessionId/diff-summary', async (req, res) => {
    const { sessionId } = req.params;
    const sessionResult = await consoleService.getSessionDetail(sessionId);
    if (sessionResult.isErr()) {
        const { code, message } = sessionResult.error;
        const status = code === 'SESSION_LOAD_FAILED' ? 404 : 500;
        res.status(status).json({ success: false, error: message });
        return;
    }
    const sessionDetail = sessionResult.value;
    // The next three guards are data gaps in the session, not server faults.
    const metrics = sessionDetail.metrics;
    if (!metrics) {
        res.status(422).json({ success: false, error: 'No metrics available for this session' });
        return;
    }
    const { startGitSha, endGitSha } = metrics;
    if (!startGitSha || !endGitSha) {
        res.status(422).json({ success: false, error: 'Git SHAs not available in session metrics' });
        return;
    }
    const repoRoot = sessionDetail.repoRoot;
    if (!repoRoot) {
        res.status(422).json({ success: false, error: 'Repo root not available for this session' });
        return;
    }
    try {
        const gitArgs = ['diff', `${startGitSha}..${endGitSha}`, '--shortstat'];
        const { stdout } = await execFileAsync('git', gitArgs, { cwd: repoRoot, encoding: 'utf-8', timeout: DIFF_GIT_TIMEOUT_MS });
        // --shortstat prints e.g. "3 files changed, 10 insertions(+), 2 deletions(-)";
        // the insertion/deletion groups are omitted when zero.
        const match = stdout.trim().match(/(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?/);
        if (!match) {
            // An empty diff (identical trees) produces no shortstat line at all.
            res.json({ success: true, data: { linesAdded: 0, linesRemoved: 0, filesChanged: 0 } });
            return;
        }
        const filesChanged = parseInt(match[1] ?? '0', 10);
        const linesAdded = parseInt(match[2] ?? '0', 10);
        const linesRemoved = parseInt(match[3] ?? '0', 10);
        res.json({ success: true, data: { linesAdded, linesRemoved, filesChanged } });
    }
    catch (e) {
        if (!isDiffExecError(e)) {
            // Unexpected failure: propagate to the error middleware.
            throw e;
        }
        const timedOut = e instanceof Error && 'killed' in e && e.killed;
        const errMsg = timedOut
            ? 'Diff timed out: repository too large or slow'
            : `Diff failed: git unavailable or invalid SHAs`;
        res.status(503).json({ success: false, error: errMsg });
    }
});
470
529
  if (workflowService) {
471
530
  app.get('/api/v2/workflows', async (_req, res) => {
472
531
  try {
@@ -47,6 +47,7 @@ const node_outputs_js_1 = require("../projections/node-outputs.js");
47
47
  const advance_outcomes_js_1 = require("../projections/advance-outcomes.js");
48
48
  const artifacts_js_1 = require("../projections/artifacts.js");
49
49
  const run_context_js_1 = require("../projections/run-context.js");
50
+ const session_metrics_js_1 = require("../projections/session-metrics.js");
50
51
  const sorted_event_log_js_1 = require("../durable-core/sorted-event-log.js");
51
52
  const run_execution_trace_js_1 = require("../projections/run-execution-trace.js");
52
53
  const constants_js_1 = require("../durable-core/constants.js");
@@ -198,18 +199,28 @@ class ConsoleService {
198
199
  message: `Failed to load session ${sessionIdStr}: ${storeErr.message}`,
199
200
  }))
200
201
  .andThen((truth) => {
202
+ const metrics = (0, session_metrics_js_1.projectSessionMetricsV2)(truth.events);
203
+ const repoRoot = extractRepoRoot(truth.events);
201
204
  const dagRes = (0, run_dag_js_1.projectRunDagV2)(truth.events);
202
205
  const detailRA = (() => {
203
206
  if (dagRes.isErr()) {
204
207
  return resolveRunCompletion(truth.events, this.ports.snapshotStore)
205
- .map((completionMap) => projectSessionDetail(sessionId, truth, completionMap, {}, {}));
208
+ .map((completionMap) => ({
209
+ ...projectSessionDetail(sessionId, truth, completionMap, {}, {}),
210
+ metrics,
211
+ repoRoot,
212
+ }));
206
213
  }
207
214
  const dag = dagRes.value;
208
215
  return neverthrow_1.ResultAsync.combine([
209
216
  resolveRunCompletion(truth.events, this.ports.snapshotStore),
210
217
  resolveStepLabels(dag, this.ports.snapshotStore, this.ports.pinnedWorkflowStore),
211
218
  resolveWorkflowNames(dag, this.ports.pinnedWorkflowStore),
212
- ]).map(([completionMap, stepLabels, workflowNames]) => projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames));
219
+ ]).map(([completionMap, stepLabels, workflowNames]) => ({
220
+ ...projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames),
221
+ metrics,
222
+ repoRoot,
223
+ }));
213
224
  })();
214
225
  const isLiveRA = neverthrow_1.ResultAsync.fromSafePromise(isSessionLiveFromEventLog(sessionIdStr));
215
226
  return neverthrow_1.ResultAsync.combine([detailRA, isLiveRA]).andThen(([detail, isLive]) => {
@@ -632,6 +643,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
632
643
  return false;
633
644
  return Object.values(contextRes.value.byRunId).some((runCtx) => runCtx.context['is_autonomous'] === 'true');
634
645
  })();
646
+ const metrics = (0, session_metrics_js_1.projectSessionMetricsV2)(events);
635
647
  const runs = Object.values(dag.runsById);
636
648
  const run = runs[0];
637
649
  if (!run) {
@@ -656,6 +668,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
656
668
  isAutonomous,
657
669
  isLive,
658
670
  parentSessionId,
671
+ metrics,
659
672
  };
660
673
  }
661
674
  const workflow = run.workflow;
@@ -702,6 +715,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
702
715
  isAutonomous,
703
716
  isLive,
704
717
  parentSessionId,
718
+ metrics,
705
719
  };
706
720
  }
707
721
  function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, workflowNames, skippedStepsMap = {}) {
@@ -712,7 +726,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
712
726
  const sessionTitle = sortedEventsRes.isOk() ? deriveSessionTitle(sortedEventsRes.value) : null;
713
727
  const dagRes = (0, run_dag_js_1.projectRunDagV2)(events);
714
728
  if (dagRes.isErr()) {
715
- return { sessionId, sessionTitle, health: sessionHealth, runs: [] };
729
+ return { sessionId, sessionTitle, health: sessionHealth, runs: [], metrics: null, repoRoot: null };
716
730
  }
717
731
  const statusRes = sortedEventsRes.isOk() ? (0, run_status_signals_js_1.projectRunStatusSignalsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
718
732
  const gapsRes = sortedEventsRes.isOk() ? (0, gaps_js_1.projectGapsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
@@ -781,7 +795,7 @@ function projectSessionDetail(sessionId, truth, completionByRunId, stepLabels, w
781
795
  skippedSteps: skippedStepsMap[run.runId] ?? [],
782
796
  };
783
797
  });
784
- return { sessionId, sessionTitle, health: sessionHealth, runs };
798
+ return { sessionId, sessionTitle, health: sessionHealth, runs, metrics: null, repoRoot: null };
785
799
  }
786
800
  function deriveRunStatus(isBlocked, hasUnresolvedCriticalGaps, isComplete) {
787
801
  if (isBlocked)
@@ -1,3 +1,5 @@
1
+ import type { SessionMetricsV2 } from '../projections/session-metrics.js';
2
+ export type { SessionMetricsV2 };
1
3
  export type ConsoleRunStatus = 'in_progress' | 'complete' | 'complete_with_gaps' | 'blocked';
2
4
  export type ConsoleSessionStatus = ConsoleRunStatus | 'dormant';
3
5
  export type ConsoleSessionHealth = 'healthy' | 'corrupt';
@@ -21,6 +23,7 @@ export interface ConsoleSessionSummary {
21
23
  readonly isAutonomous: boolean;
22
24
  readonly isLive: boolean;
23
25
  readonly parentSessionId: string | null;
26
+ readonly metrics: SessionMetricsV2 | null;
24
27
  }
25
28
  export interface ConsoleSessionListResponse {
26
29
  readonly sessions: readonly ConsoleSessionSummary[];
@@ -90,6 +93,8 @@ export interface ConsoleSessionDetail {
90
93
  readonly runs: readonly ConsoleDagRun[];
91
94
  readonly isLive?: boolean;
92
95
  readonly liveActivity?: readonly ConsoleToolActivity[] | null;
96
+ readonly metrics: SessionMetricsV2 | null;
97
+ readonly repoRoot: string | null;
93
98
  }
94
99
  export type ConsoleValidationOutcome = 'pass' | 'fail';
95
100
  export interface ConsoleValidationResult {
@@ -219,22 +219,47 @@ Important implementation detail:
219
219
 
220
220
  ### Session analytics context keys (`metrics_*`)
221
221
 
222
- The `projectSessionMetricsV2` projection (planned -- not yet implemented) reads a set of `metrics_*` context keys to build session attribution data. These keys are not validated by the engine -- nothing will fail if they are absent or malformed. But absent or wrong data produces permanently incorrect analytics with no error or warning.
222
+ The engine reads `metrics_*` context keys from the final `continue_workflow` call to build session attribution data for the `run_completed` event. These keys feed `captureConfidence`, `agentCommitShas`, and related fields.
223
223
 
224
- Set these keys in your step's `Capture:` footer.
224
+ **Recommended approach: set `metricsProfile` at workflow level**
225
+
226
+ The simplest way to instrument a workflow is to declare `metricsProfile` as a top-level field in the workflow JSON. The engine then injects the appropriate footer instructions into step prompts automatically -- no per-step `Capture:` text needed.
227
+
228
+ ```json
229
+ {
230
+ "metricsProfile": "coding"
231
+ }
232
+ ```
233
+
234
+ Profile selection guide:
235
+
236
+ | Profile | When to use | What the engine injects |
237
+ |---|---|---|
238
+ | `"coding"` | Workflow produces git commits (implementation, refactoring, bug-fix) | SHA accumulation reminder on every step; outcome/PR/diff reminder on final step |
239
+ | `"review"` | Workflow produces a review decision on a PR or MR | PR numbers + outcome reminder on final step only |
240
+ | `"research"` | Workflow produces a finding or recommendation but no commits | Outcome-only reminder on final step only |
241
+ | `"none"` or absent | Meta-workflows, utilities, authoring tools | No injection -- existing behavior unchanged |
242
+
243
+ The engine does NOT derive the profile from tags automatically. Authors must set this field explicitly. When using `workflow-for-workflows` to author or modernize a workflow, the `phase-7b` step will prompt you for this decision.
244
+
245
+ **Final step detection**: The engine injects the final-step footer on the last top-level step, or on the exit step of a loop that is the last top-level step. A loop in a non-terminal position does not trigger the final-step footer on its exit step.
225
246
 
226
247
  **SHA accumulation rule (critical)**
227
248
 
228
249
  `context_set` uses shallow merge: each key is replaced, not merged. If you set `metrics_commit_shas: ["abc123"]` at step 5 and then set `metrics_commit_shas: ["def456"]` at step 9, the value at step 9 is `["def456"]` -- `abc123` is permanently gone.
229
250
 
230
- Every step that adds commits must send the **full accumulated list** -- read the current value from context, append new SHAs, and send the complete list.
251
+ Every step that adds commits must send the **full accumulated list** -- read the current value from context, append new SHAs, and send the complete list. The engine-injected footer includes an explicit reminder of this rule.
231
252
 
232
253
  ```
233
254
  Example (correct): metrics_commit_shas: ["abc123", "def456", "ghi789"]
234
255
  Example (wrong): metrics_commit_shas: ["ghi789"] -- loses abc123 and def456
235
256
  ```
236
257
 
237
- **Commit step `Capture:` footer** (copy this into every step that creates commits):
258
+ **Manual `Capture:` footers (if you cannot use `metricsProfile`)**
259
+
260
+ If `metricsProfile` is not appropriate for your workflow, add these footers manually.
261
+
262
+ Commit step `Capture:` footer (copy into every step that creates commits):
238
263
 
239
264
  ```
240
265
  Capture (every time you commit code):
@@ -248,7 +273,7 @@ Capture (every time you commit code):
248
273
  Example (wrong): metrics_commit_shas: ["ghi789"] -- loses abc123 and def456
249
274
  ```
250
275
 
251
- **Final handoff `Capture:` footer** (copy this into your final step):
276
+ Final handoff `Capture:` footer (copy into your final step):
252
277
 
253
278
  ```
254
279
  Capture (at final handoff only):
@@ -266,8 +291,6 @@ Capture (at final handoff only):
266
291
  (same accumulation rule as commit steps -- full list, not just final-step SHAs)
267
292
  ```
268
293
 
269
- Note: adding these keys to existing workflow JSON files (`coding-task-workflow-agentic.json` and others) is a separate follow-on PR. The templates above let you add them to new or custom workflows now.
270
-
271
294
  ### Assessment-gate authoring (v1)
272
295
 
273
296
  Assessment gates are now a shipped authoring/runtime feature, but the first slice is intentionally narrow.
package/docs/authoring.md CHANGED
@@ -731,6 +731,34 @@ Canonical current rules for authoring good WorkRail workflows. workflow.schema.j
731
731
  - Using a nested key context.metrics.commit_shas instead of the flat key metrics_commit_shas
732
732
  - Setting metrics_outcome at intermediate steps before the session outcome is known
733
733
 
734
+ ### metrics-profile-declaration
735
+ - **Level**: recommended
736
+ - **Status**: active
737
+ - **Scope**: workflow.definition, step.context-capture
738
+ - **Rule**: Declare metricsProfile at workflow level to enable engine-injected metrics instrumentation footers. Use 'coding' for implementation workflows, 'review' for code review workflows, 'research' for investigation workflows, 'design' for design/planning artifacts, 'ticket' for work-item creation. Omit or use 'none' for meta-workflows and utilities.
739
+ - **Why**: Without metricsProfile, captureConfidence is always 'none' and run_completed events carry no usable attribution data. The engine cannot auto-derive the profile from tags -- authors must set it explicitly.
740
+ - **Enforced by**: advisory
741
+
742
+ **Checks**
743
+ - Select 'coding' when the workflow produces git commits (implementation, refactoring, bug-fix, migration, documentation updates).
744
+ - Select 'review' when the workflow produces a review decision on a PR or MR.
745
+ - Select 'research' when the workflow produces a finding or recommendation but no commits (investigation, audit, analysis).
746
+ - Select 'design' when the workflow produces a design artifact (pitch, spec, ADR, architecture doc) but no commits.
747
+ - Select 'ticket' when the workflow creates or updates work items in an external system (Jira, GitHub Issues, Linear).
748
+ - Omit or use 'none' for authoring tools, meta-workflows, or workflows with no measurable outcome.
749
+ - Do not invent new profile values -- the closed set is: 'coding', 'review', 'research', 'design', 'ticket', 'none'.
750
+ - The engine does NOT derive the profile from spec/workflow-tags.json at runtime. Set the field explicitly.
751
+ - When using workflow-for-workflows to author or modernize a workflow, the phase-7b step will prompt for this decision.
752
+
753
+ **Anti-patterns**
754
+ - Leaving metricsProfile absent from a coding or review workflow and expecting automatic instrumentation
755
+ - Using metricsProfile 'coding' on a workflow that produces no commits (e.g., a documentation or planning workflow)
756
+ - Assuming the engine reads tags and derives the profile automatically
757
+
758
+ **Source refs**
759
+ - `src/v2/durable-core/domain/prompt-renderer.ts` (runtime) — buildMetricsSection() implements render-time footer injection based on metricsProfile.
760
+ - `spec/workflow.schema.json` (schema) — metricsProfile optional enum field definition.
761
+
734
762
 
735
763
  ## Artifacts and planning surfaces
736
764
  ### artifact-canonicality
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "3.64.0",
3
+ "version": "3.66.0",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -1385,6 +1385,43 @@
1385
1385
  "Using a nested key context.metrics.commit_shas instead of the flat key metrics_commit_shas",
1386
1386
  "Setting metrics_outcome at intermediate steps before the session outcome is known"
1387
1387
  ]
1388
+ },
1389
+ {
1390
+ "id": "metrics-profile-declaration",
1391
+ "status": "active",
1392
+ "level": "recommended",
1393
+ "scope": ["workflow.definition", "step.context-capture"],
1394
+ "rule": "Declare metricsProfile at workflow level to enable engine-injected metrics instrumentation footers. Use 'coding' for implementation workflows, 'review' for code review workflows, 'research' for investigation workflows, 'design' for design/planning artifacts, 'ticket' for work-item creation. Omit or use 'none' for meta-workflows and utilities.",
1395
+ "why": "Without metricsProfile, captureConfidence is always 'none' and run_completed events carry no usable attribution data. The engine cannot auto-derive the profile from tags -- authors must set it explicitly.",
1396
+ "enforcement": ["advisory"],
1397
+ "checks": [
1398
+ "Select 'coding' when the workflow produces git commits (implementation, refactoring, bug-fix, migration, documentation updates).",
1399
+ "Select 'review' when the workflow produces a review decision on a PR or MR.",
1400
+ "Select 'research' when the workflow produces a finding or recommendation but no commits (investigation, audit, analysis).",
1401
+ "Select 'design' when the workflow produces a design artifact (pitch, spec, ADR, architecture doc) but no commits.",
1402
+ "Select 'ticket' when the workflow creates or updates work items in an external system (Jira, GitHub Issues, Linear).",
1403
+ "Omit or use 'none' for authoring tools, meta-workflows, or workflows with no measurable outcome.",
1404
+ "Do not invent new profile values -- the closed set is: 'coding', 'review', 'research', 'design', 'ticket', 'none'.",
1405
+ "The engine does NOT derive the profile from spec/workflow-tags.json at runtime. Set the field explicitly.",
1406
+ "When using workflow-for-workflows to author or modernize a workflow, the phase-7b step will prompt for this decision."
1407
+ ],
1408
+ "antiPatterns": [
1409
+ "Leaving metricsProfile absent from a coding or review workflow and expecting automatic instrumentation",
1410
+ "Using metricsProfile 'coding' on a workflow that produces no commits (e.g., a documentation or planning workflow)",
1411
+ "Assuming the engine reads tags and derives the profile automatically"
1412
+ ],
1413
+ "sourceRefs": [
1414
+ {
1415
+ "kind": "runtime",
1416
+ "path": "src/v2/durable-core/domain/prompt-renderer.ts",
1417
+ "note": "buildMetricsSection() implements render-time footer injection based on metricsProfile."
1418
+ },
1419
+ {
1420
+ "kind": "schema",
1421
+ "path": "spec/workflow.schema.json",
1422
+ "note": "metricsProfile optional enum field definition."
1423
+ }
1424
+ ]
1388
1425
  }
1389
1426
  ]
1390
1427
  },
@@ -200,6 +200,11 @@
200
200
  "minItems": 1,
201
201
  "maxItems": 6,
202
202
  "uniqueItems": true
203
+ },
204
+ "metricsProfile": {
205
+ "type": "string",
206
+ "enum": ["coding", "review", "research", "design", "ticket", "none"],
207
+ "description": "Metrics instrumentation profile for this workflow. When set, the engine injects a footer into step prompts instructing the agent to accumulate and report metrics context keys (metrics_commit_shas, metrics_outcome, etc.). 'coding' injects SHA accumulation on every step and outcome/PR reporting on the final step. 'review' injects PR numbers + outcome on the final step. 'research', 'design', and 'ticket' inject outcome-only on the final step (identical behavior today, distinct semantics). 'none' or absent field disables injection entirely."
203
208
  }
204
209
  },
205
210
  "required": [
@@ -2,6 +2,7 @@
2
2
  "id": "adaptive-ticket-creation",
3
3
  "name": "Adaptive Ticket Creation Workflow",
4
4
  "version": "1.0.0",
5
+ "metricsProfile": "ticket",
5
6
  "description": "Use this to create high-quality Jira tickets for features, tasks, or epics. Automatically selects the right complexity path (Simple, Standard, or Epic) and generates properly structured tickets with acceptance criteria and estimates.",
6
7
  "about": "## Adaptive Ticket Creation Workflow\n\nUse this to create well-structured Jira tickets for features, tasks, or epics. The workflow automatically selects the right complexity path (Simple, Standard, or Epic) based on the request, so you don't have to decide upfront how much process you need.\n\n### What it produces\n\n- **Simple path**: one complete, developer-ready Jira ticket with a context-rich description, checkbox-style acceptance criteria, and an effort estimate.\n- **Standard path**: a high-level plan plus a batch of related tickets covering all deliverables.\n- **Epic path**: everything in Standard, plus full epic decomposition, per-story estimates with risk ratings, dependency mapping, and a reusable team rules file at `.workflow_rules/ticket_creation.md` that future runs load automatically.\n\n### When to use it\n\n- You need to create one or more Jira tickets and want them to be genuinely developer-ready.\n- You have a feature request, bug, task, or epic that needs to be broken down and estimated.\n- Your team has specific ticket conventions (naming, sizing, labels) -- the workflow learns and stores these on the Epic path.\n\n### How to get good results\n\n- Provide as much context as you have: PRD links, design files, existing related tickets, and any known constraints.\n- If your team has a `.workflow_rules/ticket_creation.md` file, the workflow loads it automatically and applies your conventions.\n- On the Epic path, the workflow asks you to approve the high-level plan and the decomposition before generating tickets. Use these checkpoints to catch scope issues early.\n- Acceptance criteria are written as checkbox-style observable conditions, not restatements of requirements. If your team has a specific AC format, describe it in the rules file.",
7
8
  "examples": [
@@ -307,4 +308,4 @@
307
308
  "requireConfirmation": false
308
309
  }
309
310
  ]
310
- }
311
+ }
@@ -2,6 +2,7 @@
2
2
  "id": "architecture-scalability-audit",
3
3
  "name": "Architecture Scalability Audit",
4
4
  "version": "0.1.0",
5
+ "metricsProfile": "research",
5
6
  "description": "Use this to audit a bounded codebase scope for architecture scalability. Declare which scalability dimensions matter (load, data volume, team size, feature extensibility, operational); the workflow investigates each and produces evidence-grounded findings.",
6
7
  "about": "## Architecture Scalability Audit\n\nThis workflow audits a bounded codebase scope for scalability across the dimensions you care about. It does not produce generic \"won't scale\" warnings -- every finding must cite a specific file, class, method, or pattern, and every concern must name a concrete growth scenario (e.g. 10x traffic, 100x records, 3x team size).\n\n**What it does:**\nYou declare the scope boundary and the scalability dimensions that matter for your context. The workflow reads the codebase to understand the architecture, assigns one dedicated reviewer family per dimension, runs them in parallel from a shared fact packet, reconciles contradictions and blind spots through a synthesis loop, and delivers a per-dimension verdict (will_break / risk / fine) with an overall scalability readiness verdict.\n\n**The five scalability dimensions you can select:**\n- **load** -- handles more requests, users, or throughput\n- **data_volume** -- handles more records, storage, or query size\n- **team_org** -- more teams or developers working on this scope without friction\n- **feature_extensibility** -- more features added without rearchitecting\n- **operational** -- more deployments, environments, or operational complexity\n\n**When to use it:**\n- Before investing significantly in a component you expect to grow\n- When planning capacity for a new traffic tier or data volume increase\n- When evaluating a codebase acquired through a merger, partnership, or open-source adoption\n- When a team is growing and you want to know if the architecture will hold under parallel development\n\n**What it produces:**\nAn overall scalability verdict, per-dimension findings with specific code references and growth scenarios, cross-cutting concerns that span multiple dimensions, a prioritized concern list, and explicit callouts of what is already well-designed for scale.\n\n**How to get good results:**\nBe specific about the scope boundary -- name the service, module, or 
feature explicitly and say what is out of scope. Choose the dimensions relevant to your actual growth pressures; the workflow will not add dimensions you did not select. If you know a specific growth target (e.g. \"we expect 50x user growth in 18 months\"), mention it.",
7
8
  "examples": [
@@ -14,6 +14,7 @@
14
14
  "recommendedAutonomy": "guided",
15
15
  "recommendedRiskPolicy": "conservative"
16
16
  },
17
+ "metricsProfile": "research",
17
18
  "preconditions": [
18
19
  "User has a specific bug report, failing test, or unexpected behavior to investigate.",
19
20
  "Agent has codebase access and can run tests, commands, or other deterministic evidence-gathering steps.",
@@ -2,6 +2,7 @@
2
2
  "id": "classify-task-workflow",
3
3
  "name": "Classify Task",
4
4
  "version": "0.1.0",
5
+ "metricsProfile": "none",
5
6
  "description": "Classifies a software task from the session goal into structured output variables used by coordinator scripts to decide which pipeline phases to run.",
6
7
  "about": "## Classify Task Workflow\n\nThis is a fast, single-step classification utility. It reads the session goal and outputs structured variables that coordinator scripts use to decide which pipeline phases to run.\n\n### What it does\n\nGiven a task description, the agent classifies the work along seven dimensions and recommends an ordered pipeline of workflow IDs to execute.\n\n### When to use it\n\nUse this workflow at the start of a coordinator pipeline when you need to decide which downstream workflows to run. It is intentionally fast and cheap -- one LLM step, no subagents, no codebase reads.\n\n### What it produces\n\nA structured classification block in the step notes containing all seven output variables:\n- `taskComplexity` -- Small / Medium / Large\n- `riskLevel` -- Low / Medium / High\n- `hasUI` -- true / false\n- `touchesArchitecture` -- true / false\n- `taskType` -- feature / bug-fix / refactor / investigation / docs / chore\n- `affectedDomains` -- array of likely codebase areas\n- `recommendedPipeline` -- ordered array of workflow IDs\n\n### How to get good results\n\nProvide a specific, concrete task description as the session goal. The more specific the goal, the more accurate the classification. When the goal is ambiguous, the workflow defaults to conservative (higher complexity, more pipeline phases).",
7
8
  "examples": [
@@ -14,6 +14,7 @@
14
14
  "recommendedAutonomy": "guided",
15
15
  "recommendedRiskPolicy": "conservative"
16
16
  },
17
+ "metricsProfile": "coding",
17
18
  "assessments": [
18
19
  {
19
20
  "id": "design-soundness-gate",
@@ -2,6 +2,7 @@
2
2
  "id": "cross-platform-code-conversion",
3
3
  "name": "Cross-Platform Code Conversion",
4
4
  "version": "0.1.0",
5
+ "metricsProfile": "coding",
5
6
  "description": "Use this to convert code from one platform to another (e.g. Android to iOS, iOS to Web). Triages files by difficulty, parallelizes easy translations, and handles platform-specific design decisions.",
6
7
  "about": "## Cross-Platform Code Conversion Workflow\n\nThis workflow guides an AI agent through converting code from one platform to another - for example, Android (Kotlin) to iOS (Swift), iOS to Web (TypeScript/React), or any similar migration. It handles everything from scoping and analysis through idiomatic conversion, build verification, and final handoff.\n\n### What it does\n\nThe workflow starts by scoping the migration and classifying its complexity (Small, Medium, or Large) and adaptation depth (low, moderate, or high). It then analyzes the source architecture to understand patterns, dependencies, concurrency models, and semantic contracts. Files are triaged into three buckets: mechanical translations delegated to subagents in parallel (Bucket A), library substitutions (Bucket B), and platform-specific code needing design decisions (Bucket C). For high-adaptation migrations, the workflow runs a full design generation phase to choose an idiomatic target-platform architecture before any code is written. Implementation proceeds batch by batch, with drift detection after each batch to catch files that turn out harder than classified. A final build-and-integration loop verifies the full converted codebase before handoff.\n\n### When to use it\n\nUse this workflow when migrating a module, feature, or full component from one platform to another. 
It is especially valuable when:\n- The source and target platforms have meaningfully different idioms (e.g., Kotlin coroutines vs Swift async/await, Hilt vs Swinject)\n- You want parallel delegation of mechanical work while keeping design-sensitive boundaries with the main agent\n- Semantic contracts (lifecycle, threading, cancellation, error handling) must be preserved across the migration\n- The target repo has existing architectural patterns the migrated code must fit into\n\nFor very small, straightforward file-by-file translations, the workflow includes a fast path that skips planning and triage.\n\n### What it produces\n\n- A triage matrix classifying every file into a conversion bucket\n- A semantic contract inventory for non-trivial migration boundaries\n- A target integration analysis mapping boundaries to their destination repo seams\n- Converted source files in the target platform's idioms\n- A passing build or typecheck on the full converted output\n- A handoff summary covering adaptation decisions, known gaps, and items needing manual review\n\n### How to get good results\n\n- Specify the exact scope of the migration - which files, modules, or features to convert\n- If the target repo is not in the same workspace, point the agent to it explicitly or configure the source-to-target path mapping\n- Review the triage and semantic contract inventory steps before conversion begins, especially for high-adaptation migrations\n- Flag any invariants that must survive the migration (API contracts, behavioral guarantees, threading assumptions)",
7
8
  "examples": [
@@ -78,7 +79,7 @@
78
79
  {
79
80
  "id": "phase-1-understand-source",
80
81
  "title": "Phase 1: Understand Source Code",
81
- "prompt": "Read and analyze the source code through a conversion lens \u2014 what will be easy to convert, what will be hard, and why.\n\nMap out:\n- Architecture and module structure\n- Key patterns used (MVI, MVVM, dependency injection, etc.)\n- External dependencies and what they do\n- Entry points and public API surface\n- Platform coupling depth: is the code cleanly layered or is platform-specific code smeared throughout? This directly determines how much falls into easy vs. hard buckets.\n- Concurrency model: Coroutines, Combine, RxJS, async/await? This is often the single hardest mapping decision.\n- DI approach: Dagger/Hilt, Swinject, Koin? DI frameworks rarely map 1:1.\n- Test coverage shape: unit tests on business logic (convert easily), UI tests (likely rewrite), integration tests (depends on infra).\n- Shared code boundaries: is there already a shared/common module that might not need conversion at all?\n- Non-trivial migration boundaries: public APIs, externally consumed module boundaries, and lifecycle/state/concurrency/resource boundaries that callers depend on.\n- Caller-visible guarantees for those boundaries. Examples include lifecycle/ownership, laziness vs eagerness, shared vs per-consumer behavior, cancellation/disposal, ordering/replay/buffering, failure behavior, threading/scheduling, or consistency/transaction guarantees.\n- Adaptation depth: classify whether the migration is `low`, `moderate`, or `high` adaptation based on architectural mismatch, missing target-side equivalents, lifecycle/state/concurrency mismatch, and the amount of adapter or redesign work needed.\n\nIdentify which files define or materially affect those boundaries and which of them will require target-repo integration analysis.\n\nCapture:\n- `sourceArchitecture`\n- `dependencies`\n- `publicApiSurface`\n- `platformCouplingAssessment`\n- `concurrencyModel`\n- `testCoverageShape`\n- `semanticBoundaryCandidates`\n- `boundaryCriticalFiles`\n- `adaptationProfile`",
82
+ "prompt": "Read and analyze the source code through a conversion lens what will be easy to convert, what will be hard, and why.\n\nMap out:\n- Architecture and module structure\n- Key patterns used (MVI, MVVM, dependency injection, etc.)\n- External dependencies and what they do\n- Entry points and public API surface\n- Platform coupling depth: is the code cleanly layered or is platform-specific code smeared throughout? This directly determines how much falls into easy vs. hard buckets.\n- Concurrency model: Coroutines, Combine, RxJS, async/await? This is often the single hardest mapping decision.\n- DI approach: Dagger/Hilt, Swinject, Koin? DI frameworks rarely map 1:1.\n- Test coverage shape: unit tests on business logic (convert easily), UI tests (likely rewrite), integration tests (depends on infra).\n- Shared code boundaries: is there already a shared/common module that might not need conversion at all?\n- Non-trivial migration boundaries: public APIs, externally consumed module boundaries, and lifecycle/state/concurrency/resource boundaries that callers depend on.\n- Caller-visible guarantees for those boundaries. Examples include lifecycle/ownership, laziness vs eagerness, shared vs per-consumer behavior, cancellation/disposal, ordering/replay/buffering, failure behavior, threading/scheduling, or consistency/transaction guarantees.\n- Adaptation depth: classify whether the migration is `low`, `moderate`, or `high` adaptation based on architectural mismatch, missing target-side equivalents, lifecycle/state/concurrency mismatch, and the amount of adapter or redesign work needed.\n\nIdentify which files define or materially affect those boundaries and which of them will require target-repo integration analysis.\n\nCapture:\n- `sourceArchitecture`\n- `dependencies`\n- `publicApiSurface`\n- `platformCouplingAssessment`\n- `concurrencyModel`\n- `testCoverageShape`\n- `semanticBoundaryCandidates`\n- `boundaryCriticalFiles`\n- `adaptationProfile`",
82
83
  "promptFragments": [
83
84
  {
84
85
  "id": "phase-1-small-light",
@@ -86,7 +87,7 @@
86
87
  "var": "conversionComplexity",
87
88
  "equals": "Small"
88
89
  },
89
- "text": "For Small conversions, keep this lightweight. A quick read of the files in scope is enough \u2014 don't map the entire architecture. Focus on identifying any platform-specific code that would prevent a straight translation."
90
+ "text": "For Small conversions, keep this lightweight. A quick read of the files in scope is enough don't map the entire architecture. Focus on identifying any platform-specific code that would prevent a straight translation."
90
91
  }
91
92
  ],
92
93
  "requireConfirmation": {
@@ -109,7 +110,7 @@
109
110
  }
110
111
  ]
111
112
  },
112
- "prompt": "For Small conversions, skip triage and planning \u2014 just convert.\n\n- Translate the files to the target platform idiomatically\n- Follow target platform naming and structure conventions\n- Map any dependencies to target equivalents\n- Convert tests if they exist\n- Run build or typecheck to verify\n\nIf something turns out harder than expected (deep platform coupling, no clean dependency equivalent, or meaningful architectural mismatch), update `conversionComplexity` to `Medium`, update `adaptationProfile` to `moderate` or `high` based on the newly discovered mismatch, and stop. The full triage and planning pipeline will activate for the remaining work.\n\nCapture:\n- `filesConverted`\n- `buildPassed`\n- `conversionComplexity`\n- `adaptationProfile`",
113
+ "prompt": "For Small conversions, skip triage and planning just convert.\n\n- Translate the files to the target platform idiomatically\n- Follow target platform naming and structure conventions\n- Map any dependencies to target equivalents\n- Convert tests if they exist\n- Run build or typecheck to verify\n\nIf something turns out harder than expected (deep platform coupling, no clean dependency equivalent, or meaningful architectural mismatch), update `conversionComplexity` to `Medium`, update `adaptationProfile` to `moderate` or `high` based on the newly discovered mismatch, and stop. The full triage and planning pipeline will activate for the remaining work.\n\nCapture:\n- `filesConverted`\n- `buildPassed`\n- `conversionComplexity`\n- `adaptationProfile`",
113
114
  "requireConfirmation": false
114
115
  },
115
116
  {
@@ -127,7 +128,7 @@
127
128
  }
128
129
  ]
129
130
  },
130
- "prompt": "Classify every file or module in scope into one of three buckets:\n\n**Bucket A \u2014 Literal translation**: Platform-agnostic business logic, data models, utilities, pure functions. These use no platform-specific APIs or libraries. Conversion is mechanical: translate the language syntax, follow target naming conventions, done. These will be delegated to subagents.\n\n**Bucket B \u2014 Library substitution**: Code that uses platform-specific libraries (networking, persistence, serialization, DI) but follows standard patterns. These need dependency mapping but the structure stays the same.\n\n**Bucket C \u2014 Platform-specific**: Code deeply tied to the platform (UI layer, lifecycle management, concurrency/threading, navigation, platform APIs). These need design decisions about target-platform idioms.\n\nFor each file or module, list:\n- File/module name\n- Bucket (A, B, or C)\n- One-line reason for classification\n- Dependencies it has on other files in scope (so we know conversion order)\n- Whether it is `boundaryCritical` for a non-trivial migration boundary\n- Which semantic boundaries it affects from `semanticBoundaryCandidates`\n- Whether it will require target-repo integration analysis\n\nBoundary-critical files must not be treated as blind mechanical translation just because the syntax looks simple. If a file materially affects a semantic boundary or destination-repo seam, keep it with main-agent review.\n\nSort the work items within each bucket by dependency order (convert dependencies first).\n\nGroup Bucket A files into parallel batches of 3-5 files each. 
Each batch should contain files with no cross-dependencies so subagents can work independently.\n\nGroup Bucket B and C files into sequential batches by dependency order.\n\nEach batch should have: `name` (short label), `bucket` (A, B, or C), and `files` (list of file paths).\n\nCapture:\n- `bucketABatches` (parallel batches for subagent delegation)\n- `bucketBCBatches` (sequential batches for main agent)\n- `bucketACounts`\n- `bucketBCounts`\n- `bucketCCounts`\n- `boundaryCriticalItems`",
131
+ "prompt": "Classify every file or module in scope into one of three buckets:\n\n**Bucket A Literal translation**: Platform-agnostic business logic, data models, utilities, pure functions. These use no platform-specific APIs or libraries. Conversion is mechanical: translate the language syntax, follow target naming conventions, done. These will be delegated to subagents.\n\n**Bucket B Library substitution**: Code that uses platform-specific libraries (networking, persistence, serialization, DI) but follows standard patterns. These need dependency mapping but the structure stays the same.\n\n**Bucket C Platform-specific**: Code deeply tied to the platform (UI layer, lifecycle management, concurrency/threading, navigation, platform APIs). These need design decisions about target-platform idioms.\n\nFor each file or module, list:\n- File/module name\n- Bucket (A, B, or C)\n- One-line reason for classification\n- Dependencies it has on other files in scope (so we know conversion order)\n- Whether it is `boundaryCritical` for a non-trivial migration boundary\n- Which semantic boundaries it affects from `semanticBoundaryCandidates`\n- Whether it will require target-repo integration analysis\n\nBoundary-critical files must not be treated as blind mechanical translation just because the syntax looks simple. If a file materially affects a semantic boundary or destination-repo seam, keep it with main-agent review.\n\nSort the work items within each bucket by dependency order (convert dependencies first).\n\nGroup Bucket A files into parallel batches of 3-5 files each. 
Each batch should contain files with no cross-dependencies so subagents can work independently.\n\nGroup Bucket B and C files into sequential batches by dependency order.\n\nEach batch should have: `name` (short label), `bucket` (A, B, or C), and `files` (list of file paths).\n\nCapture:\n- `bucketABatches` (parallel batches for subagent delegation)\n- `bucketBCBatches` (sequential batches for main agent)\n- `bucketACounts`\n- `bucketBCounts`\n- `bucketCCounts`\n- `boundaryCriticalItems`",
131
132
  "requireConfirmation": true
132
133
  },
133
134
  {
@@ -274,7 +275,7 @@
274
275
  "var": "conversionComplexity",
275
276
  "equals": "Medium"
276
277
  },
277
- "text": "For Medium conversions, focus the plan on the items that actually need design decisions. Don't exhaustively map every dimension \u2014 only the ones relevant to the files in scope."
278
+ "text": "For Medium conversions, focus the plan on the items that actually need design decisions. Don't exhaustively map every dimension only the ones relevant to the files in scope."
278
279
  },
279
280
  {
280
281
  "id": "phase-3f-high-adaptation",
@@ -519,7 +520,7 @@
519
520
  {
520
521
  "id": "phase-6a-full-build",
521
522
  "title": "Full Build and Integration Check",
522
- "prompt": "Run a full build or typecheck on the entire converted codebase \u2014 both subagent-converted and main-agent-converted code together.\n\nCheck for:\n- Build/compile errors from cross-batch integration issues\n- Inconsistencies between subagent output and main agent output (naming, patterns)\n- Non-idiomatic patterns that slipped through\n- Missing error handling at module boundaries\n- Threading or concurrency issues across modules\n- Broken public API contracts\n- Contract inventory drift: every row in `semanticContractInventory` is still accounted for, no `uncertain` rows remain, preserved contracts still look preserved, and intentional changes are still justified\n- Target integration drift: code landed in the intended target layer/module, reuse/adaptation decisions still fit the observed target seams, and no unresolved target integration uncertainties remain\n- High-adaptation architecture drift: if `adaptationProfile` is `high`, the final code still matches `architectureAdaptationPlan` and any deviations are explicit and justified\n\nFix each issue. If a fix is a band-aid over a deeper mapping problem, go back and fix the mapping.\n\nCapture:\n- `fullBuildPassed`\n- `integrationIssues`\n- `issuesFixed`",
523
+ "prompt": "Run a full build or typecheck on the entire converted codebase both subagent-converted and main-agent-converted code together.\n\nCheck for:\n- Build/compile errors from cross-batch integration issues\n- Inconsistencies between subagent output and main agent output (naming, patterns)\n- Non-idiomatic patterns that slipped through\n- Missing error handling at module boundaries\n- Threading or concurrency issues across modules\n- Broken public API contracts\n- Contract inventory drift: every row in `semanticContractInventory` is still accounted for, no `uncertain` rows remain, preserved contracts still look preserved, and intentional changes are still justified\n- Target integration drift: code landed in the intended target layer/module, reuse/adaptation decisions still fit the observed target seams, and no unresolved target integration uncertainties remain\n- High-adaptation architecture drift: if `adaptationProfile` is `high`, the final code still matches `architectureAdaptationPlan` and any deviations are explicit and justified\n\nFix each issue. If a fix is a band-aid over a deeper mapping problem, go back and fix the mapping.\n\nCapture:\n- `fullBuildPassed`\n- `integrationIssues`\n- `issuesFixed`",
523
524
  "requireConfirmation": false
524
525
  },
525
526
  {
@@ -544,7 +545,7 @@
544
545
  "var": "conversionComplexity",
545
546
  "equals": "Small"
546
547
  },
547
- "text": "For Small conversions, keep the summary brief \u2014 just list what was converted, build status, and any issues."
548
+ "text": "For Small conversions, keep the summary brief just list what was converted, build status, and any issues."
548
549
  },
549
550
  {
550
551
  "id": "phase-7-full-summary",
@@ -2,6 +2,7 @@
2
2
  "id": "document-creation-workflow",
3
3
  "name": "Document Creation Workflow",
4
4
  "version": "1.0.0",
5
+ "metricsProfile": "coding",
5
6
  "description": "Use this to create broad or comprehensive documentation spanning multiple components or systems — project READMEs, complete API docs, user guides, or technical specifications.",
6
7
  "about": "## Document Creation Workflow\n\nThis workflow guides you through creating new documentation from scratch -- ranging from a simple project README to a full technical specification spanning multiple systems. It automatically calibrates depth to match the complexity of your request: simple tasks go straight to writing, while complex documentation gets a full analysis-and-planning phase first.\n\n### What it produces\n\nA complete, saved documentation file ready for use. Depending on complexity, it may also include a quality review pass covering accuracy, completeness, audience fit, usability, and style consistency.\n\n### When to use it\n\n- You need to create a **new** document (not update an existing one -- see the Documentation Update workflow for that).\n- The document spans one or more systems, components, or audiences.\n- Examples: project READMEs, API reference docs, user guides, onboarding docs, technical specifications, architecture overviews.\n\n### When NOT to use it\n\n- You want to update or refresh an existing doc -- use the Documentation Update workflow instead.\n- You need tight scope discipline for a single class or mechanism -- the Scoped Documentation workflow is better suited.\n\n### How to get good results\n\n- Be specific about the document type and intended audience upfront. The workflow probes for these, but the clearer your initial goal, the less back-and-forth.\n- If your project has existing documentation or style conventions, mention them -- the workflow will follow them.\n- For complex documentation, the workflow asks a small number of targeted questions it cannot answer from the codebase. Answer these concisely to keep momentum.",
7
8
  "examples": [
@@ -148,4 +149,4 @@
148
149
  "requireConfirmation": false
149
150
  }
150
151
  ]
151
- }
152
+ }