@exaudeus/workrail 3.65.0 → 3.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/application/validation.js +1 -1
  2. package/dist/console/standalone-console.js +4 -1
  3. package/dist/console-ui/assets/{index-DmFHE8v_.js → index-tOl8Vowf.js} +1 -1
  4. package/dist/console-ui/index.html +1 -1
  5. package/dist/infrastructure/storage/schema-validating-workflow-storage.d.ts +21 -2
  6. package/dist/infrastructure/storage/schema-validating-workflow-storage.js +48 -0
  7. package/dist/manifest.json +29 -29
  8. package/dist/mcp/handlers/v2-workflow.js +23 -6
  9. package/dist/mcp/output-schemas.d.ts +36 -0
  10. package/dist/mcp/output-schemas.js +11 -1
  11. package/dist/types/workflow-definition.d.ts +1 -0
  12. package/dist/v2/durable-core/domain/prompt-renderer.d.ts +1 -0
  13. package/dist/v2/durable-core/domain/prompt-renderer.js +40 -0
  14. package/dist/v2/projections/session-metrics.d.ts +1 -1
  15. package/dist/v2/projections/session-metrics.js +16 -35
  16. package/dist/v2/usecases/console-routes.d.ts +2 -2
  17. package/docs/authoring-v2.md +30 -7
  18. package/docs/authoring.md +28 -0
  19. package/package.json +1 -1
  20. package/spec/authoring-spec.json +37 -0
  21. package/spec/workflow.schema.json +5 -0
  22. package/workflows/adaptive-ticket-creation.json +2 -1
  23. package/workflows/architecture-scalability-audit.json +1 -0
  24. package/workflows/bug-investigation.agentic.v2.json +1 -0
  25. package/workflows/classify-task-workflow.json +1 -0
  26. package/workflows/coding-task-workflow-agentic.json +1 -0
  27. package/workflows/cross-platform-code-conversion.v2.json +8 -7
  28. package/workflows/document-creation-workflow.json +2 -1
  29. package/workflows/documentation-update-workflow.json +2 -1
  30. package/workflows/intelligent-test-case-generation.json +2 -1
  31. package/workflows/learner-centered-course-workflow.json +2 -1
  32. package/workflows/mr-review-workflow.agentic.v2.json +1 -0
  33. package/workflows/personal-learning-materials-creation-branched.json +1 -0
  34. package/workflows/presentation-creation.json +2 -1
  35. package/workflows/production-readiness-audit.json +1 -0
  36. package/workflows/relocation-workflow-us.json +1 -0
  37. package/workflows/routines/context-gathering.json +2 -1
  38. package/workflows/routines/design-review.json +1 -0
  39. package/workflows/routines/execution-simulation.json +2 -1
  40. package/workflows/routines/feature-implementation.json +4 -3
  41. package/workflows/routines/final-verification.json +1 -0
  42. package/workflows/routines/hypothesis-challenge.json +13 -3
  43. package/workflows/routines/ideation.json +1 -1
  44. package/workflows/routines/parallel-work-partitioning.json +1 -0
  45. package/workflows/routines/philosophy-alignment.json +2 -1
  46. package/workflows/routines/plan-analysis.json +2 -1
  47. package/workflows/routines/plan-generation.json +2 -1
  48. package/workflows/routines/tension-driven-design.json +1 -0
  49. package/workflows/scoped-documentation-workflow.json +2 -1
  50. package/workflows/test-artifact-loop-control.json +8 -2
  51. package/workflows/test-session-persistence.json +1 -0
  52. package/workflows/ui-ux-design-workflow.json +1 -0
  53. package/workflows/workflow-diagnose-environment.json +1 -0
  54. package/workflows/workflow-for-workflows.json +32 -76
  55. package/workflows/wr.discovery.json +1 -0
  56. package/workflows/wr.shaping.json +21 -6
  57. package/workflows/workflow-for-workflows.v2.json +0 -760
package/docs/authoring-v2.md CHANGED
@@ -219,22 +219,47 @@ Important implementation detail:
 
  ### Session analytics context keys (`metrics_*`)
 
- The `projectSessionMetricsV2` projection (planned -- not yet implemented) reads a set of `metrics_*` context keys to build session attribution data. These keys are not validated by the engine -- nothing will fail if they are absent or malformed. But absent or wrong data produces permanently incorrect analytics with no error or warning.
+ The engine reads `metrics_*` context keys from the final `continue_workflow` call to build session attribution data for the `run_completed` event. These keys feed `captureConfidence`, `agentCommitShas`, and related fields.
 
- Set these keys in your step's `Capture:` footer.
+ **Recommended approach: set `metricsProfile` at workflow level**
+
+ The simplest way to instrument a workflow is to declare `metricsProfile` as a top-level field in the workflow JSON. The engine then injects the appropriate footer instructions into step prompts automatically -- no per-step `Capture:` text needed.
+
+ ```json
+ {
+ "metricsProfile": "coding"
+ }
+ ```
+
+ Profile selection guide:
+
+ | Profile | When to use | What the engine injects |
+ |---|---|---|
+ | `"coding"` | Workflow produces git commits (implementation, refactoring, bug-fix) | SHA accumulation reminder on every step; outcome/PR/diff reminder on final step |
+ | `"review"` | Workflow produces a review decision on a PR or MR | PR numbers + outcome reminder on final step only |
+ | `"research"` | Workflow produces a finding or recommendation but no commits | Outcome-only reminder on final step only |
+ | `"none"` or absent | Meta-workflows, utilities, authoring tools | No injection -- existing behavior unchanged |
+
+ The engine does NOT derive the profile from tags automatically. Authors must set this field explicitly. When using `workflow-for-workflows` to author or modernize a workflow, the `phase-7b` step will prompt you for this decision.
+
+ **Final step detection**: The engine injects the final-step footer on the last top-level step, or on the exit step of a loop that is the last top-level step. A loop in a non-terminal position does not trigger the final-step footer on its exit step.
 
  **SHA accumulation rule (critical)**
 
  `context_set` uses shallow merge: each key is replaced, not merged. If you set `metrics_commit_shas: ["abc123"]` at step 5 and then set `metrics_commit_shas: ["def456"]` at step 9, the value at step 9 is `["def456"]` -- `abc123` is permanently gone.
 
- Every step that adds commits must send the **full accumulated list** -- read the current value from context, append new SHAs, and send the complete list.
+ Every step that adds commits must send the **full accumulated list** -- read the current value from context, append new SHAs, and send the complete list. The engine-injected footer includes an explicit reminder of this rule.
 
  ```
  Example (correct): metrics_commit_shas: ["abc123", "def456", "ghi789"]
  Example (wrong): metrics_commit_shas: ["ghi789"] -- loses abc123 and def456
  ```
 
- **Commit step `Capture:` footer** (copy this into every step that creates commits):
+ **Manual `Capture:` footers (if you cannot use `metricsProfile`)**
+
+ If `metricsProfile` is not appropriate for your workflow, add these footers manually.
+
+ Commit step `Capture:` footer (copy into every step that creates commits):
 
  ```
  Capture (every time you commit code):
@@ -248,7 +273,7 @@ Capture (every time you commit code):
  Example (wrong): metrics_commit_shas: ["ghi789"] -- loses abc123 and def456
  ```
 
- **Final handoff `Capture:` footer** (copy this into your final step):
+ Final handoff `Capture:` footer (copy into your final step):
 
  ```
  Capture (at final handoff only):
@@ -266,8 +291,6 @@ Capture (at final handoff only):
  (same accumulation rule as commit steps -- full list, not just final-step SHAs)
  ```
 
- Note: adding these keys to existing workflow JSON files (`coding-task-workflow-agentic.json` and others) is a separate follow-on PR. The templates above let you add them to new or custom workflows now.
-
  ### Assessment-gate authoring (v1)
 
  Assessment gates are now a shipped authoring/runtime feature, but the first slice is intentionally narrow.
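
The pattern these docs describe shows up directly in the workflow diffs further down: `metricsProfile` is added as a top-level field alongside `id`, `name`, and `version`. A minimal sketch of a conforming declaration (the id and name here are placeholders, and required fields such as `description` and `steps` are elided for brevity):

```json
{
  "id": "example-coding-workflow",
  "name": "Example Coding Workflow",
  "version": "1.0.0",
  "metricsProfile": "coding"
}
```
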
package/docs/authoring.md CHANGED
@@ -731,6 +731,34 @@ Canonical current rules for authoring good WorkRail workflows. workflow.schema.j
  - Using a nested key context.metrics.commit_shas instead of the flat key metrics_commit_shas
  - Setting metrics_outcome at intermediate steps before the session outcome is known
 
+ ### metrics-profile-declaration
+ - **Level**: recommended
+ - **Status**: active
+ - **Scope**: workflow.definition, step.context-capture
+ - **Rule**: Declare metricsProfile at workflow level to enable engine-injected metrics instrumentation footers. Use 'coding' for implementation workflows, 'review' for code review workflows, 'research' for investigation workflows, 'design' for design/planning artifacts, 'ticket' for work-item creation. Omit or use 'none' for meta-workflows and utilities.
+ - **Why**: Without metricsProfile, captureConfidence is always 'none' and run_completed events carry no usable attribution data. The engine cannot auto-derive the profile from tags -- authors must set it explicitly.
+ - **Enforced by**: advisory
+
+ **Checks**
+ - Select 'coding' when the workflow produces git commits (implementation, refactoring, bug-fix, migration, documentation updates).
+ - Select 'review' when the workflow produces a review decision on a PR or MR.
+ - Select 'research' when the workflow produces a finding or recommendation but no commits (investigation, audit, analysis).
+ - Select 'design' when the workflow produces a design artifact (pitch, spec, ADR, architecture doc) but no commits.
+ - Select 'ticket' when the workflow creates or updates work items in an external system (Jira, GitHub Issues, Linear).
+ - Omit or use 'none' for authoring tools, meta-workflows, or workflows with no measurable outcome.
+ - Do not invent new profile values -- the closed set is: 'coding', 'review', 'research', 'design', 'ticket', 'none'.
+ - The engine does NOT derive the profile from spec/workflow-tags.json at runtime. Set the field explicitly.
+ - When using workflow-for-workflows to author or modernize a workflow, the phase-7b step will prompt for this decision.
+
+ **Anti-patterns**
+ - Leaving metricsProfile absent from a coding or review workflow and expecting automatic instrumentation
+ - Using metricsProfile 'coding' on a workflow that produces no commits (e.g., a documentation or planning workflow)
+ - Assuming the engine reads tags and derives the profile automatically
+
+ **Source refs**
+ - `src/v2/durable-core/domain/prompt-renderer.ts` (runtime) — buildMetricsSection() implements render-time footer injection based on metricsProfile.
+ - `spec/workflow.schema.json` (schema) — metricsProfile optional enum field definition.
+
 
  ## Artifacts and planning surfaces
  ### artifact-canonicality
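
The anti-patterns in the metrics rule above (nested keys, early `metrics_outcome`) pair with the shallow-merge rule from authoring-v2.md: a later `context_set` replaces `metrics_commit_shas` wholesale, so each commit step must re-send the full accumulated list. A hedged sketch of what a later step should send after an earlier step already reported `abc123` (the SHAs are placeholders):

```json
{
  "metrics_commit_shas": ["abc123", "def456"]
}
```
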
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@exaudeus/workrail",
- "version": "3.65.0",
+ "version": "3.67.0",
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
  "license": "MIT",
  "repository": {
package/spec/authoring-spec.json CHANGED
@@ -1385,6 +1385,43 @@
  "Using a nested key context.metrics.commit_shas instead of the flat key metrics_commit_shas",
  "Setting metrics_outcome at intermediate steps before the session outcome is known"
  ]
+ },
+ {
+ "id": "metrics-profile-declaration",
+ "status": "active",
+ "level": "recommended",
+ "scope": ["workflow.definition", "step.context-capture"],
+ "rule": "Declare metricsProfile at workflow level to enable engine-injected metrics instrumentation footers. Use 'coding' for implementation workflows, 'review' for code review workflows, 'research' for investigation workflows, 'design' for design/planning artifacts, 'ticket' for work-item creation. Omit or use 'none' for meta-workflows and utilities.",
+ "why": "Without metricsProfile, captureConfidence is always 'none' and run_completed events carry no usable attribution data. The engine cannot auto-derive the profile from tags -- authors must set it explicitly.",
+ "enforcement": ["advisory"],
+ "checks": [
+ "Select 'coding' when the workflow produces git commits (implementation, refactoring, bug-fix, migration, documentation updates).",
+ "Select 'review' when the workflow produces a review decision on a PR or MR.",
+ "Select 'research' when the workflow produces a finding or recommendation but no commits (investigation, audit, analysis).",
+ "Select 'design' when the workflow produces a design artifact (pitch, spec, ADR, architecture doc) but no commits.",
+ "Select 'ticket' when the workflow creates or updates work items in an external system (Jira, GitHub Issues, Linear).",
+ "Omit or use 'none' for authoring tools, meta-workflows, or workflows with no measurable outcome.",
+ "Do not invent new profile values -- the closed set is: 'coding', 'review', 'research', 'design', 'ticket', 'none'.",
+ "The engine does NOT derive the profile from spec/workflow-tags.json at runtime. Set the field explicitly.",
+ "When using workflow-for-workflows to author or modernize a workflow, the phase-7b step will prompt for this decision."
+ ],
+ "antiPatterns": [
+ "Leaving metricsProfile absent from a coding or review workflow and expecting automatic instrumentation",
+ "Using metricsProfile 'coding' on a workflow that produces no commits (e.g., a documentation or planning workflow)",
+ "Assuming the engine reads tags and derives the profile automatically"
+ ],
+ "sourceRefs": [
+ {
+ "kind": "runtime",
+ "path": "src/v2/durable-core/domain/prompt-renderer.ts",
+ "note": "buildMetricsSection() implements render-time footer injection based on metricsProfile."
+ },
+ {
+ "kind": "schema",
+ "path": "spec/workflow.schema.json",
+ "note": "metricsProfile optional enum field definition."
+ }
+ ]
  }
  ]
  },
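
To make the rule concrete: under the `coding` profile, the final `continue_workflow` call carries the accumulated `metrics_*` keys that feed `captureConfidence` and `agentCommitShas`. A hedged sketch (the SHAs are placeholders, and the exact `metrics_outcome` value vocabulary is not specified in this diff):

```json
{
  "metrics_commit_shas": ["abc123", "def456", "ghi789"],
  "metrics_outcome": "success"
}
```

And since the profile set is closed, a value outside the six allowed strings should fail schema validation against the enum added below (a hypothetical invalid declaration; an investigation workflow would use 'research' instead):

```json
{
  "metricsProfile": "analysis"
}
```
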
package/spec/workflow.schema.json CHANGED
@@ -200,6 +200,11 @@
  "minItems": 1,
  "maxItems": 6,
  "uniqueItems": true
+ },
+ "metricsProfile": {
+ "type": "string",
+ "enum": ["coding", "review", "research", "design", "ticket", "none"],
+ "description": "Metrics instrumentation profile for this workflow. When set, the engine injects a footer into step prompts instructing the agent to accumulate and report metrics context keys (metrics_commit_shas, metrics_outcome, etc.). 'coding' injects SHA accumulation on every step and outcome/PR reporting on the final step. 'review' injects PR numbers + outcome on the final step. 'research', 'design', and 'ticket' inject outcome-only on the final step (identical behavior today, distinct semantics). 'none' or absent field disables injection entirely."
  }
  },
  "required": [
package/workflows/adaptive-ticket-creation.json CHANGED
@@ -2,6 +2,7 @@
  "id": "adaptive-ticket-creation",
  "name": "Adaptive Ticket Creation Workflow",
  "version": "1.0.0",
+ "metricsProfile": "ticket",
  "description": "Use this to create high-quality Jira tickets for features, tasks, or epics. Automatically selects the right complexity path (Simple, Standard, or Epic) and generates properly structured tickets with acceptance criteria and estimates.",
  "about": "## Adaptive Ticket Creation Workflow\n\nUse this to create well-structured Jira tickets for features, tasks, or epics. The workflow automatically selects the right complexity path (Simple, Standard, or Epic) based on the request, so you don't have to decide upfront how much process you need.\n\n### What it produces\n\n- **Simple path**: one complete, developer-ready Jira ticket with a context-rich description, checkbox-style acceptance criteria, and an effort estimate.\n- **Standard path**: a high-level plan plus a batch of related tickets covering all deliverables.\n- **Epic path**: everything in Standard, plus full epic decomposition, per-story estimates with risk ratings, dependency mapping, and a reusable team rules file at `.workflow_rules/ticket_creation.md` that future runs load automatically.\n\n### When to use it\n\n- You need to create one or more Jira tickets and want them to be genuinely developer-ready.\n- You have a feature request, bug, task, or epic that needs to be broken down and estimated.\n- Your team has specific ticket conventions (naming, sizing, labels) -- the workflow learns and stores these on the Epic path.\n\n### How to get good results\n\n- Provide as much context as you have: PRD links, design files, existing related tickets, and any known constraints.\n- If your team has a `.workflow_rules/ticket_creation.md` file, the workflow loads it automatically and applies your conventions.\n- On the Epic path, the workflow asks you to approve the high-level plan and the decomposition before generating tickets. Use these checkpoints to catch scope issues early.\n- Acceptance criteria are written as checkbox-style observable conditions, not restatements of requirements. If your team has a specific AC format, describe it in the rules file.",
  "examples": [
@@ -307,4 +308,4 @@
  "requireConfirmation": false
  }
  ]
- }
+ }
package/workflows/architecture-scalability-audit.json CHANGED
@@ -2,6 +2,7 @@
  "id": "architecture-scalability-audit",
  "name": "Architecture Scalability Audit",
  "version": "0.1.0",
+ "metricsProfile": "research",
  "description": "Use this to audit a bounded codebase scope for architecture scalability. Declare which scalability dimensions matter (load, data volume, team size, feature extensibility, operational); the workflow investigates each and produces evidence-grounded findings.",
  "about": "## Architecture Scalability Audit\n\nThis workflow audits a bounded codebase scope for scalability across the dimensions you care about. It does not produce generic \"won't scale\" warnings -- every finding must cite a specific file, class, method, or pattern, and every concern must name a concrete growth scenario (e.g. 10x traffic, 100x records, 3x team size).\n\n**What it does:**\nYou declare the scope boundary and the scalability dimensions that matter for your context. The workflow reads the codebase to understand the architecture, assigns one dedicated reviewer family per dimension, runs them in parallel from a shared fact packet, reconciles contradictions and blind spots through a synthesis loop, and delivers a per-dimension verdict (will_break / risk / fine) with an overall scalability readiness verdict.\n\n**The five scalability dimensions you can select:**\n- **load** -- handles more requests, users, or throughput\n- **data_volume** -- handles more records, storage, or query size\n- **team_org** -- more teams or developers working on this scope without friction\n- **feature_extensibility** -- more features added without rearchitecting\n- **operational** -- more deployments, environments, or operational complexity\n\n**When to use it:**\n- Before investing significantly in a component you expect to grow\n- When planning capacity for a new traffic tier or data volume increase\n- When evaluating a codebase acquired through a merger, partnership, or open-source adoption\n- When a team is growing and you want to know if the architecture will hold under parallel development\n\n**What it produces:**\nAn overall scalability verdict, per-dimension findings with specific code references and growth scenarios, cross-cutting concerns that span multiple dimensions, a prioritized concern list, and explicit callouts of what is already well-designed for scale.\n\n**How to get good results:**\nBe specific about the scope boundary -- name the service, module, or feature explicitly and say what is out of scope. Choose the dimensions relevant to your actual growth pressures; the workflow will not add dimensions you did not select. If you know a specific growth target (e.g. \"we expect 50x user growth in 18 months\"), mention it.",
  "examples": [
package/workflows/bug-investigation.agentic.v2.json CHANGED
@@ -14,6 +14,7 @@
  "recommendedAutonomy": "guided",
  "recommendedRiskPolicy": "conservative"
  },
+ "metricsProfile": "research",
  "preconditions": [
  "User has a specific bug report, failing test, or unexpected behavior to investigate.",
  "Agent has codebase access and can run tests, commands, or other deterministic evidence-gathering steps.",
package/workflows/classify-task-workflow.json CHANGED
@@ -2,6 +2,7 @@
  "id": "classify-task-workflow",
  "name": "Classify Task",
  "version": "0.1.0",
+ "metricsProfile": "none",
  "description": "Classifies a software task from the session goal into structured output variables used by coordinator scripts to decide which pipeline phases to run.",
  "about": "## Classify Task Workflow\n\nThis is a fast, single-step classification utility. It reads the session goal and outputs structured variables that coordinator scripts use to decide which pipeline phases to run.\n\n### What it does\n\nGiven a task description, the agent classifies the work along seven dimensions and recommends an ordered pipeline of workflow IDs to execute.\n\n### When to use it\n\nUse this workflow at the start of a coordinator pipeline when you need to decide which downstream workflows to run. It is intentionally fast and cheap -- one LLM step, no subagents, no codebase reads.\n\n### What it produces\n\nA structured classification block in the step notes containing all seven output variables:\n- `taskComplexity` -- Small / Medium / Large\n- `riskLevel` -- Low / Medium / High\n- `hasUI` -- true / false\n- `touchesArchitecture` -- true / false\n- `taskType` -- feature / bug-fix / refactor / investigation / docs / chore\n- `affectedDomains` -- array of likely codebase areas\n- `recommendedPipeline` -- ordered array of workflow IDs\n\n### How to get good results\n\nProvide a specific, concrete task description as the session goal. The more specific the goal, the more accurate the classification. When the goal is ambiguous, the workflow defaults to conservative (higher complexity, more pipeline phases).",
  "examples": [
package/workflows/coding-task-workflow-agentic.json CHANGED
@@ -14,6 +14,7 @@
  "recommendedAutonomy": "guided",
  "recommendedRiskPolicy": "conservative"
  },
+ "metricsProfile": "coding",
  "assessments": [
  {
  "id": "design-soundness-gate",
package/workflows/cross-platform-code-conversion.v2.json CHANGED
@@ -2,6 +2,7 @@
  "id": "cross-platform-code-conversion",
  "name": "Cross-Platform Code Conversion",
  "version": "0.1.0",
+ "metricsProfile": "coding",
  "description": "Use this to convert code from one platform to another (e.g. Android to iOS, iOS to Web). Triages files by difficulty, parallelizes easy translations, and handles platform-specific design decisions.",
  "about": "## Cross-Platform Code Conversion Workflow\n\nThis workflow guides an AI agent through converting code from one platform to another - for example, Android (Kotlin) to iOS (Swift), iOS to Web (TypeScript/React), or any similar migration. It handles everything from scoping and analysis through idiomatic conversion, build verification, and final handoff.\n\n### What it does\n\nThe workflow starts by scoping the migration and classifying its complexity (Small, Medium, or Large) and adaptation depth (low, moderate, or high). It then analyzes the source architecture to understand patterns, dependencies, concurrency models, and semantic contracts. Files are triaged into three buckets: mechanical translations delegated to subagents in parallel (Bucket A), library substitutions (Bucket B), and platform-specific code needing design decisions (Bucket C). For high-adaptation migrations, the workflow runs a full design generation phase to choose an idiomatic target-platform architecture before any code is written. Implementation proceeds batch by batch, with drift detection after each batch to catch files that turn out harder than classified. A final build-and-integration loop verifies the full converted codebase before handoff.\n\n### When to use it\n\nUse this workflow when migrating a module, feature, or full component from one platform to another. It is especially valuable when:\n- The source and target platforms have meaningfully different idioms (e.g., Kotlin coroutines vs Swift async/await, Hilt vs Swinject)\n- You want parallel delegation of mechanical work while keeping design-sensitive boundaries with the main agent\n- Semantic contracts (lifecycle, threading, cancellation, error handling) must be preserved across the migration\n- The target repo has existing architectural patterns the migrated code must fit into\n\nFor very small, straightforward file-by-file translations, the workflow includes a fast path that skips planning and triage.\n\n### What it produces\n\n- A triage matrix classifying every file into a conversion bucket\n- A semantic contract inventory for non-trivial migration boundaries\n- A target integration analysis mapping boundaries to their destination repo seams\n- Converted source files in the target platform's idioms\n- A passing build or typecheck on the full converted output\n- A handoff summary covering adaptation decisions, known gaps, and items needing manual review\n\n### How to get good results\n\n- Specify the exact scope of the migration - which files, modules, or features to convert\n- If the target repo is not in the same workspace, point the agent to it explicitly or configure the source-to-target path mapping\n- Review the triage and semantic contract inventory steps before conversion begins, especially for high-adaptation migrations\n- Flag any invariants that must survive the migration (API contracts, behavioral guarantees, threading assumptions)",
  "examples": [
@@ -78,7 +79,7 @@
  {
  "id": "phase-1-understand-source",
  "title": "Phase 1: Understand Source Code",
- "prompt": "Read and analyze the source code through a conversion lens \u2014 what will be easy to convert, what will be hard, and why.\n\nMap out:\n- Architecture and module structure\n- Key patterns used (MVI, MVVM, dependency injection, etc.)\n- External dependencies and what they do\n- Entry points and public API surface\n- Platform coupling depth: is the code cleanly layered or is platform-specific code smeared throughout? This directly determines how much falls into easy vs. hard buckets.\n- Concurrency model: Coroutines, Combine, RxJS, async/await? This is often the single hardest mapping decision.\n- DI approach: Dagger/Hilt, Swinject, Koin? DI frameworks rarely map 1:1.\n- Test coverage shape: unit tests on business logic (convert easily), UI tests (likely rewrite), integration tests (depends on infra).\n- Shared code boundaries: is there already a shared/common module that might not need conversion at all?\n- Non-trivial migration boundaries: public APIs, externally consumed module boundaries, and lifecycle/state/concurrency/resource boundaries that callers depend on.\n- Caller-visible guarantees for those boundaries. Examples include lifecycle/ownership, laziness vs eagerness, shared vs per-consumer behavior, cancellation/disposal, ordering/replay/buffering, failure behavior, threading/scheduling, or consistency/transaction guarantees.\n- Adaptation depth: classify whether the migration is `low`, `moderate`, or `high` adaptation based on architectural mismatch, missing target-side equivalents, lifecycle/state/concurrency mismatch, and the amount of adapter or redesign work needed.\n\nIdentify which files define or materially affect those boundaries and which of them will require target-repo integration analysis.\n\nCapture:\n- `sourceArchitecture`\n- `dependencies`\n- `publicApiSurface`\n- `platformCouplingAssessment`\n- `concurrencyModel`\n- `testCoverageShape`\n- `semanticBoundaryCandidates`\n- `boundaryCriticalFiles`\n- `adaptationProfile`",
+ "prompt": "Read and analyze the source code through a conversion lens what will be easy to convert, what will be hard, and why.\n\nMap out:\n- Architecture and module structure\n- Key patterns used (MVI, MVVM, dependency injection, etc.)\n- External dependencies and what they do\n- Entry points and public API surface\n- Platform coupling depth: is the code cleanly layered or is platform-specific code smeared throughout? This directly determines how much falls into easy vs. hard buckets.\n- Concurrency model: Coroutines, Combine, RxJS, async/await? This is often the single hardest mapping decision.\n- DI approach: Dagger/Hilt, Swinject, Koin? DI frameworks rarely map 1:1.\n- Test coverage shape: unit tests on business logic (convert easily), UI tests (likely rewrite), integration tests (depends on infra).\n- Shared code boundaries: is there already a shared/common module that might not need conversion at all?\n- Non-trivial migration boundaries: public APIs, externally consumed module boundaries, and lifecycle/state/concurrency/resource boundaries that callers depend on.\n- Caller-visible guarantees for those boundaries. Examples include lifecycle/ownership, laziness vs eagerness, shared vs per-consumer behavior, cancellation/disposal, ordering/replay/buffering, failure behavior, threading/scheduling, or consistency/transaction guarantees.\n- Adaptation depth: classify whether the migration is `low`, `moderate`, or `high` adaptation based on architectural mismatch, missing target-side equivalents, lifecycle/state/concurrency mismatch, and the amount of adapter or redesign work needed.\n\nIdentify which files define or materially affect those boundaries and which of them will require target-repo integration analysis.\n\nCapture:\n- `sourceArchitecture`\n- `dependencies`\n- `publicApiSurface`\n- `platformCouplingAssessment`\n- `concurrencyModel`\n- `testCoverageShape`\n- `semanticBoundaryCandidates`\n- `boundaryCriticalFiles`\n- `adaptationProfile`",
  "promptFragments": [
  {
  "id": "phase-1-small-light",
@@ -86,7 +87,7 @@
  "var": "conversionComplexity",
  "equals": "Small"
  },
- "text": "For Small conversions, keep this lightweight. A quick read of the files in scope is enough \u2014 don't map the entire architecture. Focus on identifying any platform-specific code that would prevent a straight translation."
+ "text": "For Small conversions, keep this lightweight. A quick read of the files in scope is enough don't map the entire architecture. Focus on identifying any platform-specific code that would prevent a straight translation."
  }
  ],
  "requireConfirmation": {
@@ -109,7 +110,7 @@
  }
  ]
  },
- "prompt": "For Small conversions, skip triage and planning \u2014 just convert.\n\n- Translate the files to the target platform idiomatically\n- Follow target platform naming and structure conventions\n- Map any dependencies to target equivalents\n- Convert tests if they exist\n- Run build or typecheck to verify\n\nIf something turns out harder than expected (deep platform coupling, no clean dependency equivalent, or meaningful architectural mismatch), update `conversionComplexity` to `Medium`, update `adaptationProfile` to `moderate` or `high` based on the newly discovered mismatch, and stop. The full triage and planning pipeline will activate for the remaining work.\n\nCapture:\n- `filesConverted`\n- `buildPassed`\n- `conversionComplexity`\n- `adaptationProfile`",
+ "prompt": "For Small conversions, skip triage and planning just convert.\n\n- Translate the files to the target platform idiomatically\n- Follow target platform naming and structure conventions\n- Map any dependencies to target equivalents\n- Convert tests if they exist\n- Run build or typecheck to verify\n\nIf something turns out harder than expected (deep platform coupling, no clean dependency equivalent, or meaningful architectural mismatch), update `conversionComplexity` to `Medium`, update `adaptationProfile` to `moderate` or `high` based on the newly discovered mismatch, and stop. The full triage and planning pipeline will activate for the remaining work.\n\nCapture:\n- `filesConverted`\n- `buildPassed`\n- `conversionComplexity`\n- `adaptationProfile`",
  "requireConfirmation": false
  },
  {
@@ -127,7 +128,7 @@
  }
  ]
  },
- "prompt": "Classify every file or module in scope into one of three buckets:\n\n**Bucket A \u2014 Literal translation**: Platform-agnostic business logic, data models, utilities, pure functions. These use no platform-specific APIs or libraries. Conversion is mechanical: translate the language syntax, follow target naming conventions, done. These will be delegated to subagents.\n\n**Bucket B \u2014 Library substitution**: Code that uses platform-specific libraries (networking, persistence, serialization, DI) but follows standard patterns. These need dependency mapping but the structure stays the same.\n\n**Bucket C \u2014 Platform-specific**: Code deeply tied to the platform (UI layer, lifecycle management, concurrency/threading, navigation, platform APIs). These need design decisions about target-platform idioms.\n\nFor each file or module, list:\n- File/module name\n- Bucket (A, B, or C)\n- One-line reason for classification\n- Dependencies it has on other files in scope (so we know conversion order)\n- Whether it is `boundaryCritical` for a non-trivial migration boundary\n- Which semantic boundaries it affects from `semanticBoundaryCandidates`\n- Whether it will require target-repo integration analysis\n\nBoundary-critical files must not be treated as blind mechanical translation just because the syntax looks simple. If a file materially affects a semantic boundary or destination-repo seam, keep it with main-agent review.\n\nSort the work items within each bucket by dependency order (convert dependencies first).\n\nGroup Bucket A files into parallel batches of 3-5 files each. Each batch should contain files with no cross-dependencies so subagents can work independently.\n\nGroup Bucket B and C files into sequential batches by dependency order.\n\nEach batch should have: `name` (short label), `bucket` (A, B, or C), and `files` (list of file paths).\n\nCapture:\n- `bucketABatches` (parallel batches for subagent delegation)\n- `bucketBCBatches` (sequential batches for main agent)\n- `bucketACounts`\n- `bucketBCounts`\n- `bucketCCounts`\n- `boundaryCriticalItems`",
+ "prompt": "Classify every file or module in scope into one of three buckets:\n\n**Bucket A Literal translation**: Platform-agnostic business logic, data models, utilities, pure functions. These use no platform-specific APIs or libraries. Conversion is mechanical: translate the language syntax, follow target naming conventions, done. These will be delegated to subagents.\n\n**Bucket B Library substitution**: Code that uses platform-specific libraries (networking, persistence, serialization, DI) but follows standard patterns. These need dependency mapping but the structure stays the same.\n\n**Bucket C Platform-specific**: Code deeply tied to the platform (UI layer, lifecycle management, concurrency/threading, navigation, platform APIs). These need design decisions about target-platform idioms.\n\nFor each file or module, list:\n- File/module name\n- Bucket (A, B, or C)\n- One-line reason for classification\n- Dependencies it has on other files in scope (so we know conversion order)\n- Whether it is `boundaryCritical` for a non-trivial migration boundary\n- Which semantic boundaries it affects from `semanticBoundaryCandidates`\n- Whether it will require target-repo integration analysis\n\nBoundary-critical files must not be treated as blind mechanical translation just because the syntax looks simple. If a file materially affects a semantic boundary or destination-repo seam, keep it with main-agent review.\n\nSort the work items within each bucket by dependency order (convert dependencies first).\n\nGroup Bucket A files into parallel batches of 3-5 files each. Each batch should contain files with no cross-dependencies so subagents can work independently.\n\nGroup Bucket B and C files into sequential batches by dependency order.\n\nEach batch should have: `name` (short label), `bucket` (A, B, or C), and `files` (list of file paths).\n\nCapture:\n- `bucketABatches` (parallel batches for subagent delegation)\n- `bucketBCBatches` (sequential batches for main agent)\n- `bucketACounts`\n- `bucketBCounts`\n- `bucketCCounts`\n- `boundaryCriticalItems`",
  "requireConfirmation": true
  },
  {
@@ -274,7 +275,7 @@
  "var": "conversionComplexity",
  "equals": "Medium"
  },
- "text": "For Medium conversions, focus the plan on the items that actually need design decisions. Don't exhaustively map every dimension \u2014 only the ones relevant to the files in scope."
+ "text": "For Medium conversions, focus the plan on the items that actually need design decisions. Don't exhaustively map every dimension only the ones relevant to the files in scope."
  },
  {
  "id": "phase-3f-high-adaptation",
@@ -519,7 +520,7 @@
  {
  "id": "phase-6a-full-build",
  "title": "Full Build and Integration Check",
- "prompt": "Run a full build or typecheck on the entire converted codebase \u2014 both subagent-converted and main-agent-converted code together.\n\nCheck for:\n- Build/compile errors from cross-batch integration issues\n- Inconsistencies between subagent output and main agent output (naming, patterns)\n- Non-idiomatic patterns that slipped through\n- Missing error handling at module boundaries\n- Threading or concurrency issues across modules\n- Broken public API contracts\n- Contract inventory drift: every row in `semanticContractInventory` is still accounted for, no `uncertain` rows remain, preserved contracts still look preserved, and intentional changes are still justified\n- Target integration drift: code landed in the intended target layer/module, reuse/adaptation decisions still fit the observed target seams, and no unresolved target integration uncertainties remain\n- High-adaptation architecture drift: if `adaptationProfile` is `high`, the final code still matches `architectureAdaptationPlan` and any deviations are explicit and justified\n\nFix each issue. If a fix is a band-aid over a deeper mapping problem, go back and fix the mapping.\n\nCapture:\n- `fullBuildPassed`\n- `integrationIssues`\n- `issuesFixed`",
+ "prompt": "Run a full build or typecheck on the entire converted codebase both subagent-converted and main-agent-converted code together.\n\nCheck for:\n- Build/compile errors from cross-batch integration issues\n- Inconsistencies between subagent output and main agent output (naming, patterns)\n- Non-idiomatic patterns that slipped through\n- Missing error handling at module boundaries\n- Threading or concurrency issues across modules\n- Broken public API contracts\n- Contract inventory drift: every row in `semanticContractInventory` is still accounted for, no `uncertain` rows remain, preserved contracts still look preserved, and intentional changes are still justified\n- Target integration drift: code landed in the intended target layer/module, reuse/adaptation decisions still fit the observed target seams, and no unresolved target integration uncertainties remain\n- High-adaptation architecture drift: if `adaptationProfile` is `high`, the final code still matches `architectureAdaptationPlan` and any deviations are explicit and justified\n\nFix each issue. If a fix is a band-aid over a deeper mapping problem, go back and fix the mapping.\n\nCapture:\n- `fullBuildPassed`\n- `integrationIssues`\n- `issuesFixed`",
  "requireConfirmation": false
  },
  {
@@ -544,7 +545,7 @@
  "var": "conversionComplexity",
  "equals": "Small"
  },
- "text": "For Small conversions, keep the summary brief \u2014 just list what was converted, build status, and any issues."
+ "text": "For Small conversions, keep the summary brief just list what was converted, build status, and any issues."
  },
  {
  "id": "phase-7-full-summary",
package/workflows/document-creation-workflow.json CHANGED
@@ -2,6 +2,7 @@
  "id": "document-creation-workflow",
  "name": "Document Creation Workflow",
  "version": "1.0.0",
+ "metricsProfile": "coding",
  "description": "Use this to create broad or comprehensive documentation spanning multiple components or systems — project READMEs, complete API docs, user guides, or technical specifications.",
  "about": "## Document Creation Workflow\n\nThis workflow guides you through creating new documentation from scratch -- ranging from a simple project README to a full technical specification spanning multiple systems. It automatically calibrates depth to match the complexity of your request: simple tasks go straight to writing, while complex documentation gets a full analysis-and-planning phase first.\n\n### What it produces\n\nA complete, saved documentation file ready for use. Depending on complexity, it may also include a quality review pass covering accuracy, completeness, audience fit, usability, and style consistency.\n\n### When to use it\n\n- You need to create a **new** document (not update an existing one -- see the Documentation Update workflow for that).\n- The document spans one or more systems, components, or audiences.\n- Examples: project READMEs, API reference docs, user guides, onboarding docs, technical specifications, architecture overviews.\n\n### When NOT to use it\n\n- You want to update or refresh an existing doc -- use the Documentation Update workflow instead.\n- You need tight scope discipline for a single class or mechanism -- the Scoped Documentation workflow is better suited.\n\n### How to get good results\n\n- Be specific about the document type and intended audience upfront. The workflow probes for these, but the clearer your initial goal, the less back-and-forth.\n- If your project has existing documentation or style conventions, mention them -- the workflow will follow them.\n- For complex documentation, the workflow asks a small number of targeted questions it cannot answer from the codebase. Answer these concisely to keep momentum.",
  "examples": [
@@ -148,4 +149,4 @@
  "requireConfirmation": false
  }
  ]
- }
+ }
package/workflows/documentation-update-workflow.json CHANGED
@@ -2,6 +2,7 @@
  "id": "documentation-update-workflow",
  "name": "Documentation Update & Maintenance Workflow",
  "version": "2.0.0",
+ "metricsProfile": "coding",
  "description": "Use this to update and maintain existing documentation. Uses git history to detect staleness, maps sections to current code, and refreshes outdated content while preserving what's still accurate.",
  "about": "## Documentation Update & Maintenance Workflow\n\nUse this when you have **existing** documentation that may be out of date and needs to be refreshed to match the current state of the codebase. The workflow uses git history as its primary evidence source: it checks when the docs were last committed, what changed in the relevant code since then, and classifies staleness before touching anything.\n\n### What it produces\n\nUpdated documentation files with stale or inaccurate sections corrected, missing coverage added, and removed content pruned. A completion summary is written to notes for future maintainers, including maintenance recommendations and sections at risk of going stale again quickly.\n\n### When to use it\n\n- A feature shipped and the docs were never updated.\n- You suspect a doc is outdated but aren't sure which parts.\n- You want a systematic, section-by-section audit rather than a quick edit.\n- The repo has git history covering both code and docs (the workflow degrades gracefully without git, but git history is the primary evidence source).\n\n### When NOT to use it\n\n- You are writing a doc from scratch -- use the Document Creation workflow instead.\n- You only need to fix a single known typo or sentence -- just edit the file directly.\n\n### How to get good results\n\n- Point the workflow at the specific documentation files and the code directories they describe.\n- The workflow will ask you to approve an update plan before making any edits -- review it carefully. This is the main checkpoint where you control scope.\n- If you want to defer lower-priority improvements, say so during plan review.",
  "examples": [
@@ -104,4 +105,4 @@
  "requireConfirmation": false
  }
  ]
- }
+ }
package/workflows/intelligent-test-case-generation.json CHANGED
@@ -2,6 +2,7 @@
  "id": "intelligent-test-case-generation",
  "name": "Test Case Generation from Tickets",
  "version": "1.0.0",
+ "metricsProfile": "research",
  "description": "Use this to generate integration and end-to-end test cases from ticket requirements. Reads the ticket, traces affected code paths, identifies boundary conditions, and produces developer-readable test case descriptions.",
  "about": "## Intelligent Test Case Generation\n\nThis workflow generates structured integration and end-to-end test cases directly from a ticket. It reads the ticket requirements, traces the affected code paths in the codebase, identifies boundary conditions and failure scenarios, and produces developer-readable test case descriptions that a developer can implement without guessing.\n\n**What it does:**\nThe workflow extracts every acceptance criterion from the ticket, traces which modules, endpoints, and integration boundaries are involved, identifies the existing test patterns in the repo (so generated cases match the team's style), then systematically generates happy path, boundary, and failure scenarios for each criterion. It checks coverage before writing, resolves ambiguities with you before generating anything uncertain, and finishes with a full test case list plus a coverage summary.\n\n**When to use it:**\n- When a ticket has clear acceptance criteria and you want comprehensive test coverage without manually reasoning through every edge case\n- When onboarding to a feature area and wanting to understand the expected behavior through its test scenarios\n- When a ticket spans multiple services or integration points and you need coverage across all of them\n- When preparing for a QA handoff or code review where test coverage must be explicitly demonstrated\n\n**What it produces:**\nNumbered test cases (TC-1, TC-2, ...) each with a title, acceptance criterion mapping, test type (Integration or E2E), risk level, preconditions, numbered test steps, expected result, and implementation notes. Cases are grouped by acceptance criterion and followed by a summary table. Open ambiguities and coverage gaps are disclosed explicitly.\n\n**How to get good results:**\nProvide the ticket in any standard format -- title, description, and acceptance criteria are enough. The workflow will trace the codebase itself. If the ticket has linked specs, API docs, or architecture diagrams, mention them. The more complete the acceptance criteria, the fewer clarifying questions the workflow will need to ask.",
  "examples": [
@@ -38,7 +39,7 @@
  "var": "ambiguities",
  "not_equals": []
  },
- "prompt": "Before generating test scenarios, resolve the ambiguities you found.\n\nFor each ambiguity in `ambiguities`:\n1. State what is unclear and why it matters for test design\n2. Propose the most reasonable interpretation based on context\n3. Ask me to confirm, adjust, or provide the missing information\n\nKeep questions targeted. If the ticket, codebase, or docs can answer the question, answer it yourself first.\n\nIf the user's response significantly changes the scope or adds new acceptance criteria, revisit Phase 1 scenario identification before continuing \u2014 do not carry stale scenarios forward.\n\nCapture:\n- `resolvedAmbiguities` -- list of ambiguities with chosen interpretation\n- `openAmbiguities` -- ambiguities the user still needs to resolve (initialize as empty)",
+ "prompt": "Before generating test scenarios, resolve the ambiguities you found.\n\nFor each ambiguity in `ambiguities`:\n1. State what is unclear and why it matters for test design\n2. Propose the most reasonable interpretation based on context\n3. Ask me to confirm, adjust, or provide the missing information\n\nKeep questions targeted. If the ticket, codebase, or docs can answer the question, answer it yourself first.\n\nIf the user's response significantly changes the scope or adds new acceptance criteria, revisit Phase 1 scenario identification before continuing do not carry stale scenarios forward.\n\nCapture:\n- `resolvedAmbiguities` -- list of ambiguities with chosen interpretation\n- `openAmbiguities` -- ambiguities the user still needs to resolve (initialize as empty)",
  "requireConfirmation": true
  },
  {
package/workflows/learner-centered-course-workflow.json CHANGED
@@ -2,6 +2,7 @@
  "id": "personal-learning-course-design",
  "name": "Personal Learning Course Design Workflow",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Use this to design a personal learning course. Creates structured learning objectives, sequencing, and a course outline suited to your time constraints.",
  "about": "## Personal Learning Course Design Workflow\n\nUse this to design a structured personal learning course -- defining clear objectives, sequencing, assessments, and a schedule that fits your time constraints. This workflow focuses on the **design** phase: building the blueprint for your learning program before you create any materials.\n\n### What it produces\n\nDepending on the path you choose:\n\n- **Quick Start (3-5 days)**: a functional learning plan with 2-3 focused objectives, a weekly schedule, a resource list, and basic progress tracking.\n- **Balanced (1-2 weeks)**: a comprehensive learning system with modules, structured assessments, active learning activities, and accountability measures.\n- **Comprehensive (2-3 weeks)**: a professional-grade learning system with full Bloom's Taxonomy-aligned objectives, spaced repetition design, multi-layer assessments, and long-term retention planning.\n\n### When to use it\n\n- You want to learn something specific and want a structured plan rather than ad-hoc resource consumption.\n- You are preparing for a certification, career transition, or skill upgrade and need a realistic timeline and sequence.\n- You've tried self-study before and found it hard to stay on track -- a well-designed plan with clear checkpoints helps.\n\n### When NOT to use it\n\n- You need to create the actual study materials -- use the Personal Learning Materials Creation workflow after this one.\n- You're designing a course for other learners, not for yourself -- consider an instructional design workflow instead.\n\n### How to get good results\n\n- Be honest about your weekly time budget. An ambitious plan that doesn't fit your schedule is worse than a modest plan you actually follow.\n- Start with the Quick Start path if you're uncertain -- you can always expand. Choosing Comprehensive when you're time-constrained leads to abandonment.\n- The more specific your goal (\"pass the AWS SAA exam in 3 months\"), the better the resulting plan will be compared to a vague goal (\"learn cloud\").",
  "examples": [
@@ -292,4 +293,4 @@
  "hasValidation": true
  }
  ]
- }
+ }
package/workflows/mr-review-workflow.agentic.v2.json CHANGED
@@ -14,6 +14,7 @@
  "recommendedAutonomy": "guided",
  "recommendedRiskPolicy": "conservative"
  },
+ "metricsProfile": "review",
  "features": [
  "wr.features.subagent_guidance"
  ],
package/workflows/personal-learning-materials-creation-branched.json CHANGED
@@ -2,6 +2,7 @@
  "id": "personal-learning-materials-creation-branched",
  "name": "Personal Learning Materials Creation Workflow",
  "version": "1.1.0",
+ "metricsProfile": "none",
  "description": "Use this to create learning materials for a course or subject. Adapts depth and format to your time budget — Quick Start, Balanced, or Comprehensive.",
  "about": "## Personal Learning Materials Creation Workflow\n\nUse this to create the actual study materials for a course or subject you are learning -- study guides, exercises, assessments, and spaced-repetition review materials. This workflow assumes you already have a learning plan or course design with defined objectives; it focuses on producing materials that directly support those objectives.\n\n### What it produces\n\nDepending on the path you choose:\n\n- **Quick Start (2-3 weeks)**: study guides and basic exercises for immediate use.\n- **Balanced (4-6 weeks)**: a complete learning system -- study guides, exercises, assessments, and spaced repetition materials.\n- **Comprehensive (8-12 weeks)**: a full learning ecosystem with interactive elements, effectiveness measurement, and a scalable update protocol.\n\n### When to use it\n\n- You have a learning plan and need to turn it into usable materials.\n- You are preparing for a certification, exam, or structured self-study program.\n- You want materials tailored to your specific objectives rather than relying entirely on off-the-shelf resources.\n\n### When NOT to use it\n\n- You haven't designed your learning course yet -- use the Personal Learning Course Design workflow first to define objectives and structure.\n- You need to design a course for others to take -- use the Learner-Centered Course workflow instead.\n\n### How to get good results\n\n- Select the path honestly based on available time. Starting with Quick Start and expanding later is better than committing to Comprehensive and abandoning it.\n- Have your learning objectives written out before starting -- the workflow maps every material directly to an objective.\n- Be specific about your preferred learning formats (text, diagrams, flashcards, practice problems) at the start.",
  "examples": [
package/workflows/presentation-creation.json CHANGED
@@ -2,6 +2,7 @@
  "id": "presentation-creation",
  "name": "Presentation Creation Workflow",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Use this to create a compelling presentation. Covers audience analysis, content strategy, slide structure, and delivery preparation. Output works with any presentation tool.",
  "about": "## Presentation Creation Workflow\n\nUse this to build a compelling, audience-specific presentation from scratch -- whether for a conference talk, internal strategy review, client pitch, or team demo. The workflow grounds every content decision in a concrete audience profile, so the result is written for real people in a real context rather than a generic slide deck.\n\n### What it produces\n\n- An audience profile and context map.\n- A content strategy with a single core message, supporting arguments, and a call-to-action.\n- A numbered slide outline with content types and timing estimates.\n- Full slide content and speaker notes for every slide.\n- Backup slides for anticipated deep-dive questions.\n- A delivery preparation plan including practice schedule, Q&A prep, and technical checklist.\n\n### When to use it\n\n- You are building a presentation that needs to persuade, inform, or motivate a specific audience.\n- You want structured help moving from \"I have a topic\" to \"I have a complete, rehearsal-ready deck.\"\n- The presentation has real stakes -- a client pitch, a leadership review, a conference talk.\n\n### When NOT to use it\n\n- You just need to slap a few bullets onto slides quickly -- this workflow is for presentations where quality matters.\n\n### How to get good results\n\n- The more specific you are about your audience, the better the content strategy will be. \"Engineering managers at a Series B fintech\" beats \"technical people.\"\n- The workflow has two confirmation gates: after the audience profile and after the slide outline. Use these to redirect before content gets written.\n- Bring source materials, data, and any existing slides you want to incorporate -- the content development step can ingest these.",
  "examples": [
@@ -169,4 +170,4 @@
  "requireConfirmation": false
  }
  ]
- }
+ }
package/workflows/production-readiness-audit.json CHANGED
@@ -2,6 +2,7 @@
  "id": "production-readiness-audit",
  "name": "Production Readiness Audit",
  "version": "0.1.0",
+ "metricsProfile": "research",
  "description": "Use this to audit a codebase scope for production readiness. Checks debugging correctness, runtime operability, artifact realism, technical debt, and anything that would prevent honest production deployment.",
  "about": "## Production Readiness Audit\n\nThis workflow performs a structured, evidence-driven audit to answer one question honestly: is this code actually ready for production? It goes beyond style and lint -- it looks for debugging correctness, runtime operability under real conditions, artifact realism (stale code, fake completeness, placeholder behavior), maintainability debt, test and observability gaps, and security or performance risks.\n\n**What it does:**\nThe workflow bounds the audit scope, states a readiness hypothesis, freezes a neutral fact packet, then runs parallel reviewer families -- each specializing in a different readiness dimension. It reconciles contradictions through an evidence loop and produces a final verdict: `ready`, `ready_with_conditions`, `not_ready`, or `inconclusive`.\n\n**When to use it:**\n- Before shipping a new service, feature, or major refactor to production\n- When a codebase has been under rapid development and you want an honest readiness check before a launch deadline\n- When onboarding to a codebase and wanting a structured assessment of its production posture\n- When a post-incident review surfaces questions about whether the system was truly ready\n\n**What it produces:**\nA verdict with a confidence band, a prioritized list of blocker-grade and major findings, debugging leads, runtime and operational risk callouts, artifact-realism concerns (misleading completeness, stale docs, dead paths), a coverage ledger by audit domain, and a remediation order with specific follow-up recommendations.\n\n**How to get good results:**\nProvide a clear scope -- a service name, a module path, or a feature boundary. The narrower and more concrete the scope, the sharper the findings. If \"production-ready\" has a specific meaning for your team (e.g. SLA requirements, specific deployment constraints), mention it. The workflow will try to infer the production bar from repo patterns and context, but explicit criteria improve accuracy.",
  "examples": [
@@ -2,6 +2,7 @@
  "id": "relocation-workflow-us",
  "name": "US Relocation Decision Workflow",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Use this to evaluate US cities or regions for a potential relocation. Discovers your preferences, generates candidate areas, screens them, and produces a ranked dossier with evidence.",
  "about": "## US Relocation Decision Workflow\n\nUse this to evaluate US cities and regions for a potential move. The workflow takes a structured, evidence-driven approach: it starts by calibrating your preferences and dealbreakers, generates a broad diverse pool of candidate areas (including non-obvious ones), screens them systematically, and produces a ranked dossier you can actually act on.\n\n### What it produces\n\n- A `RELOCATION_DOSSIER.md` with your full preference model, screening results, and comparison matrix.\n- Individual per-candidate profiles at `relocation-profiles/<slug>.md` covering housing, cost of living, taxes, safety, climate risk, schools, healthcare, commute, and any other modules you activate.\n- A scored ranking with explainable reasoning and an explicit disclosure of any data gaps.\n- A next-steps plan: visit recommendations, open questions per candidate, and pivot triggers.\n\n### When to use it\n\n- You are seriously considering a US relocation and want a rigorous, evidence-backed shortlist.\n- You want to surface non-obvious candidates you wouldn't have considered on your own.\n- You've been anchoring on a handful of cities and want a structured process to either validate or challenge that.\n\n### How to get good results\n\n- Be honest about dealbreakers upfront -- the workflow builds these into screening and filters candidates early.\n- The MaxDiff weight calibration exercise (offered in Phase 1) is worth doing if you're unsure how to weight competing priorities. It takes 5-10 minutes and produces more reliable weights than guessing.\n- The calibration deck in Phase 1 shows you lifestyle archetypes and asks for reactions -- engage with this seriously. Surprises in your reactions are valuable signal.\n- The workflow activates only the research modules you need. Keep it focused on what actually matters to your household.",
  "examples": [
@@ -2,6 +2,7 @@
  "id": "routine-context-gathering",
  "name": "Context Gathering Routine",
  "version": "2.1.0",
+ "metricsProfile": "none",
  "description": "Systematic codebase exploration using an Ideate -> Plan -> Execute strategy. Configurable depth levels (0-4) allow for progressively deeper understanding.",
  "clarificationPrompts": [
  "What specific area or files should I investigate?",
@@ -142,4 +143,4 @@
  ]
  }
  ]
- }
+ }
@@ -2,6 +2,7 @@
  "id": "routine-design-review",
  "name": "Design Review Routine",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Reviews a selected design using explicit tradeoffs, failure modes, simpler-alternative checks, runner-up comparison, and philosophy alignment. Produces a reusable design-review findings artifact.",
  "clarificationPrompts": [
  "What design artifact or summary should I review?",
@@ -2,6 +2,7 @@
  "id": "routine-execution-simulation",
  "name": "Execution Simulation Routine",
  "version": "1.1.0",
+ "metricsProfile": "none",
  "description": "Simulates code execution step-by-step through mental tracing and state tracking. Uses an Ideate -> Plan -> Execute strategy to identify the best simulation paths before tracing.",
  "clarificationPrompts": [
  "What function should I simulate? (function name, file, line)",
@@ -81,4 +82,4 @@
  ]
  }
  ]
- }
+ }
@@ -2,6 +2,7 @@
  "id": "routine-feature-implementation",
  "name": "Feature Implementation Routine",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Implements code precisely according to a detailed approved plan within a bounded scope. Follows existing patterns, writes tests, and maintains code quality. Intended as an optional execution utility, not the default strategy for the main coding workflow.",
  "clarificationPrompts": [
  "What plan file should I implement? (e.g., implementation-plan.md)",
@@ -67,7 +68,7 @@
  {
  "id": "step-2-verify-acceptance-criteria",
  "title": "Step 2: Verify Acceptance Criteria",
- "prompt": "**VERIFY ALL ACCEPTANCE CRITERIA ARE MET**\n\nNow verify that your implementation meets all acceptance criteria.\n\n**YOUR MISSION:** Systematically check each acceptance criterion and verify it's met.\n\n**PLAN YOUR APPROACH:**\nBefore verifying, think:\n- How will I test each criterion?\n- What evidence proves each criterion is met?\n- Are there any criteria I might have missed?\n- What manual testing should I do?\n\n**EXECUTE:**\nFor each acceptance criterion:\n\n1. **State the Criterion**\n - Quote the exact criterion from the plan\n\n2. **Verify It's Met**\n - Run relevant tests\n - Perform manual testing if needed\n - Check code implementation\n - Gather evidence\n\n3. **Mark Status**\n - \u2705 Met (with evidence)\n - \u274c Not Met (with reason)\n - \u26a0\ufe0f Partially Met (with details)\n\n4. **Provide Evidence**\n - Test results\n - Code references (file:line)\n - Manual test outcomes\n - Metrics or measurements\n\n**REFLECT:**\nAs you verify, ask yourself:\n- Did I test each criterion thoroughly?\n- Is my evidence convincing?\n- Are there edge cases I should test?\n- Did I miss any criteria?\n\n**WORKING NOTES:**\nCapture your verification:\n- Acceptance criteria checklist (with status)\n- Evidence for each criterion\n- Test results\n- Manual testing performed\n- Edge cases tested\n- Any criteria not met (with reasons)",
+ "prompt": "**VERIFY ALL ACCEPTANCE CRITERIA ARE MET**\n\nNow verify that your implementation meets all acceptance criteria.\n\n**YOUR MISSION:** Systematically check each acceptance criterion and verify it's met.\n\n**PLAN YOUR APPROACH:**\nBefore verifying, think:\n- How will I test each criterion?\n- What evidence proves each criterion is met?\n- Are there any criteria I might have missed?\n- What manual testing should I do?\n\n**EXECUTE:**\nFor each acceptance criterion:\n\n1. **State the Criterion**\n - Quote the exact criterion from the plan\n\n2. **Verify It's Met**\n - Run relevant tests\n - Perform manual testing if needed\n - Check code implementation\n - Gather evidence\n\n3. **Mark Status**\n - Met (with evidence)\n - Not Met (with reason)\n - ⚠️ Partially Met (with details)\n\n4. **Provide Evidence**\n - Test results\n - Code references (file:line)\n - Manual test outcomes\n - Metrics or measurements\n\n**REFLECT:**\nAs you verify, ask yourself:\n- Did I test each criterion thoroughly?\n- Is my evidence convincing?\n- Are there edge cases I should test?\n- Did I miss any criteria?\n\n**WORKING NOTES:**\nCapture your verification:\n- Acceptance criteria checklist (with status)\n- Evidence for each criterion\n- Test results\n- Manual testing performed\n- Edge cases tested\n- Any criteria not met (with reasons)",
  "agentRole": "You are a quality assurance specialist verifying that all requirements are met. Be thorough and provide evidence.",
  "requireConfirmation": false,
  "guidance": [
@@ -100,7 +101,7 @@
  {
  "id": "step-4-synthesize-deliverable",
  "title": "Step 4: Synthesize & Deliver Implementation Report",
- "prompt": "**DELIVER YOUR IMPLEMENTATION REPORT**\n\nNow compile your work into a clear, comprehensive deliverable.\n\n**YOUR MISSION:** Create an implementation report that documents all changes, tests, and verification.\n\n**PLAN YOUR APPROACH:**\nBefore writing, think:\n- Who will read this and what do they need?\n- How can I make it easy to review?\n- What's the most important information?\n- How can I prove I met all criteria?\n\n**EXECUTE:**\nCreate your implementation report with these sections:\n\n1. **Summary** (3-5 bullets)\n - What was implemented\n - Key changes made\n - Test coverage added\n - Any deviations from plan\n\n2. **Implementation Details**\n - Files modified (with summary of changes and code snippets)\n - Files created (with purpose and key code)\n - Files deleted (if any)\n - Pattern adherence notes\n\n3. **Tests Added/Updated**\n - New tests (with descriptions)\n - Updated tests (with reasons)\n - Test results (pass/fail counts)\n - Test coverage metrics (if available)\n\n4. **Verification Steps**\n - How to run tests\n - How to check linter\n - How to manually verify\n - How to check metrics (if applicable)\n\n5. **Deviations from Plan**\n - Any deviations (with reasons and impact)\n - Approval status (needed/not needed)\n\n6. **Acceptance Criteria Status**\n - Checklist of all criteria (\u2705/\u274c/\u26a0\ufe0f)\n - Evidence for each\n - Overall status\n\n7. **Known Issues / TODOs**\n - Issues encountered\n - TODOs for future work\n - Blockers (if any)\n\n8. **Build & Lint Status**\n - Build results\n - Linter results\n - Overall quality status\n\n**REFLECT:**\nBefore delivering, ask yourself:\n- Is my report complete and accurate?\n- Did I document all changes?\n- Can someone review this easily?\n- Did I provide enough evidence?\n- Are there any issues I should flag?\n\n**DELIVERABLE:**\nA comprehensive implementation report (markdown format) with all sections above.",
+ "prompt": "**DELIVER YOUR IMPLEMENTATION REPORT**\n\nNow compile your work into a clear, comprehensive deliverable.\n\n**YOUR MISSION:** Create an implementation report that documents all changes, tests, and verification.\n\n**PLAN YOUR APPROACH:**\nBefore writing, think:\n- Who will read this and what do they need?\n- How can I make it easy to review?\n- What's the most important information?\n- How can I prove I met all criteria?\n\n**EXECUTE:**\nCreate your implementation report with these sections:\n\n1. **Summary** (3-5 bullets)\n - What was implemented\n - Key changes made\n - Test coverage added\n - Any deviations from plan\n\n2. **Implementation Details**\n - Files modified (with summary of changes and code snippets)\n - Files created (with purpose and key code)\n - Files deleted (if any)\n - Pattern adherence notes\n\n3. **Tests Added/Updated**\n - New tests (with descriptions)\n - Updated tests (with reasons)\n - Test results (pass/fail counts)\n - Test coverage metrics (if available)\n\n4. **Verification Steps**\n - How to run tests\n - How to check linter\n - How to manually verify\n - How to check metrics (if applicable)\n\n5. **Deviations from Plan**\n - Any deviations (with reasons and impact)\n - Approval status (needed/not needed)\n\n6. **Acceptance Criteria Status**\n - Checklist of all criteria (✅/❌/⚠️)\n - Evidence for each\n - Overall status\n\n7. **Known Issues / TODOs**\n - Issues encountered\n - TODOs for future work\n - Blockers (if any)\n\n8. **Build & Lint Status**\n - Build results\n - Linter results\n - Overall quality status\n\n**REFLECT:**\nBefore delivering, ask yourself:\n- Is my report complete and accurate?\n- Did I document all changes?\n- Can someone review this easily?\n- Did I provide enough evidence?\n- Are there any issues I should flag?\n\n**DELIVERABLE:**\nA comprehensive implementation report (markdown format) with all sections above.",
  "agentRole": "You are a technical writer synthesizing your implementation work into a clear, reviewable report. Make it easy to understand and verify.",
  "requireConfirmation": false,
  "guidance": [
@@ -117,4 +118,4 @@
  ]
  }
  ]
- }
+ }
@@ -2,6 +2,7 @@
  "id": "routine-final-verification",
  "name": "Final Verification Routine",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Performs reusable final verification over acceptance criteria, invariants, validation evidence, regressions, cumulative drift, and philosophy alignment. Produces a proof-oriented verification artifact built around claim -> evidence -> gap -> severity -> readiness verdict.",
  "clarificationPrompts": [
  "What implementation or slices should I verify?",
@@ -2,6 +2,7 @@
  "id": "routine-hypothesis-challenge",
  "name": "Hypothesis Challenge Routine",
  "version": "1.0.0",
+ "metricsProfile": "none",
  "description": "Lean adversarial review of a hypothesis, recommendation, or diagnosis. Produces the strongest counter-argument, exposes weak assumptions and evidence gaps, identifies likely failure modes, and defines the critical tests needed to keep, revise, or reject the current claim.",
  "clarificationPrompts": [
  "What hypothesis, recommendation, or diagnosis should I challenge?",
@@ -49,9 +50,18 @@
  "title": "Step 4: Generate Alternative Explanations and Critical Tests",
  "runCondition": {
  "or": [
- { "var": "depth", "equals": "THOROUGH" },
- { "var": "rigorMode", "equals": "THOROUGH" },
- { "var": "rigor", "gte": 5 }
+ {
+ "var": "depth",
+ "equals": "THOROUGH"
+ },
+ {
+ "var": "rigorMode",
+ "equals": "THOROUGH"
+ },
+ {
+ "var": "rigor",
+ "gte": 5
+ }
  ]
  },
  "prompt": "For THOROUGH review, go beyond the primary counter-argument into alternatives and discrimination strategy.\n\nProduce:\n- the 1-2 strongest alternative explanations or competing hypotheses\n- why each might beat the current claim\n- the critical tests, observations, or traces that would discriminate between them\n- what result would cause you to keep, revise, or reject the current claim\n\nThis step exists to make THOROUGH meaningfully deeper than STANDARD, not just wordier.",