@exaudeus/workrail 2.1.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/application/services/compiler/resolve-templates.d.ts +5 -0
  2. package/dist/application/services/compiler/resolve-templates.js +35 -0
  3. package/dist/application/services/compiler/routine-loader.d.ts +11 -0
  4. package/dist/application/services/compiler/routine-loader.js +45 -0
  5. package/dist/application/services/compiler/template-registry.d.ts +4 -2
  6. package/dist/application/services/compiler/template-registry.js +105 -4
  7. package/dist/application/services/workflow-compiler.js +34 -3
  8. package/dist/di/container.js +10 -1
  9. package/dist/di/tokens.d.ts +1 -0
  10. package/dist/di/tokens.js +1 -0
  11. package/dist/engine/engine-factory.d.ts +3 -0
  12. package/dist/engine/engine-factory.js +295 -0
  13. package/dist/engine/index.d.ts +3 -0
  14. package/dist/engine/index.js +12 -0
  15. package/dist/engine/types.d.ts +130 -0
  16. package/dist/engine/types.js +18 -0
  17. package/dist/manifest.json +146 -74
  18. package/dist/mcp/handlers/v2-checkpoint.d.ts +31 -1
  19. package/dist/mcp/handlers/v2-checkpoint.js +76 -64
  20. package/dist/mcp/handlers/v2-execution/continue-advance.d.ts +2 -0
  21. package/dist/mcp/handlers/v2-execution/continue-advance.js +5 -5
  22. package/dist/mcp/handlers/v2-execution/continue-rehydrate.d.ts +2 -0
  23. package/dist/mcp/handlers/v2-execution/continue-rehydrate.js +17 -22
  24. package/dist/mcp/handlers/v2-execution/index.d.ts +10 -17
  25. package/dist/mcp/handlers/v2-execution/index.js +44 -54
  26. package/dist/mcp/handlers/v2-execution/replay.d.ts +4 -15
  27. package/dist/mcp/handlers/v2-execution/replay.js +52 -128
  28. package/dist/mcp/handlers/v2-execution/start.d.ts +3 -2
  29. package/dist/mcp/handlers/v2-execution/start.js +18 -46
  30. package/dist/mcp/handlers/v2-token-ops.d.ts +45 -24
  31. package/dist/mcp/handlers/v2-token-ops.js +372 -32
  32. package/dist/mcp/output-schemas.d.ts +104 -283
  33. package/dist/mcp/output-schemas.js +24 -22
  34. package/dist/mcp/server.js +8 -0
  35. package/dist/mcp/types.d.ts +4 -0
  36. package/dist/mcp/v2/tools.d.ts +22 -52
  37. package/dist/mcp/v2/tools.js +18 -32
  38. package/dist/mcp/v2-response-formatter.js +12 -16
  39. package/dist/runtime/runtime-mode.d.ts +2 -0
  40. package/dist/v2/durable-core/domain/prompt-renderer.d.ts +1 -0
  41. package/dist/v2/durable-core/domain/prompt-renderer.js +5 -3
  42. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +14 -14
  43. package/dist/v2/durable-core/schemas/session/events.d.ts +4 -4
  44. package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
  45. package/dist/v2/durable-core/tokens/payloads.d.ts +32 -32
  46. package/dist/v2/durable-core/tokens/short-token.d.ts +38 -0
  47. package/dist/v2/durable-core/tokens/short-token.js +126 -0
  48. package/dist/v2/durable-core/tokens/token-patterns.d.ts +4 -0
  49. package/dist/v2/durable-core/tokens/token-patterns.js +9 -0
  50. package/dist/v2/infra/in-memory/token-alias-store/index.d.ts +11 -0
  51. package/dist/v2/infra/in-memory/token-alias-store/index.js +38 -0
  52. package/dist/v2/infra/local/data-dir/index.d.ts +1 -0
  53. package/dist/v2/infra/local/data-dir/index.js +3 -0
  54. package/dist/v2/infra/local/token-alias-store/index.d.ts +16 -0
  55. package/dist/v2/infra/local/token-alias-store/index.js +117 -0
  56. package/dist/v2/ports/data-dir.port.d.ts +1 -0
  57. package/dist/v2/ports/token-alias-store.port.d.ts +33 -0
  58. package/dist/v2/ports/token-alias-store.port.js +2 -0
  59. package/package.json +8 -1
  60. package/workflows/coding-task-workflow-agentic.lean.v2.json +41 -3
  61. package/workflows/examples/routine-injection-example.json +28 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "2.1.0",
3
+ "version": "3.1.0",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -14,6 +14,13 @@
14
14
  "bin": {
15
15
  "workrail": "dist/mcp-server.js"
16
16
  },
17
+ "exports": {
18
+ ".": "./dist/mcp-server.js",
19
+ "./engine": {
20
+ "import": "./dist/engine/index.js",
21
+ "types": "./dist/engine/index.d.ts"
22
+ }
23
+ },
17
24
  "files": [
18
25
  "dist",
19
26
  "spec",
@@ -40,13 +40,51 @@
40
40
  }
41
41
  },
42
42
  {
43
- "id": "phase-1-architecture-decision",
44
- "title": "Phase 1: Architecture Decision (Generate, Compare, Challenge, Select)",
43
+ "id": "phase-1a-hypothesis",
44
+ "title": "Phase 1a: State Hypothesis",
45
45
  "runCondition": {
46
46
  "var": "taskComplexity",
47
47
  "not_equals": "Small"
48
48
  },
49
- "prompt": "Design the architecture through deep understanding, not surface-level generation.\n\nPart A — Understand the problem deeply:\n- What are the core tensions in this problem? (e.g., performance vs simplicity, flexibility vs type safety, backward compatibility vs clean design)\n- How does the codebase already solve similar problems? Study the most relevant existing patterns — don't just list files, analyze the architectural decisions and constraints they protect.\n- What's the simplest naive solution? Why is it insufficient? (If it IS sufficient, that's your leading candidate — the burden of proof is on complexity.)\n- What makes this problem hard? What would a junior developer miss?\n\nPart B — Identify tensions and constraints (including philosophy):\n- Extract 2-4 real tradeoffs from your understanding (not generic labels like 'simplicity' or 'maintainability')\n- These tensions drive candidate generation — each candidate resolves them differently\n- Filter `philosophySources` to the principles actually under pressure for THIS problem. Which of the dev's philosophy principles constrain the solution space? For example: does the simplest solution require mutable state when the dev prefers immutability? Does the existing pattern use exceptions when the dev prefers Result types? Would the cleanest approach violate their preference for small interfaces?\n- If `philosophyConflicts` exist for this area of the codebase, surface them as explicit tensions the design must resolve: follow the stated rule, follow the existing pattern, or reconcile them\n\nPart C — State your hypothesis before delegating:\nBefore spawning any subagents, write 2-3 sentences: what do you currently believe the best approach is, and what concerns you most about it? This is your reference point for interrogating subagent output later.\n\nPart D — Generate candidates from tensions:\n- QUICK: self-generate candidates from your tensions. 
Include mandatory candidates: (1) simplest possible change that satisfies acceptance criteria, (2) follow existing repo pattern.\n- STANDARD: spawn ONE WorkRail Executor running `routine-tension-driven-design` with your tensions, philosophy sources, and problem understanding as input. Simultaneously, spawn ONE WorkRail Executor running `routine-hypothesis-challenge`: 'What constraints or failure modes would make you choose a fundamentally different approach? Propose one, grounded in real reasons.'\n- THOROUGH: spawn ONE WorkRail Executor running `routine-tension-driven-design`, ONE running `routine-hypothesis-challenge` (adversarial divergence), and ONE running `routine-execution-simulation`: 'Trace through the leading approach's 3 most likely failure scenarios step by step.'\n- For STANDARD with riskLevel=High: also spawn the execution simulation subagent.\n\nThe main agent ALWAYS self-generates its own candidates too (at minimum the two mandatory ones: simplest change + existing pattern). Subagent candidates supplement, not replace, your own thinking.\n\nPart E — Interrogate subagent output (if subagents were used):\nDo NOT summarize subagent findings as your own. Instead, interrogate against your hypothesis:\n- Where do subagent findings challenge your hypothesis? 
Are they right or did they miss context?\n- What did they surface that you genuinely hadn't considered?\n- Where are they just restating the obvious or echoing each other?\n- What did they get wrong or overweight?\nState explicitly: what you changed your mind about and why, or what you held firm on despite their input and why.\n\nPart F — Compare via tradeoffs (not checklists):\nFor each surviving candidate, produce:\n- One-sentence summary of the approach\n- Which tensions it resolves and which it accepts\n- The specific failure mode you'd watch for\n- How it relates to existing repo patterns (follows / adapts / departs)\n- What you gain and what you give up\n- Which of the dev's philosophy principles it honors and which it conflicts with — be specific (principle name + how)\n\nPart G — Challenge the leading option:\n- STANDARD: optionally challenge with ONE WorkRail Executor running `routine-hypothesis-challenge`\n- THOROUGH: challenge top 1-2 candidates using ONE or TWO WorkRail Executors running `routine-hypothesis-challenge`\n\nPart H — Select:\nSet context variables:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)\n\nRules:\n- the main agent owns the final decision; subagents contribute depth, not decisions\n- if the simplest solution satisfies acceptance criteria, prefer it — complexity must justify itself\n- if the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost\n- subagents go deep on specific 
questions, not wide on generic plans",
49
+ "prompt": "Before any design work, state your current hypothesis in 3-5 sentences.\n\nBased on what you learned in Phase 0, write:\n1. Your current best guess for the approach\n2. Your main concern about that guess\n3. What would most likely make that guess wrong\n\nThis is your reference point. After design generation, you will compare the result against this hypothesis and say what changed your mind or what held firm.\n\nSet context variable:\n- `initialHypothesis`",
50
+ "requireConfirmation": false
51
+ },
52
+ {
53
+ "id": "phase-1b-design-quick",
54
+ "title": "Phase 1b: Lightweight Design (QUICK)",
55
+ "runCondition": {
56
+ "and": [
57
+ { "var": "taskComplexity", "not_equals": "Small" },
58
+ { "var": "rigorMode", "equals": "QUICK" }
59
+ ]
60
+ },
61
+ "prompt": "Generate a lightweight design inline. QUICK rigor means the path is clear and risk is low.\n\nProduce two mandatory candidates:\n1. The simplest possible change that satisfies acceptance criteria\n2. Follow the existing repo pattern for this kind of change\n\nFor each candidate:\n- One-sentence summary\n- Key tradeoff\n- Failure mode to watch\n- Philosophy fit (name specific principles)\n\nCompare and recommend. If both converge on the same approach, say so honestly.\n\nWrite the output to `design-candidates.md` using the same structure as the deep design path:\n- Problem Understanding\n- Candidates (each with: summary, tradeoff, failure mode, philosophy fit)\n- Comparison and Recommendation\n\nSet context variable:\n- `designSummary` — one-paragraph summary of the recommendation and why",
62
+ "requireConfirmation": false
63
+ },
64
+ {
65
+ "id": "phase-1b-design-deep",
66
+ "title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
67
+ "runCondition": {
68
+ "and": [
69
+ { "var": "taskComplexity", "not_equals": "Small" },
70
+ { "var": "rigorMode", "not_equals": "QUICK" }
71
+ ]
72
+ },
73
+ "templateCall": {
74
+ "templateId": "wr.templates.routine.tension-driven-design",
75
+ "args": {
76
+ "deliverableName": "design-candidates.md"
77
+ }
78
+ }
79
+ },
80
+ {
81
+ "id": "phase-1c-challenge-and-select",
82
+ "title": "Phase 1c: Challenge and Select",
83
+ "runCondition": {
84
+ "var": "taskComplexity",
85
+ "not_equals": "Small"
86
+ },
87
+ "prompt": "Read `design-candidates.md`, compare against your initial hypothesis, and make the final architecture decision.\n\nInput contract: both QUICK and deep design paths produce `design-candidates.md` with candidates, tradeoffs, and a recommendation. Use that artifact as your primary input.\n\nPart A — Compare to hypothesis:\nRevisit `initialHypothesis`. Now that you have design candidates:\n- Where did the design work confirm your hypothesis?\n- Where did it challenge or change your thinking?\n- What did you learn that you hadn't considered?\nState explicitly what changed your mind and what held firm.\n\nPart B — Challenge the leading option:\n- What's the strongest argument against the recommended approach?\n- What assumption, if wrong, would invalidate it?\n- STANDARD/THOROUGH: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the leading option's failure modes\n- THOROUGH: optionally also spawn ONE WorkRail Executor running `routine-execution-simulation` to trace the 3 most likely failure scenarios\n\nPart C — Select:\nMake the final architecture decision. The design output is evidence, not a decision — you own the choice.\n\nIf the simplest solution satisfies acceptance criteria, prefer it. Complexity must justify itself. If the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost.\n\nSet context variables:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)",
50
88
  "requireConfirmation": {
51
89
  "or": [
52
90
  { "var": "automationLevel", "equals": "Low" },
@@ -0,0 +1,28 @@
1
+ {
2
+ "id": "routine-injection-example",
3
+ "name": "Routine Injection Example — Design with Tension-Driven Design",
4
+ "version": "1.0.0",
5
+ "description": "Example workflow that uses templateCall to inject the tension-driven-design routine inline. Validates the end-to-end routine injection path.",
6
+ "steps": [
7
+ {
8
+ "id": "phase-0-gather",
9
+ "title": "Phase 0: Gather Context",
10
+ "prompt": "Gather context about the problem space."
11
+ },
12
+ {
13
+ "id": "phase-1-design",
14
+ "title": "Phase 1: Design (Injected Routine)",
15
+ "templateCall": {
16
+ "templateId": "wr.templates.routine.tension-driven-design",
17
+ "args": {
18
+ "deliverableName": "design-candidates.md"
19
+ }
20
+ }
21
+ },
22
+ {
23
+ "id": "phase-2-implement",
24
+ "title": "Phase 2: Implement",
25
+ "prompt": "Implement the selected design."
26
+ }
27
+ ]
28
+ }