@bastani/atomic 0.9.0-alpha.1 → 0.9.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/CHANGELOG.md +29 -0
  2. package/dist/builtin/cursor/CHANGELOG.md +6 -0
  3. package/dist/builtin/cursor/package.json +2 -2
  4. package/dist/builtin/intercom/CHANGELOG.md +6 -0
  5. package/dist/builtin/intercom/package.json +2 -2
  6. package/dist/builtin/mcp/CHANGELOG.md +6 -0
  7. package/dist/builtin/mcp/package.json +3 -3
  8. package/dist/builtin/subagents/CHANGELOG.md +6 -0
  9. package/dist/builtin/subagents/package.json +4 -4
  10. package/dist/builtin/web-access/CHANGELOG.md +6 -0
  11. package/dist/builtin/web-access/package.json +2 -2
  12. package/dist/builtin/workflows/CHANGELOG.md +19 -0
  13. package/dist/builtin/workflows/README.md +189 -122
  14. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +30 -27
  15. package/dist/builtin/workflows/builtin/goal-ledger.ts +2 -0
  16. package/dist/builtin/workflows/builtin/goal-reports.ts +5 -0
  17. package/dist/builtin/workflows/builtin/goal-runner.ts +17 -20
  18. package/dist/builtin/workflows/builtin/goal-types.ts +2 -0
  19. package/dist/builtin/workflows/builtin/goal.d.ts +1 -0
  20. package/dist/builtin/workflows/builtin/goal.ts +40 -44
  21. package/dist/builtin/workflows/builtin/index.d.ts +1 -0
  22. package/dist/builtin/workflows/builtin/open-claude-design-runner.ts +16 -17
  23. package/dist/builtin/workflows/builtin/open-claude-design.d.ts +1 -0
  24. package/dist/builtin/workflows/builtin/open-claude-design.ts +42 -50
  25. package/dist/builtin/workflows/builtin/prompt-refinement.ts +102 -0
  26. package/dist/builtin/workflows/builtin/ralph-core.ts +6 -4
  27. package/dist/builtin/workflows/builtin/ralph-runner.ts +22 -24
  28. package/dist/builtin/workflows/builtin/ralph.d.ts +2 -0
  29. package/dist/builtin/workflows/builtin/ralph.ts +46 -41
  30. package/dist/builtin/workflows/package.json +2 -2
  31. package/dist/builtin/workflows/src/authoring/typebox-defaults.d.ts +41 -0
  32. package/dist/builtin/workflows/src/authoring/typebox-defaults.ts +217 -0
  33. package/dist/builtin/workflows/src/authoring/workflow.ts +184 -0
  34. package/dist/builtin/workflows/src/authoring.d.ts +14 -66
  35. package/dist/builtin/workflows/src/engine/graph-inference.ts +100 -0
  36. package/dist/builtin/workflows/src/engine/options.ts +40 -0
  37. package/dist/builtin/workflows/src/engine/primitives/chain.ts +29 -0
  38. package/dist/builtin/workflows/src/engine/primitives/exit.ts +2 -0
  39. package/dist/builtin/workflows/src/engine/primitives/parallel.ts +47 -0
  40. package/dist/builtin/workflows/src/engine/primitives/task.ts +108 -0
  41. package/dist/builtin/workflows/src/engine/primitives/ui.ts +41 -0
  42. package/dist/builtin/workflows/src/engine/primitives/workflow.ts +159 -0
  43. package/dist/builtin/workflows/src/engine/replay.ts +8 -0
  44. package/dist/builtin/workflows/src/engine/run.ts +356 -0
  45. package/dist/builtin/workflows/src/engine/runtime.ts +160 -0
  46. package/dist/builtin/workflows/src/extension/workflow-module-loader.ts +9 -3
  47. package/dist/builtin/workflows/src/extension/workflow-prompts.ts +3 -1
  48. package/dist/builtin/workflows/src/extension/workflow-schema.ts +0 -18
  49. package/dist/builtin/workflows/src/index.ts +0 -2
  50. package/dist/builtin/workflows/src/runs/background/runner.ts +6 -3
  51. package/dist/builtin/workflows/src/runs/foreground/executor-child-boundary.ts +3 -3
  52. package/dist/builtin/workflows/src/runs/foreground/executor-child-helpers.ts +4 -4
  53. package/dist/builtin/workflows/src/runs/foreground/executor-child-workflow.ts +1 -158
  54. package/dist/builtin/workflows/src/runs/foreground/executor-direct-helpers.ts +1 -1
  55. package/dist/builtin/workflows/src/runs/foreground/executor-outputs.ts +2 -2
  56. package/dist/builtin/workflows/src/runs/foreground/executor-prompt-nodes.ts +1 -1
  57. package/dist/builtin/workflows/src/runs/foreground/executor-run.ts +1 -359
  58. package/dist/builtin/workflows/src/runs/foreground/executor-scheduler.ts +1 -1
  59. package/dist/builtin/workflows/src/runs/foreground/executor-stage-call.ts +2 -5
  60. package/dist/builtin/workflows/src/runs/foreground/executor-stage-factory.ts +12 -4
  61. package/dist/builtin/workflows/src/runs/foreground/executor-stage-replay.ts +4 -3
  62. package/dist/builtin/workflows/src/runs/foreground/executor-stage-types.ts +9 -2
  63. package/dist/builtin/workflows/src/runs/foreground/executor-task-context.ts +2 -132
  64. package/dist/builtin/workflows/src/runs/foreground/executor-types.ts +2 -2
  65. package/dist/builtin/workflows/src/runs/shared/graph-inference.ts +2 -100
  66. package/dist/builtin/workflows/src/sdk-surface.ts +6 -9
  67. package/dist/builtin/workflows/src/shared/authoring-contract-stage.d.ts +9 -3
  68. package/dist/builtin/workflows/src/shared/authoring-contract-stage.ts +17 -3
  69. package/dist/builtin/workflows/src/shared/authoring-contract-ui.d.ts +3 -33
  70. package/dist/builtin/workflows/src/shared/authoring-contract-ui.ts +9 -81
  71. package/dist/builtin/workflows/src/shared/types.ts +25 -8
  72. package/dist/builtin/workflows/src/shared/workflow-authoring-types.d.ts +49 -0
  73. package/dist/builtin/workflows/src/shared/workflow-authoring-types.ts +84 -0
  74. package/dist/builtin/workflows/src/workflows/registry.ts +7 -3
  75. package/dist/core/agent-session-auto-compaction.d.ts.map +1 -1
  76. package/dist/core/agent-session-auto-compaction.js +6 -1
  77. package/dist/core/agent-session-auto-compaction.js.map +1 -1
  78. package/dist/core/agent-session-bash.d.ts.map +1 -1
  79. package/dist/core/agent-session-bash.js +0 -5
  80. package/dist/core/agent-session-bash.js.map +1 -1
  81. package/dist/core/agent-session-methods.d.ts +0 -2
  82. package/dist/core/agent-session-methods.d.ts.map +1 -1
  83. package/dist/core/agent-session-methods.js.map +1 -1
  84. package/dist/core/agent-session-services.d.ts +0 -1
  85. package/dist/core/agent-session-services.d.ts.map +1 -1
  86. package/dist/core/agent-session-services.js +0 -1
  87. package/dist/core/agent-session-services.js.map +1 -1
  88. package/dist/core/agent-session-tool-registry.d.ts.map +1 -1
  89. package/dist/core/agent-session-tool-registry.js +0 -2
  90. package/dist/core/agent-session-tool-registry.js.map +1 -1
  91. package/dist/core/agent-session-types.d.ts +0 -2
  92. package/dist/core/agent-session-types.d.ts.map +1 -1
  93. package/dist/core/agent-session-types.js.map +1 -1
  94. package/dist/core/agent-session.d.ts +0 -2
  95. package/dist/core/agent-session.d.ts.map +1 -1
  96. package/dist/core/agent-session.js +0 -1
  97. package/dist/core/agent-session.js.map +1 -1
  98. package/dist/core/atomic-guide-command.d.ts.map +1 -1
  99. package/dist/core/atomic-guide-command.js +1 -1
  100. package/dist/core/atomic-guide-command.js.map +1 -1
  101. package/dist/core/extensions/loader-core.d.ts +1 -3
  102. package/dist/core/extensions/loader-core.d.ts.map +1 -1
  103. package/dist/core/extensions/loader-core.js +13 -6
  104. package/dist/core/extensions/loader-core.js.map +1 -1
  105. package/dist/core/extensions/loader-virtual-modules.d.ts +7 -1
  106. package/dist/core/extensions/loader-virtual-modules.d.ts.map +1 -1
  107. package/dist/core/extensions/loader-virtual-modules.js +34 -2
  108. package/dist/core/extensions/loader-virtual-modules.js.map +1 -1
  109. package/dist/core/extensions/loader.d.ts +2 -1
  110. package/dist/core/extensions/loader.d.ts.map +1 -1
  111. package/dist/core/extensions/loader.js +2 -1
  112. package/dist/core/extensions/loader.js.map +1 -1
  113. package/dist/core/index.d.ts +0 -1
  114. package/dist/core/index.d.ts.map +1 -1
  115. package/dist/core/index.js +0 -1
  116. package/dist/core/index.js.map +1 -1
  117. package/dist/core/model-registry-builtins.d.ts.map +1 -1
  118. package/dist/core/model-registry-builtins.js +6 -0
  119. package/dist/core/model-registry-builtins.js.map +1 -1
  120. package/dist/core/model-registry-schemas.d.ts +65 -13
  121. package/dist/core/model-registry-schemas.d.ts.map +1 -1
  122. package/dist/core/model-registry-schemas.js +10 -0
  123. package/dist/core/model-registry-schemas.js.map +1 -1
  124. package/dist/core/resource-loader-core.d.ts +1 -0
  125. package/dist/core/resource-loader-core.d.ts.map +1 -1
  126. package/dist/core/resource-loader-core.js +2 -0
  127. package/dist/core/resource-loader-core.js.map +1 -1
  128. package/dist/core/resource-loader-extensions.d.ts.map +1 -1
  129. package/dist/core/resource-loader-extensions.js +3 -3
  130. package/dist/core/resource-loader-extensions.js.map +1 -1
  131. package/dist/core/resource-loader-internals.d.ts +1 -0
  132. package/dist/core/resource-loader-internals.d.ts.map +1 -1
  133. package/dist/core/resource-loader-internals.js.map +1 -1
  134. package/dist/core/resource-loader-reload.d.ts.map +1 -1
  135. package/dist/core/resource-loader-reload.js +6 -2
  136. package/dist/core/resource-loader-reload.js.map +1 -1
  137. package/dist/core/sdk-exports.d.ts +1 -1
  138. package/dist/core/sdk-exports.d.ts.map +1 -1
  139. package/dist/core/sdk-exports.js.map +1 -1
  140. package/dist/core/sdk-types.d.ts +0 -3
  141. package/dist/core/sdk-types.d.ts.map +1 -1
  142. package/dist/core/sdk-types.js.map +1 -1
  143. package/dist/core/sdk.d.ts.map +1 -1
  144. package/dist/core/sdk.js +0 -1
  145. package/dist/core/sdk.js.map +1 -1
  146. package/dist/core/session-manager-history.d.ts.map +1 -1
  147. package/dist/core/session-manager-history.js +2 -1
  148. package/dist/core/session-manager-history.js.map +1 -1
  149. package/dist/core/system-prompt.d.ts.map +1 -1
  150. package/dist/core/system-prompt.js +0 -1
  151. package/dist/core/system-prompt.js.map +1 -1
  152. package/dist/core/tools/bash.d.ts +0 -5
  153. package/dist/core/tools/bash.d.ts.map +1 -1
  154. package/dist/core/tools/bash.js +10 -11
  155. package/dist/core/tools/bash.js.map +1 -1
  156. package/dist/core/tools/edit-diff-preserve.d.ts +18 -0
  157. package/dist/core/tools/edit-diff-preserve.d.ts.map +1 -0
  158. package/dist/core/tools/edit-diff-preserve.js +85 -0
  159. package/dist/core/tools/edit-diff-preserve.js.map +1 -0
  160. package/dist/core/tools/edit-diff.d.ts +3 -2
  161. package/dist/core/tools/edit-diff.d.ts.map +1 -1
  162. package/dist/core/tools/edit-diff.js +15 -18
  163. package/dist/core/tools/edit-diff.js.map +1 -1
  164. package/dist/core/tools/index.d.ts +0 -1
  165. package/dist/core/tools/index.d.ts.map +1 -1
  166. package/dist/core/tools/index.js +0 -1
  167. package/dist/core/tools/index.js.map +1 -1
  168. package/dist/index.d.ts +2 -2
  169. package/dist/index.d.ts.map +1 -1
  170. package/dist/index.js +1 -1
  171. package/dist/index.js.map +1 -1
  172. package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
  173. package/dist/modes/interactive/components/model-selector.js +2 -2
  174. package/dist/modes/interactive/components/model-selector.js.map +1 -1
  175. package/dist/modes/interactive/model-search.d.ts +5 -0
  176. package/dist/modes/interactive/model-search.d.ts.map +1 -1
  177. package/dist/modes/interactive/model-search.js +9 -0
  178. package/dist/modes/interactive/model-search.js.map +1 -1
  179. package/dist/utils/shell.d.ts +1 -0
  180. package/dist/utils/shell.d.ts.map +1 -1
  181. package/dist/utils/shell.js +12 -5
  182. package/dist/utils/shell.js.map +1 -1
  183. package/docs/custom-provider.md +4 -3
  184. package/docs/models.md +3 -2
  185. package/docs/packages.md +2 -2
  186. package/docs/quickstart.md +1 -1
  187. package/docs/sdk.md +2 -40
  188. package/docs/security.md +1 -1
  189. package/docs/workflows.md +991 -176
  190. package/package.json +5 -5
  191. package/dist/builtin/workflows/src/workflows/define-workflow.ts +0 -277
  192. package/dist/core/tools/bash-policy-compile.d.ts +0 -5
  193. package/dist/core/tools/bash-policy-compile.d.ts.map +0 -1
  194. package/dist/core/tools/bash-policy-compile.js +0 -241
  195. package/dist/core/tools/bash-policy-compile.js.map +0 -1
  196. package/dist/core/tools/bash-policy-evaluate.d.ts +0 -3
  197. package/dist/core/tools/bash-policy-evaluate.d.ts.map +0 -1
  198. package/dist/core/tools/bash-policy-evaluate.js +0 -92
  199. package/dist/core/tools/bash-policy-evaluate.js.map +0 -1
  200. package/dist/core/tools/bash-policy-format.d.ts +0 -5
  201. package/dist/core/tools/bash-policy-format.d.ts.map +0 -1
  202. package/dist/core/tools/bash-policy-format.js +0 -49
  203. package/dist/core/tools/bash-policy-format.js.map +0 -1
  204. package/dist/core/tools/bash-policy-parser.d.ts +0 -4
  205. package/dist/core/tools/bash-policy-parser.d.ts.map +0 -1
  206. package/dist/core/tools/bash-policy-parser.js +0 -155
  207. package/dist/core/tools/bash-policy-parser.js.map +0 -1
  208. package/dist/core/tools/bash-policy-segment.d.ts +0 -3
  209. package/dist/core/tools/bash-policy-segment.d.ts.map +0 -1
  210. package/dist/core/tools/bash-policy-segment.js +0 -275
  211. package/dist/core/tools/bash-policy-segment.js.map +0 -1
  212. package/dist/core/tools/bash-policy-shell.d.ts +0 -11
  213. package/dist/core/tools/bash-policy-shell.d.ts.map +0 -1
  214. package/dist/core/tools/bash-policy-shell.js +0 -267
  215. package/dist/core/tools/bash-policy-shell.js.map +0 -1
  216. package/dist/core/tools/bash-policy-types.d.ts +0 -146
  217. package/dist/core/tools/bash-policy-types.d.ts.map +0 -1
  218. package/dist/core/tools/bash-policy-types.js +0 -2
  219. package/dist/core/tools/bash-policy-types.js.map +0 -1
  220. package/dist/core/tools/bash-policy.d.ts +0 -6
  221. package/dist/core/tools/bash-policy.d.ts.map +0 -1
  222. package/dist/core/tools/bash-policy.js +0 -5
  223. package/dist/core/tools/bash-policy.js.map +0 -1
package/docs/workflows.md CHANGED
@@ -43,12 +43,14 @@ Use a workflow when a task should be repeatable, inspectable, resumable, or spli
43
43
  - [Direct One-Off Runs](#direct-one-off-runs)
44
44
  - [Fast Inference for Workflow Stages](#fast-inference-for-workflow-stages)
45
45
  - [Writing a Workflow](#writing-a-workflow)
46
+ - [Migrating from the `defineWorkflow()` Builder API](#migrating-from-the-defineworkflow-builder-api)
46
47
  - [Workflow Primitives](#workflow-primitives)
47
48
  - [Task and Stage Options](#task-and-stage-options)
48
49
  - [Programmatic Usage](#programmatic-usage)
49
50
  - [Context Engineering](#context-engineering)
50
51
  - [Design Checklist](#design-checklist)
51
52
  - [Common Mistakes](#common-mistakes)
53
+ - [Workflow Best Practices](#workflow-best-practices)
52
54
 
53
55
  ## Quick Start
54
56
 
@@ -86,7 +88,7 @@ Return structured output with `consolidated_review` and `decision` fields.
86
88
  Atomic will:
87
89
 
88
90
  - ask clarifying questions when stage purpose, inputs, models, or handoffs are ambiguous,
89
- - write a `.atomic/workflows/<name>.ts` file using `defineWorkflow(...).input(...).run(...).compile()`,
91
+ - write a `.atomic/workflows/<name>.ts` file using `workflow({...})`,
90
92
  - pick `ctx.task` / `ctx.chain` / `ctx.parallel` / `ctx.ui` per the [primitives](#workflow-primitives) and [task options](#task-and-stage-options) reference, and
91
93
  - run `/workflow reload` so Atomic rediscovers the workflow resource and you can launch it immediately.
92
94
 
@@ -109,26 +111,29 @@ Named workflow runs are background-oriented. After launch, expect a run id and m
109
111
  Workflow files are plain TypeScript modules. Create `.atomic/workflows/explain-file.ts`:
110
112
 
111
113
  ```ts
112
- import { defineWorkflow, Type } from "@bastani/workflows";
113
-
114
- export default defineWorkflow("explain-file")
115
- .description("Explain a file with tracked workflow stages.")
116
- .input("path", Type.String({ description: "File path to explain." }))
117
- .output(
118
- "explanation",
119
- Type.String({
114
+ import { workflow } from "@bastani/workflows";
115
+ import { Type } from "typebox";
116
+
117
+ export default workflow({
118
+ name: "explain-file",
119
+ description: "Explain a file with tracked workflow stages.",
120
+ inputs: {
121
+ path: Type.String({ description: "File path to explain." }),
122
+ },
123
+ outputs: {
124
+ explanation: Type.String({
120
125
  description: "Explanation of the file's purpose, risks, and key symbols.",
121
126
  }),
122
- )
123
- .run(async (ctx) => {
127
+ },
128
+ run: async (ctx) => {
124
129
  const explanation = await ctx.task("explain", {
125
130
  prompt: `Read ${String(ctx.inputs.path)} and explain purpose, risks, and key symbols.`,
126
131
  context: "fresh",
127
132
  });
128
133
 
129
134
  return { explanation: explanation.text };
130
- })
131
- .compile();
135
+ },
136
+ });
132
137
  ```
133
138
 
134
139
  Run `/workflow reload` or restart Atomic, then list and run it:
@@ -139,21 +144,21 @@ Run `/workflow reload` or restart Atomic, then list and run it:
139
144
  /workflow explain-file path="src/index.ts"
140
145
  ```
141
146
 
142
- See [Writing a Workflow](#writing-a-workflow) for the full builder API and [Workflow Primitives](#workflow-primitives) for `ctx.task` / `ctx.chain` / `ctx.parallel` / `ctx.stage` / `ctx.ui`.
147
+ See [Writing a Workflow](#writing-a-workflow) for the full `workflow({...})` API and [Workflow Primitives](#workflow-primitives) for `ctx.task` / `ctx.chain` / `ctx.parallel` / `ctx.stage` / `ctx.ui`.
143
148
 
144
149
  ## Built-in Workflows
145
150
 
146
151
  Atomic bundles four workflows that cover the most common multi-stage jobs. They are available in every session — no install step required. Use `/workflow list` to confirm they are loaded, and `/workflow inputs <name>` to see the exact inputs in your environment.
147
152
 
148
- These same builtin workflows are also available to workflow authors as compiled definitions. Import them from `@bastani/workflows/builtin` and pass the definition directly to `ctx.workflow(...)` when one workflow should call `deep-research-codebase`, `goal`, `ralph`, `open-claude-design`, or another builtin as a nested child workflow. See [Workflow Composition](#workflow-composition) for full examples alongside user-defined child workflows.
153
+ These same builtin workflows are also available to workflow authors as workflow definitions. Import them from `@bastani/workflows/builtin` and pass the definition directly to `ctx.workflow(...)` when one workflow should call `deep-research-codebase`, `goal`, `ralph`, `open-claude-design`, or another builtin as a nested child workflow. See [Workflow Composition](#workflow-composition) for full examples alongside user-defined child workflows.
149
154
 
150
- For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralph` explicitly declare `.output("result", Type.String(...))` and return a `result` key from `.run()`, so `result` is part of their declared output contract. Every output a workflow exposes — including `result` — must be both declared with `.output(...)` and returned from `.run()`; Atomic no longer adds any automatic `result` output.
155
+ For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralph` explicitly declare `outputs: { result: Type.String(...) }` and return a `result` key from `run`, so `result` is part of their declared output contract. Every output a workflow exposes — including `result` — must be declared in `outputs` and returned from `run` or supplied to `ctx.exit({ outputs })`; Atomic no longer adds any automatic `result` output.
151
156
 
152
157
  | Workflow | What it does | When to use |
153
158
  |---|---|---|
154
159
  | `deep-research-codebase` | Scout + research-history chain → parallel specialist waves → aggregator. Indexes the whole repo and synthesizes findings. | Broad or cross-cutting research before you decide what to change. Prefer `/skill:research-codebase` for one subsystem. |
155
160
  | `goal` | Persisted goal ledger → bounded worker turns → receipts → three-reviewer gate → deterministic reducer → final report. | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, and name the validation that proves it is done — for example tests, lint/typecheck, docs builds, or observable behavior. |
156
- | `ralph` | Prompt-engineering → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
161
+ | `ralph` | Prompt-refinement → research-prompt-refinement → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to refine the prompt for clarity, transform it into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
157
162
  | `open-claude-design` | Design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Renders a live `preview.html` you can iterate against (opens through `browser` when available). | UI, page, component, theme, or design-token work that benefits from generation + critique loops. |
158
163
 
159
164
  ### `deep-research-codebase`
@@ -220,7 +225,7 @@ Run examples:
220
225
  /workflow goal objective="Fix the settings form validation bug; add/adjust the focused test and consider it done when invalid emails show the inline error without submitting"
221
226
  ```
222
227
 
223
- `goal` creates an OS-temp `goal-ledger.json` artifact, renders goal-continuation context for each worker turn, writes each worker receipt to `work-turn-N.md`, and appends receipts, reviewer decisions, blockers, reducer decisions, and lifecycle events to the ledger. The objective is treated as user-provided data, not higher-priority instructions.
228
+ `goal` starts with a single `prompt-refinement` stage that invokes the `prompt-engineer` skill (`/skill:prompt-engineer`) to sharpen the raw objective into a clearer, more actionable form using the Workflow Best Practices prompt anatomy documented later in this guide; the refined objective becomes the operative one recorded in the ledger (the original is preserved as `original_objective` and shown in the final report when it differs). `goal` then creates an OS-temp `goal-ledger.json` artifact, renders goal-continuation context for each worker turn, writes each worker receipt to `work-turn-N.md`, and appends receipts, reviewer decisions, blockers, reducer decisions, and lifecycle events to the ledger. The objective is treated as user-provided data, not higher-priority instructions.
224
229
 
225
230
  Write the `objective` like a compact acceptance spec. Say what should exist when the run is done, how you want testing handled, which command(s) or manual checks matter, and what outcome proves completion. The workflow is intentionally lean: it does not first generate an RFC or migration plan, so the developer-supplied objective is where scope, validation, and completion criteria belong.
226
231
 
@@ -234,7 +239,8 @@ Result fields:
234
239
  | `status` | Final reducer status: `complete`, `blocked`, or `needs_human` (or `active` only if externally interrupted). |
235
240
  | `approved` | Whether the reducer reached `complete`. |
236
241
  | `goal_id` | Per-run goal identifier stored in the ledger. |
237
- | `objective` | Normalized goal objective used by the run. |
242
+ | `objective` | Normalized goal objective used by the run (after the `prompt-refinement` stage refines the raw objective). |
243
+ | `original_objective` | The raw user-provided objective exactly as given, before `prompt-refinement`. Omitted when refinement left it unchanged. |
238
244
  | `ledger_path` | OS-temp path to `goal-ledger.json`, including receipts, reviewer decisions, reducer decisions, blockers, and lifecycle events. |
239
245
  | `turns_completed` | Worker/review turns completed. |
240
246
  | `iterations_completed` | Same value as `turns_completed`, retained for status summaries. |
@@ -262,7 +268,7 @@ Run examples:
262
268
  /workflow ralph prompt="Safely implement the API refactor" git_worktree_dir=../atomic-ralph-api-wt base_branch=main
263
269
  ```
264
270
 
265
- Each `ralph` iteration starts by prompt-engineering the user prompt with `/skill:prompt-engineer Transform the following user prompt to a codebase and online research question which can be thoroughly explored: ...`, then researches that transformed question with `/skill:research-codebase ...` and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs each reviewer on a different primary model family (with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); when `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. If reviewers find issues, the next prompt-engineering and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. 
By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
271
+ Each `ralph` run starts with a single `prompt-refinement` stage that invokes the `prompt-engineer` skill (`/skill:prompt-engineer`) to sharpen the raw user prompt into a clearer, more actionable objective using the Workflow Best Practices prompt anatomy documented later in this guide; that refined prompt becomes the operative objective for research, orchestration, and review, while the original is surfaced as `original_prompt`. Each iteration then transforms the refined prompt with `/skill:prompt-engineer Transform the following refined user request into a codebase and online research question which can be thoroughly explored: ...` (`research-prompt-refinement`), researches that transformed question with `/skill:research-codebase ...`, and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs each reviewer on a different primary model family (with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); when `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. 
If reviewers find issues, the next `research-prompt-refinement` and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
266
272
 
267
273
  Set `git_worktree_dir` when you want Ralph's worker stages isolated in a reusable Git worktree. Relative paths resolve from the invoking repository root, existing same-repository worktree roots are reused, and missing paths are created from `base_branch`. Ralph preserves the invoking repo-relative cwd inside the worktree, so launching from `repo/packages/api` with `git_worktree_dir=../repo-wt` runs stages from `../repo-wt/packages/api`.
268
274
 
@@ -282,6 +288,8 @@ Result fields:
282
288
  | `iterations_completed` | Number of research/orchestrate/review loops completed. |
283
289
  | `review_report` | Compact reference to the latest reviewer payload artifact. |
284
290
  | `review_report_path` | JSON artifact path for the latest Ralph review round. |
291
+ | `original_prompt` | The raw user prompt exactly as provided, before the `prompt-refinement` stage. |
292
+ | `refined_prompt` | The clarity-refined prompt produced by the `prompt-refinement` stage and used as the operative objective for research, orchestration, and review. |
285
293
 
286
294
  A typical end-to-end flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow goal objective="Implement the researched rate-limit behavior, run the focused tests, and finish when the documented burst behavior is validated"` when you can identify the work surface, state the exact outcome, and name the validation that proves it is done. Keep using `/workflow ralph` for larger migrations, broad refactors, and multi-package changes where you want Atomic to research first, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
287
295
 
@@ -388,7 +396,7 @@ If the task is only deterministic TypeScript with no LLM/session stage, use a sc
388
396
  | Run, inspect, attach to, pause, interrupt, resume, or check status for an existing workflow | `/workflow ...` or `workflow({ action: ... })` |
389
397
  | Implement a small-to-medium scope change with an identifiable work surface, exact outcome, and named validation | `/workflow goal objective="..."` so Atomic keeps the run bounded, captures receipts in a goal ledger, gates completion through reviewers, and stops as `complete`, `blocked`, or `needs_human` |
390
398
  | Research and execute a larger migration, broad refactor, or multi-package change | `/workflow ralph prompt="..."` so Atomic can transform the prompt into a research question, research the codebase first, delegate implementation through sub-agents, review, and iterate; prompt text alone does not opt in to PR creation, so add `create_pr=true` only when you want the final `pull-request` stage and `pr_report` |
391
- | Create or edit reusable automation | a TypeScript workflow definition exported from `defineWorkflow(...).compile()` |
399
+ | Create or edit reusable automation | a TypeScript workflow definition exported from `workflow({...})` |
392
400
  | Track one-off work without saving a workflow file | direct `workflow({ task })`, `workflow({ tasks })`, or `workflow({ chain })` calls |
393
401
  | Make a workflow robust | design the stage graph, context handoffs, artifacts, validation gates, model fallbacks, and human approval points before coding |
394
402
 
@@ -594,20 +602,21 @@ Atomic discovers workflow definitions in this order:
594
602
 
595
603
  A workflow module may export one default workflow definition and/or named workflow definitions. Discovery checks the default export first, then named exports.
596
604
 
597
- Every runtime export of a discovered workflow file is validated as a workflow definition. A named export that is not a compiled definition — a widget factory, shared constant, or utility function — is rejected with an `INVALID_DEFINITION` discovery diagnostic (`export is not an object`), even when the module also has a valid default export (the valid workflow still loads; the diagnostic flags the extra export as skipped). Type-only exports (`export type` / `export interface`) are erased at runtime and never flagged.
605
+ Every runtime export of a discovered workflow file is validated as a workflow definition. A named export that is not a workflow definition — a widget factory, shared constant, or utility function — is rejected with an `INVALID_DEFINITION` discovery diagnostic (`export is not an object`), even when the module also has a valid default export (the valid workflow still loads; the diagnostic flags the extra export as skipped). Type-only exports (`export type` / `export interface`) are erased at runtime and never flagged.
598
606
 
599
607
  To co-locate reusable helpers with your workflows — for example a `ctx.ui.custom<T>` widget factory you want to import in tests without running the workflow — put them in a subdirectory and import them from the workflow file. Discovery scans only the top level of each workflow directory, so subdirectories such as `.atomic/workflows/lib/` are never treated as workflow modules:
600
608
 
601
609
  ```text
602
610
  .atomic/workflows/
603
- release-picker.ts # only runtime export: defineWorkflow(...).compile()
611
+ release-picker.ts # only runtime export: workflow({...})
604
612
  lib/
605
613
  table-selector.ts # widget factory + helpers; not scanned by discovery
606
614
  ```
607
615
 
608
616
  ```ts
609
617
  // .atomic/workflows/release-picker.ts
610
- import { defineWorkflow, Type } from "@bastani/workflows";
618
+ import { workflow } from "@bastani/workflows";
619
+ import { Type } from "typebox";
611
620
  import { tableSelectorFactory } from "./lib/table-selector.js";
612
621
  ```
613
622
 
@@ -973,7 +982,7 @@ workflow({
973
982
  })
974
983
  ```
975
984
 
976
- Direct mode supports top-level/default options and per-task options such as `context`, `forkFromSessionFile`, `model`, `fallbackModels`, `thinkingLevel`, `contextWindow`, `tools`, `noTools`, `customTools`, `bashPolicy`, `mcp`, `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, and `agentDir`. Direct chains also support `chainName`, `chainDir`, and `failFast`.
985
+ Direct mode supports top-level/default options and per-task options such as `context`, `forkFromSessionFile`, `model`, `fallbackModels`, `thinkingLevel`, `contextWindow`, `tools`, `noTools`, `customTools`, `mcp`, `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, and `agentDir`. Direct chains also support `chainName`, `chainDir`, and `failFast`.
977
986
 
978
987
  For large fan-outs, prefer `outputMode: "file-only"` so the parent result contains compact file references instead of full output. Treat intercom payloads from async direct runs as user-visible workflow output.
979
988
 
@@ -991,17 +1000,23 @@ Enable workflow fast mode deliberately for broad workflows: parallel fan-out and
991
1000
 
992
1001
  ## Writing a Workflow
993
1002
 
994
- Workflow files are TypeScript modules that export a compiled definition:
1003
+ Workflow files are TypeScript modules that export a workflow definition:
995
1004
 
996
1005
  ```ts
997
- import { defineWorkflow, Type } from "@bastani/workflows";
998
-
999
- export default defineWorkflow("my-workflow")
1000
- .description("Short description shown in workflow listings.")
1001
- .input("prompt", Type.String({ description: "Task or question for the workflow." }))
1002
- .output("summary", Type.String({ description: "Synthesized findings and recommended next steps." }))
1003
- .output("reviewer_count", Type.Number({ description: "Number of parallel reviewers that ran." }))
1004
- .run(async (ctx) => {
1006
+ import { workflow } from "@bastani/workflows";
1007
+ import { Type } from "typebox";
1008
+
1009
+ export default workflow({
1010
+ name: "my-workflow",
1011
+ description: "Short description shown in workflow listings.",
1012
+ inputs: {
1013
+ prompt: Type.String({ description: "Task or question for the workflow." }),
1014
+ },
1015
+ outputs: {
1016
+ summary: Type.String({ description: "Synthesized findings and recommended next steps." }),
1017
+ reviewer_count: Type.Number({ description: "Number of parallel reviewers that ran." }),
1018
+ },
1019
+ run: async (ctx) => {
1005
1020
  const prompt = String(ctx.inputs.prompt);
1006
1021
 
1007
1022
  const scoutPath = ".atomic/workflows/runs/my-workflow/scout.md";
@@ -1047,20 +1062,21 @@ export default defineWorkflow("my-workflow")
1047
1062
  });
1048
1063
 
1049
1064
  return { summary: final.text, reviewer_count: reviews.length };
1050
- })
1051
- .compile();
1065
+ },
1066
+ });
1052
1067
  ```
1053
1068
 
1054
- Builder basics:
1069
+ Authoring basics:
1055
1070
 
1056
- - `defineWorkflow("name")` starts a builder; the name must be non-empty.
1071
+ - `workflow({ ... })` returns the workflow definition directly for discovery; there is no builder terminal step.
1057
1072
  - Workflow names normalize for lookup: trim, lowercase, convert whitespace/underscore to hyphen, remove other punctuation, and collapse hyphens.
1058
- - `.description(text)` sets the listing text.
1059
- - `.input(key, schema)` declares typed user inputs.
1060
- - `.worktreeFromInputs({ gitWorktreeDir, baseBranch })` optionally maps input names to workflow-wide reusable Git worktree defaults.
1061
- - `.output(key, schema)` declares typed outputs that parent workflows receive from `ctx.workflow(childWorkflow, ...)`.
1062
- - `.run(async (ctx) => { ... })` defines the workflow body.
1063
- - `.compile()` returns the workflow definition for discovery.
1073
+ - `description` sets the listing text.
1074
+ - `inputs` declares typed user inputs.
1075
+ - `worktreeFromInputs` optionally maps input names to workflow-wide reusable Git worktree defaults.
1076
+ - `outputs` declares typed outputs that parent workflows receive from `ctx.workflow(childWorkflow, ...)`.
1077
+ - `run: async (ctx) => { ... }` defines the workflow body.
1078
+
1079
+ Migrating an existing file from the removed `defineWorkflow(...).compile()` builder? See [Migrating from the `defineWorkflow()` Builder API](#migrating-from-the-defineworkflow-builder-api) for the full method-to-key mapping, a before/after walkthrough, and a conversion checklist.
1064
1080
 
1065
1081
  `prompt` and `task` are aliases for task text. Prefer `prompt` inside authored workflow files because it mirrors lower-level `stage.prompt(...)`; `task` remains useful in direct tool calls and chain examples.
1066
1082
 
@@ -1068,12 +1084,17 @@ Author workflows to create at least one tracked stage by calling `ctx.task()`, `
1068
1084
 
1069
1085
  ### Early exit with `ctx.exit()`
1070
1086
 
1071
- Use `ctx.exit(options?)` when workflow code intentionally stops the current run from a helper, branch, loop, or precondition guard without classifying the run as failed. `ctx.exit()` throws an executor-owned control signal and is typed as `never`, so code after it is unreachable. In async `.run()` bodies, prefer `return ctx.exit(...)` when the exit is the only path so TypeScript can see the non-returning branch.
1087
+ Use `ctx.exit(options?)` when workflow code intentionally stops the current run from a helper, branch, loop, or precondition guard without classifying the run as failed. `ctx.exit()` throws an executor-owned control signal and is typed as `never`, so code after it is unreachable. In async `run` bodies, prefer `return ctx.exit(...)` when the exit is the only path so TypeScript can see the non-returning branch.
1072
1088
 
1073
1089
  ```ts
1074
- export default defineWorkflow("guarded-import")
1075
- .output("scanned", Type.Number())
1076
- .run(async (ctx) => {
1090
+ export default workflow({
1091
+ name: "guarded-import",
1092
+ description: "",
1093
+ inputs: {},
1094
+ outputs: {
1095
+ scanned: Type.Number(),
1096
+ },
1097
+ run: async (ctx) => {
1077
1098
  const files = await findCandidateFiles(ctx.cwd);
1078
1099
  if (files.length === 0) {
1079
1100
  return ctx.exit({
@@ -1085,11 +1106,11 @@ export default defineWorkflow("guarded-import")
1085
1106
 
1086
1107
  const review = await ctx.task("review", { prompt: `Review ${files.join(", ")}` });
1087
1108
  return { scanned: files.length };
1088
- })
1089
- .compile();
1109
+ },
1110
+ });
1090
1111
  ```
1091
1112
 
1092
- `ctx.exit()` accepts `status: "completed" | "skipped" | "cancelled" | "blocked"`; it never accepts `"failed"` or `"killed"` because thrown errors and external run-control keep those meanings. `status` defaults to `"completed"`. `reason` is persisted and shown in status surfaces, including the default `/workflow status` list and `/workflow status <runId>` detail, so do not put secrets in it. `outputs` may contain a partial subset of declared outputs; provided keys still must be declared with `.output(...)`, match their TypeBox schema, and be JSON-serializable. Missing required outputs are allowed only on the `ctx.exit(...)` path. Exited runs are terminal and not resumable; external `kill`, `pause`, and `interrupt` keep their existing behavior.
1113
+ `ctx.exit()` accepts `status: "completed" | "skipped" | "cancelled" | "blocked"`; it never accepts `"failed"` or `"killed"` because thrown errors and external run-control keep those meanings. `status` defaults to `"completed"`. `reason` is persisted and shown in status surfaces, including the default `/workflow status` list and `/workflow status <runId>` detail, so do not put secrets in it. `outputs` may contain a partial subset of declared outputs; provided keys still must be declared in the workflow's `outputs` object, match their TypeBox schema, and be JSON-serializable. Missing required outputs are allowed only on the `ctx.exit(...)` path. Exited runs are terminal and not resumable; external `kill`, `pause`, and `interrupt` keep their existing behavior.
1093
1114
 
1094
1115
  The first selected `ctx.exit({ outputs })` snapshots its output payload synchronously by value before JavaScript `finally` blocks or cleanup callbacks can mutate the caller-owned object. The snapshot preserves undeclared keys and invalid values until post-cleanup validation, so deleting an undeclared key or changing an invalid value after `ctx.exit(...)` does not change the terminal validation result. If reading `status`, `reason`, or `outputs` options, or enumerating/copying the output snapshot itself, throws, Atomic still selects the exit signal, runs workflow-exit cleanup when feasible, and then records a terminal non-resumable authoring failure (`resumable: false`) if no external terminal control won first.
1095
1116
 
@@ -1122,7 +1143,7 @@ Workflow guidance should also cover the context passed between stages:
1122
1143
 
1123
1144
  ### Inputs
1124
1145
 
1125
- Inputs are declared with TypeBox `Type.*` schemas passed to `.input(key, schema)`. `Type` is re-exported from `@bastani/workflows` (along with the `Static` and `TSchema` type helpers), so you do not import from `typebox` directly in workflow files. Workflow packages still declare `typebox` as a peer dependency so the SDK's shipped types resolve under `tsc` — see [Programmatic Usage](#programmatic-usage). Common input schemas map to picker kinds and accepted runtime values:
1146
+ Inputs are declared with TypeBox `Type.*` schemas in the `inputs` object. Import `Type` from `typebox` directly in workflow files. Workflow packages still declare `typebox` as a peer dependency so TypeBox schemas resolve under `tsc` — see [Programmatic Usage](#programmatic-usage). Common input schemas map to picker kinds and accepted runtime values:
1126
1147
 
1127
1148
  | TypeBox schema | Picker kind | Accepted runtime value |
1128
1149
  |---|---|---|
@@ -1136,21 +1157,26 @@ A `Type.Union([Type.Literal(...)])` of string literals is how a 'select' is expr
1136
1157
 
1137
1158
  Prefer explicit descriptions because `/workflow inputs <name>`, `/workflow <name> --help`, and the input picker show them to the user. Runtime validation uses TypeBox `Value` and is strict for both top-level named runs and `ctx.workflow(...)` child calls: Atomic rejects unknown keys, missing required values, type mismatches, non-JSON-serializable values, and union/literal values outside the declared choices before the workflow body starts. It does not coerce strings like `"3"` to numbers; pass `count=3` or JSON numbers when a schema declares `Type.Number()`.
1138
1159
 
1139
- In TypeScript workflow files, `.input(...)` also narrows `ctx.inputs` for better intellisense: required/defaulted `Type.String()` inputs are `string`, `Type.Number()` is `number`, `Type.Boolean()` is `boolean`, a `Type.Union([Type.Literal(...)])` select is the literal string union, and `Type.Optional(...)` inputs include `undefined`. Use `Static<typeof schema>` when you need the inferred TypeScript type of a schema directly.
1160
+ In TypeScript workflow files, entries in `inputs` also narrow `ctx.inputs` for better IntelliSense: required/defaulted `Type.String()` inputs are `string`, `Type.Number()` is `number`, `Type.Boolean()` is `boolean`, a `Type.Union([Type.Literal(...)])` select is the literal string union, and `Type.Optional(...)` inputs include `undefined`. Use `Static<typeof schema>` when you need the inferred TypeScript type of a schema directly.
1140
1161
 
1141
1162
  ### Outputs
1142
1163
 
1143
- Workflow outputs are runtime contracts for completed workflow runs and for parent workflows that call a child with `ctx.workflow(childWorkflow, ...)`. A workflow normally returns a JSON-serializable object from `.run()`, and `.output(key, schema)` documents, validates, and exposes keys from that returned object. `ctx.exit({ outputs })` can expose a partial subset of the same declared output contract when the run intentionally stops early. Primitives, arrays, `null`, functions, symbols, `undefined` properties, `NaN`, and infinite numbers fail validation.
1164
+ Workflow outputs are runtime contracts for completed workflow runs and for parent workflows that call a child with `ctx.workflow(childWorkflow, ...)`. A workflow normally returns a JSON-serializable object from `run`, and entries in the `outputs` object document, validate, and expose keys from that returned object. `ctx.exit({ outputs })` can expose a partial subset of the same declared output contract when the run intentionally stops early. Non-object return values (primitives, arrays, and `null`) fail validation, as do functions, symbols, `undefined` properties, `NaN`, and infinite numbers.
1144
1165
 
1145
- **Return convention:** outputs are return-object keys. Atomic never infers child workflow outputs from stage names, stage order, or the final assistant message. If a parent should read `child.outputs.foo`, the child workflow's `.run()` must both declare `.output("foo", schema)` and return `{ foo: value }`. `result` is not special and is never added for you: to expose `result`, declare `.output("result", schema)` and return `{ result }` exactly like any other output. Returning a key that is not declared with `.output(...)` fails the run with `atomic-workflows: workflow "<name>" returned undeclared output "<key>"; declare it with .output("<key>", Type....) or remove it from the .run() return`.
1166
+ **Return convention:** outputs are return-object keys. Atomic never infers child workflow outputs from stage names, stage order, or the final assistant message. If a parent should read `child.outputs.foo`, the child workflow must both declare `outputs: { foo: schema }` and return `{ foo: value }` from `run`. `result` is not special and is never added for you: to expose `result`, declare it in `outputs` and return `{ result }` exactly like any other output. Returning a key that is not declared in `outputs` fails the run with `atomic-workflows: workflow "<name>" returned undeclared output "<key>"; declare it in outputs or remove it from the run return`.
1146
1167
 
1147
- `.output(...)` is a schema contract, not an automatic stage selector. To expose values from any stage, capture the stage/task/child result in normal TypeScript and return it from `.run()` under the desired key:
1168
+ The `outputs` object is a schema contract, not an automatic stage selector. To expose values from any stage, capture the stage/task/child result in normal TypeScript and return it from `run` under the desired key:
1148
1169
 
1149
1170
  ```ts
1150
- export default defineWorkflow("review-with-summary")
1151
- .output("research_artifact", Type.String())
1152
- .output("review", Type.String())
1153
- .run(async (ctx) => {
1171
+ export default workflow({
1172
+ name: "review-with-summary",
1173
+ description: "Review with returned artifacts.",
1174
+ inputs: {},
1175
+ outputs: {
1176
+ research_artifact: Type.String(),
1177
+ review: Type.String(),
1178
+ },
1179
+ run: async (ctx) => {
1154
1180
  const researchPath = ".atomic/workflows/runs/review-with-summary/research.md";
1155
1181
  await ctx.task("research", {
1156
1182
  prompt: "Research the target.",
@@ -1166,13 +1192,13 @@ export default defineWorkflow("review-with-summary")
1166
1192
  research_artifact: researchPath,
1167
1193
  review: review.text,
1168
1194
  };
1169
- })
1170
- .compile();
1195
+ },
1196
+ });
1171
1197
  ```
1172
1198
 
1173
- There is no automatic `result` output. A workflow exposes exactly the keys it declares with `.output(...)` and returns from `.run()` — nothing more. To expose `result`, declare `.output("result", schema)` and return `{ result }` like any other output. If `.run()` returns a key that was never declared with `.output(...)`, the run fails with `atomic-workflows: workflow "<name>" returned undeclared output "<key>"; declare it with .output("<key>", Type....) or remove it from the .run() return` (for a child workflow call, `<name>` is the child's own name, and the parent surfaces the failure through the child-failure wrapper `atomic-workflows: child workflow "<childName>" (<displayName>) failed with status failed: ...`).
1199
+ There is no automatic `result` output. A workflow exposes exactly the keys it declares in `outputs` and returns from `run` — nothing more. To expose `result`, declare `outputs: { result: schema }` and return `{ result }` like any other output. If `run` returns a key that was never declared in `outputs`, the run fails with `atomic-workflows: workflow "<name>" returned undeclared output "<key>"; declare it in outputs or remove it from the run return` (for a child workflow call, `<name>` is the child's own name, and the parent surfaces the failure through the child-failure wrapper `atomic-workflows: child workflow "<childName>" (<displayName>) failed with status failed: ...`).
1174
1200
 
1175
- Outputs are declared with TypeBox `Type.*` schemas passed to `.output(key, schema)`. **Prefer precise schemas.** A precise schema gives a precise `Static<>` type for the `.run()` return and for any parent reading `child.outputs`, and it makes runtime validation enforce the real shape instead of waving values through. Reach for `Type.Unknown()`, `Type.Any()`, `Type.Array(Type.Unknown())`, or `Type.Object({}, { additionalProperties: true })` only for genuinely dynamic data whose shape you cannot know ahead of time.
1201
+ Outputs are declared with TypeBox `Type.*` schemas in the `outputs` object. **Prefer precise schemas.** A precise schema gives a precise `Static<>` type for the `run` return and for any parent reading `child.outputs`, and it makes runtime validation enforce the real shape instead of waving values through. Reach for `Type.Unknown()`, `Type.Any()`, `Type.Array(Type.Unknown())`, or `Type.Object({}, { additionalProperties: true })` only for genuinely dynamic data whose shape you cannot know ahead of time.
1176
1202
 
1177
1203
  | TypeBox schema | Static type | Accepted runtime value |
1178
1204
  |---|---|---|
@@ -1188,36 +1214,38 @@ Outputs are declared with TypeBox `Type.*` schemas passed to `.output(key, schem
1188
1214
  | `Type.Object({}, { additionalProperties: true })` | `Record<string, unknown>` | any JSON object (last resort, dynamic only) |
1189
1215
  | `Type.Unknown()` / `Type.Any()` | `unknown` / `any` | any JSON-serializable value (last resort) |
1190
1216
 
1191
- Output schemas carry `description` in their options object. A declared output is required when its schema is **not** wrapped in `Type.Optional(...)`; wrap outputs that may be absent in `Type.Optional(...)`. A required output means the workflow `.run()` return object must contain that output before the run can complete; a missing required output fails with `missing output "<key>"`, and a declared value whose runtime type does not match the schema fails with `output "<key>" expected <type>, got <actual>`. For child workflow calls, the parent boundary fails before the parent continues. Declared outputs are validated against the declared schema with TypeBox `Value` on completion, and every returned/exposed value is recursively validated as JSON-serializable. Child output replay still performs a structured-clone safety check after JSON validation so continuation can restore completed child workflow boundaries.
1217
+ Output schemas carry `description` in their options object. A declared output is required when its schema is **not** wrapped in `Type.Optional(...)`; wrap outputs that may be absent in `Type.Optional(...)`. A required output means the workflow `run` return object must contain that output before the run can complete; a missing required output fails with `missing output "<key>"`, and a declared value whose runtime type does not match the schema fails with `output "<key>" expected <type>, got <actual>`. For child workflow calls, the parent boundary fails before the parent continues. Declared outputs are validated against the declared schema with TypeBox `Value` on completion, and every returned/exposed value is recursively validated as JSON-serializable. Child output replay still performs a structured-clone safety check after JSON validation so continuation can restore completed child workflow boundaries.
1192
1218
 
1193
1219
  #### Prefer precise schemas
1194
1220
 
1195
- A loose output like `Type.Unknown()` or `Type.Object({}, { additionalProperties: true })` types the `.run()` return and `child.outputs.x` as `unknown`/`Record<string, unknown>`, so every consumer must cast or guard before using the value, and runtime validation only checks "is this JSON?" instead of the real shape. Declaring the shape fixes both at once:
1221
+ A loose output like `Type.Unknown()` or `Type.Object({}, { additionalProperties: true })` types the `run` return and `child.outputs.x` as `unknown`/`Record<string, unknown>`, so every consumer must cast or guard before using the value, and runtime validation only checks "is this JSON?" instead of the real shape. Declaring the shape fixes both at once:
1196
1222
 
1197
1223
  ```ts
1198
1224
  // ❌ Loose: child.outputs.report is `unknown`; nothing checks the shape at runtime.
1199
- .output("report", Type.Unknown())
1225
+ outputs: {
1226
+ report: Type.Unknown(),
1227
+ }
1200
1228
 
1201
1229
  // ✅ Precise: child.outputs.report is `{ topic: string; score: number; tags: string[] }`,
1202
1230
  // and TypeBox rejects a returned value missing `score` or with a non-number `score`.
1203
- .output(
1204
- "report",
1205
- Type.Object({
1231
+ outputs: {
1232
+ report: Type.Object({
1206
1233
  topic: Type.String(),
1207
1234
  score: Type.Number(),
1208
1235
  tags: Type.Array(Type.String()),
1209
1236
  }),
1210
- )
1237
+ }
1211
1238
  ```
1212
1239
 
1213
- The same rule applies to inputs: `.input("counts", Type.Array(Type.Number()))` makes `ctx.inputs.counts` a `number[]`, while `Type.Array(Type.Unknown())` only gives you `unknown[]`.
1240
+ The same rule applies to inputs: `inputs: { counts: Type.Array(Type.Number()) }` makes `ctx.inputs.counts` a `number[]`, while `Type.Array(Type.Unknown())` only gives you `unknown[]`.
1214
1241
 
1215
1242
  #### `Type.Unsafe<T>()` escape hatch for deeply-nested values
1216
1243
 
1217
- When you already have a precise TypeScript type for a deeply-nested serializable value and don't want to hand-write the equivalent TypeBox schema, wrap a permissive runtime schema with `Type.Unsafe<MyType>(...)`. The **static** type becomes exactly `MyType` (so `ctx.inputs`, the `.run()` return, and `child.outputs` stay precise), while the **runtime** check stays as lenient as the wrapped schema. Use a `type` alias rather than an `interface` for the wrapped type — an `interface` has no implicit index signature, so it does not satisfy the serializable-output constraint:
1244
+ When you already have a precise TypeScript type for a deeply-nested serializable value and don't want to hand-write the equivalent TypeBox schema, wrap a permissive runtime schema with `Type.Unsafe<MyType>(...)`. The **static** type becomes exactly `MyType` (so `ctx.inputs`, the `run` return, and `child.outputs` stay precise), while the **runtime** check stays as lenient as the wrapped schema. Use a `type` alias rather than an `interface` for the wrapped type — an `interface` has no implicit index signature, so it does not satisfy the serializable-output constraint:
1218
1245
 
1219
1246
  ```ts
1220
- import { defineWorkflow, Type } from "@bastani/workflows";
1247
+ import { workflow } from "@bastani/workflows";
1248
+ import { Type } from "typebox";
1221
1249
 
1222
1250
  type ResearchPacket = {
1223
1251
  readonly topic: string;
@@ -1225,65 +1253,83 @@ type ResearchPacket = {
1225
1253
  readonly sections: readonly { readonly heading: string; readonly body: string }[];
1226
1254
  };
1227
1255
 
1228
- export default defineWorkflow("research-packet")
1229
- .input("topic", Type.String())
1230
- // Static type = ResearchPacket; runtime only checks "is a JSON object".
1231
- .output("packet", Type.Unsafe<ResearchPacket>(Type.Object({}, { additionalProperties: true })))
1232
- .run(async (ctx) => {
1256
+ export default workflow({
1257
+ name: "research-packet",
1258
+ description: "",
1259
+ inputs: {
1260
+ topic: Type.String(),
1261
+ },
1262
+ outputs: {
1263
+ packet: Type.Unsafe<ResearchPacket>(Type.Object({}, { additionalProperties: true })),
1264
+ },
1265
+ run: async (ctx) => {
1233
1266
  const packet: ResearchPacket = {
1234
1267
  topic: ctx.inputs.topic,
1235
1268
  score: 1,
1236
1269
  sections: [{ heading: "overview", body: "…" }],
1237
1270
  };
1238
1271
  return { packet }; // statically checked against ResearchPacket
1239
- })
1240
- .compile();
1272
+ },
1273
+ });
1241
1274
  ```
1242
1275
 
1243
1276
  Tradeoff: `Type.Unsafe<T>()` does not deeply validate at runtime — it trusts that the produced value matches `T`. Use it when the producing code already guarantees the shape (the `contract-complex-leaf` contract workflow does exactly this, wrapping `Type.Unsafe<ComplexPacket>(...)` and `Type.Unsafe<readonly ComplexRecord[]>(...)` around permissive runtime schemas). When you can express the shape directly, prefer a real `Type.Object(...)`/`Type.Array(...)` so runtime validation also catches drift. Keep bare `Type.Unknown()` and `Type.Object({}, { additionalProperties: true })` for the rare cases where the value is genuinely dynamic.
1244
1277
 
1245
1278
  #### How types flow
1246
1279
 
1247
- - `ctx.inputs.x` is `Static<inputSchema>` for the input you declared with `.input("x", schema)` — required and defaulted schemas are always present, and `Type.Optional(...)` adds `| undefined`.
1248
- - The `.run()` return is checked against your declared outputs at **compile time** (a missing required output or a wrong value type is a TypeScript error) and at **runtime** via TypeBox `Value` (undeclared keys are rejected and the declared shape is enforced recursively).
1249
- - `ctx.workflow(child)` returns a discriminated child result. When `child.exited === false`, `child.outputs` is the child's full declared `.output(...)` contract; when `child.exited === true`, `child.outputs` is `Partial<TOutputs>` because child `ctx.exit({ outputs })` may intentionally provide only a subset.
1280
+ - `ctx.inputs.x` is `Static<inputSchema>` for the input you declared as `inputs: { x: schema }` — required and defaulted schemas are always present, and `Type.Optional(...)` adds `| undefined`.
1281
+ - The `run` return is checked against your declared outputs at **compile time** (a missing required output or a wrong value type is a TypeScript error) and at **runtime** via TypeBox `Value` (undeclared keys are rejected and the declared shape is enforced recursively).
1282
+ - `ctx.workflow(child)` returns a discriminated child result. When `child.exited === false`, `child.outputs` is the child's full declared `outputs` contract; when `child.exited === true`, `child.outputs` is `Partial<TOutputs>` because child `ctx.exit({ outputs })` may intentionally provide only a subset.
1250
1283
 
1251
1284
  Use `Static<typeof schema>` (both `Static` and `TSchema` are re-exported from `@bastani/workflows`) when you need the inferred TypeScript type of a schema directly — for example to type a helper that builds an output value.
1252
1285
 
1253
1286
  ### Workflow Composition
1254
1287
 
1255
- Use workflow composition when one workflow should call another reusable workflow and consume its outputs as a tracked boundary stage. The child can be a user-defined workflow from your project/package or a bundled builtin workflow. In both cases, use normal TypeScript imports: import the compiled child workflow definition, then pass that definition directly to `ctx.workflow(workflowDefinition, options)`. Registry names, path objects, and string aliases are not accepted by `ctx.workflow(...)`.
1288
+ Use workflow composition when one workflow should call another reusable workflow and consume its outputs as a tracked boundary stage. The child can be a user-defined workflow from your project/package or a bundled builtin workflow. In both cases, use normal TypeScript imports: import the child workflow definition, then pass that definition directly to `ctx.workflow(workflowDefinition, options)`. Registry names, path objects, and string aliases are not accepted by `ctx.workflow(...)`.
1256
1289
 
1257
- For workflows intended to be called by parent workflows, declare `.output(...)` for every field a parent should rely on, including `result`. No output exists without declaration: a child exposes exactly its declared outputs, and returning an undeclared key fails the child call.
1290
+ For workflows intended to be called by parent workflows, declare every field a parent should rely on in the child workflow's `outputs` object, including `result`. No output exists without declaration: a child exposes exactly its declared outputs, and returning an undeclared key fails the child call.
1258
1291
 
1259
1292
  #### Compose with a user-defined workflow
1260
1293
 
1261
- User-defined workflows are ordinary TypeScript modules. Import the compiled definition with a relative module specifier and call it directly from the parent workflow:
1294
+ User-defined workflows are ordinary TypeScript modules. Import the workflow definition with a relative module specifier and call it directly from the parent workflow:
1262
1295
 
1263
1296
  ```ts
1264
1297
  // .atomic/workflows/shared-research.ts
1265
- import { defineWorkflow, Type } from "@bastani/workflows";
1298
+ import { workflow } from "@bastani/workflows";
1299
+ import { Type } from "typebox";
1266
1300
 
1267
- export default defineWorkflow("shared-research")
1268
- .input("topic", Type.String())
1269
- .output("summary", Type.String({ description: "Research summary markdown." }))
1270
- // Precise element type: child.outputs.sources is `string[] | undefined`, not `unknown[]`.
1271
- .output("sources", Type.Optional(Type.Array(Type.String(), { description: "Source URLs and file references." })))
1272
- .run(async (ctx) => {
1301
+ export default workflow({
1302
+ name: "shared-research",
1303
+ description: "",
1304
+ inputs: {
1305
+ topic: Type.String(),
1306
+ },
1307
+ outputs: {
1308
+ summary: Type.String({ description: "Research summary markdown." }),
1309
+ sources: Type.Optional(Type.Array(Type.String(), { description: "Source URLs and file references." })),
1310
+ },
1311
+ run: async (ctx) => {
1273
1312
  const result = await ctx.task("research", { prompt: `Research ${String(ctx.inputs.topic)}` });
1274
1313
  return { summary: result.text, sources: [] };
1275
- })
1276
- .compile();
1314
+ },
1315
+ });
1277
1316
 
1278
1317
  // .atomic/workflows/research-and-synthesize.ts
1279
- import { defineWorkflow, Type } from "@bastani/workflows";
1318
+ import { workflow } from "@bastani/workflows";
1319
+ import { Type } from "typebox";
1280
1320
  import sharedResearch from "./shared-research.js";
1281
1321
 
1282
- export default defineWorkflow("research-and-synthesize")
1283
- .input("topic", Type.String())
1284
- .output("final", Type.String({ description: "Synthesis built from the child research summary." }))
1285
- .output("child_run_id", Type.String({ description: "Run id of the nested shared-research child." }))
1286
- .run(async (ctx) => {
1322
+ export default workflow({
1323
+ name: "research-and-synthesize",
1324
+ description: "Run shared research and synthesize it.",
1325
+ inputs: {
1326
+ topic: Type.String(),
1327
+ },
1328
+ outputs: {
1329
+ final: Type.String({ description: "Synthesis built from the child research summary." }),
1330
+ child_run_id: Type.String({ description: "Run id of the nested shared-research child." }),
1331
+ },
1332
+ run: async (ctx) => {
1287
1333
  const child = await ctx.workflow(sharedResearch, {
1288
1334
  inputs: { topic: ctx.inputs.topic },
1289
1335
  stageName: "run shared research",
@@ -1296,13 +1342,13 @@ export default defineWorkflow("research-and-synthesize")
1296
1342
  prompt: `Synthesize:\n\n${String(child.outputs.summary)}`,
1297
1343
  });
1298
1344
  return { final: final.text, child_run_id: child.runId };
1299
- })
1300
- .compile();
1345
+ },
1346
+ });
1301
1347
  ```
1302
1348
 
1303
1349
  #### Compose with builtin workflows
1304
1350
 
1305
- Builtin workflows are also exported as compiled workflow definitions, so parent workflows can call them exactly like user-defined workflows. Use the barrel export when you want several builtins:
1351
+ Builtin workflows are also exported as workflow definitions, so parent workflows can call them exactly like user-defined workflows. Use the barrel export when you want several builtins:
1306
1352
 
1307
1353
  ```ts
1308
1354
  import { deepResearchCodebase, goal, openClaudeDesign, ralph } from "@bastani/workflows/builtin";
@@ -1329,25 +1375,29 @@ Common builtin import targets:
1329
1375
  Example parent workflow that runs builtin deep research, then chooses either `goal` or `ralph` as the nested implementation runner:
1330
1376
 
1331
1377
  ```ts
1332
- import { defineWorkflow, Type } from "@bastani/workflows";
1378
+ import { workflow } from "@bastani/workflows";
1379
+ import { Type } from "typebox";
1333
1380
  import { deepResearchCodebase, goal, ralph } from "@bastani/workflows/builtin";
1334
1381
 
1335
- export default defineWorkflow("research-then-implement")
1336
- .input("topic", Type.String())
1337
- .input(
1338
- "runner",
1339
- Type.Union([Type.Literal("goal"), Type.Literal("ralph")], {
1382
+ export default workflow({
1383
+ name: "research-then-implement",
1384
+ description: "Run deep research, then dispatch to goal or Ralph.",
1385
+ inputs: {
1386
+ topic: Type.String(),
1387
+ runner: Type.Union([Type.Literal("goal"), Type.Literal("ralph")], {
1340
1388
  default: "goal",
1341
1389
  description: "Use goal for bounded changes or Ralph for broad research-first implementation work.",
1342
1390
  }),
1343
- )
1344
- .output("research_doc_path", Type.Optional(Type.String({ description: "Path to the deep-research document used for implementation." })))
1345
- .output("runner", Type.String({ description: "Which nested runner executed: \"goal\" or \"ralph\"." }))
1346
- // Genuinely dynamic: the nested runner (goal vs ralph) is chosen at runtime and
1347
- // each exposes a different declared output shape, so a loose object is appropriate here.
1348
- // When a child's outputs are known and fixed, declare the precise shape instead.
1349
- .output("implementation", Type.Object({}, { additionalProperties: true, description: "Declared outputs from the nested implementation workflow." }))
1350
- .run(async (ctx) => {
1391
+ },
1392
+ outputs: {
1393
+ research_doc_path: Type.Optional(Type.String({ description: "Path to the deep-research document used for implementation." })),
1394
+ runner: Type.String({ description: "Which nested runner executed: \"goal\" or \"ralph\"." }),
1395
+ // Genuinely dynamic: the nested runner (goal vs ralph) is chosen at runtime and
1396
+ // each exposes a different declared output shape, so a loose object is appropriate here.
1397
+ // When a child's outputs are known and fixed, declare the precise shape instead.
1398
+ implementation: Type.Object({}, { additionalProperties: true, description: "Declared outputs from the nested implementation workflow." }),
1399
+ },
1400
+ run: async (ctx) => {
1351
1401
  const topic = String(ctx.inputs.topic);
1352
1402
  const research = await ctx.workflow(deepResearchCodebase, {
1353
1403
  inputs: { prompt: topic, max_concurrency: 4 },
@@ -1392,11 +1442,11 @@ export default defineWorkflow("research-then-implement")
1392
1442
  runner: "goal",
1393
1443
  implementation: implementation.outputs,
1394
1444
  };
1395
- })
1396
- .compile();
1445
+ },
1446
+ });
1397
1447
  ```
1398
1448
 
1399
- Passing a compiled definition directly to `ctx.workflow(...)` uses the child workflow's normalized name for replay metadata and default boundary labels (`shared-research` for the user-defined example above, or builtin names such as `deep-research-codebase`, `goal`, and `ralph`).
1449
+ Passing a workflow definition directly to `ctx.workflow(...)` uses the child workflow's normalized name for replay metadata and default boundary labels (`shared-research` for the user-defined example above, or builtin names such as `deep-research-codebase`, `goal`, and `ralph`).
1400
1450
 
1401
1451
  `ctx.workflow(workflowDefinition)` starts a nested workflow behind a parent boundary stage named `workflow:<workflow-name>` by default. User-facing status and graph views flatten that child into the parent run, so composition behaves like inlining the child workflow code: child stages, HIL prompt nodes, and deeper imported workflows appear in one expanded graph. The nested run id remains available internally for routing attach/pause/interrupt/resume/kill to the correct live stage, but it is not shown as a separate top-level `/workflow status` entry. The returned child result has:
1402
1452
 
@@ -1413,7 +1463,7 @@ Passing a compiled definition directly to `ctx.workflow(...)` uses the child wor
1413
1463
 
1414
1464
  | Option | Meaning |
1415
1465
  |---|---|
1416
- | `inputs` | Values validated against the child workflow's `.input()` schema before the child starts. |
1466
+ | `inputs` | Values validated against the child workflow's `inputs` schema map before the child starts. |
1417
1467
  | `stageName` | Parent boundary stage label. Defaults to `workflow:<workflow-name>`. |
1418
1468
 
1419
1469
  Output exposure rules:
@@ -1428,9 +1478,9 @@ if (child.exited === true) {
1428
1478
  }
1429
1479
  ```
1430
1480
 
1431
- A child exposes exactly its declared outputs — the keys it declared with `.output(...)` and returned from `.run()` or supplied to `ctx.exit({ outputs })`. There are no implicit outputs and no raw return-object passthrough. If `.run()` returns a key that was not declared with `.output(...)`, the child run fails with `atomic-workflows: workflow "<childName>" returned undeclared output "<key>"; declare it with .output("<key>", Type....) or remove it from the .run() return`, and the parent surfaces that failure through the wrapper `atomic-workflows: child workflow "<childName>" (<displayName>) failed with status failed: ...`. A child with no declared outputs therefore exposes no outputs. Missing required outputs, schema type mismatches, and non-JSON-serializable returned values fail normal child completion before the parent continues; child `ctx.exit({ outputs })` allows missing required outputs but still validates every provided key and sets `child.exited === true` so parent code must handle the partial shape.
1481
+ A child exposes exactly its declared outputs — the keys declared in `outputs` and returned from `run` or supplied to `ctx.exit({ outputs })`. There are no implicit outputs and no raw return-object passthrough. If `run` returns a key that was not declared in `outputs`, the child run fails with `atomic-workflows: workflow "<childName>" returned undeclared output "<key>"; declare it in outputs or remove it from the run return`, and the parent surfaces that failure through the wrapper `atomic-workflows: child workflow "<childName>" (<displayName>) failed with status failed: ...`. A child with no declared outputs therefore exposes no outputs. Missing required outputs, schema type mismatches, and non-JSON-serializable returned values fail normal child completion before the parent continues; child `ctx.exit({ outputs })` allows missing required outputs but still validates every provided key and sets `child.exited === true` so parent code must handle the partial shape.
1432
1482
 
1433
- Only compiled workflow definitions can be passed to `ctx.workflow(...)`. Import reusable workflows with TypeScript `import` statements first; use `/workflow` names such as `goal` only for launching named runs, not as `ctx.workflow(...)` arguments. If a module is missing or does not export a compiled workflow definition, workflow discovery fails when loading that module. Nested child workflows count against `maxDepth` (default `4` total workflow levels).
1483
+ Only workflow definitions can be passed to `ctx.workflow(...)`. Import reusable workflows with TypeScript `import` statements first; use `/workflow` names such as `goal` only for launching named runs, not as `ctx.workflow(...)` arguments. If a module is missing or does not export a workflow definition, workflow discovery fails when loading that module. Nested child workflows count against `maxDepth` (default `4` total workflow levels).
1434
1484
 
1435
1485
  The graph includes both the parent boundary node and the imported child workflow's own stages while the child is loading/running, so the user can observe progress and interrupt sub-workflows before they complete. Completed boundaries still retain the child workflow name, child run id prefix, and exposed output count for replay/debugging. Skipped or failed boundaries do not retain child-edge metadata (`workflowChild` / `workflowChildRun`), and graph expansion ignores any stale non-completed boundary metadata from older persisted sessions instead of flattening an unrelated child run. Use `stageName` when the parent needs a more specific label, but keep it concise so the child summary remains readable in the graph.
1436
1486
 
@@ -1438,6 +1488,121 @@ If a parent workflow exits through `ctx.exit(...)` while a child workflow is in
1438
1488
 
1439
1489
  Continuation replay treats the parent child-workflow boundary as the durable checkpoint: a previously completed child boundary replays with the original exposed outputs and without re-running the child, while a child that failed or was interrupted before completion starts again from the beginning on continuation. If `ctx.exit(...)` wins while a completed boundary is being replayed but before replay finalization, the boundary is finalized as skipped and its preloaded child metadata is omitted from store, persistence, restore, and expanded graph views.
1440
1490
 
1491
+ ## Migrating from the `defineWorkflow()` Builder API
1492
+
1493
+ The chained builder API — `defineWorkflow(name).description(...).input(...).output(...).worktreeFromInputs(...).run(...).compile()` — was removed in [#1457](https://github.com/bastani-inc/atomic/pull/1457). The single `workflow({ name?, description, inputs, outputs, worktreeFromInputs?, run })` object form is now the only way to author a workflow. There is no shim and no deprecation period: workflow files that still call `defineWorkflow(...).compile()` fail discovery with a module-load error until they are migrated.
1494
+
1495
+ This section is for workflow files written against the previous API. If you are authoring a new workflow, skip it and start from [Writing a Workflow](#writing-a-workflow).
1496
+
1497
+ ### What changed
1498
+
1499
+ - `import { defineWorkflow, Type } from "@bastani/workflows"` → `workflow` now comes from `@bastani/workflows`, and `Type` comes from the `typebox` package directly. `@bastani/workflows` no longer re-exports `Type`. The `Static` and `TSchema` *type* exports are still re-exported from `@bastani/workflows`, so `import type { Static } from "@bastani/workflows"` keeps working — only the runtime `Type` builder moved.
1500
+ - The fluent builder chain became one object literal passed to `workflow({ ... })`.
1501
+ - `name` moved from the `defineWorkflow(name)` argument into the object. It is now **optional** — omit it and discovery derives the name from the filename (the recommended style used by the builtins and most examples), or keep it when you want the name to differ from the file's basename.
1502
+ - `outputs` is now **required**. Workflows that declared no outputs before must now pass `outputs: {}`.
1503
+ - `.compile()` is gone. `workflow({ ... })` returns the frozen, branded definition directly; `export default` it.
1504
+ - The imperative object-form `runWorkflow(...)` runner is also removed (it is a `never` placeholder that throws on access). Programmatic execution uses the exported `run(def, inputs)` helper or a registry — see [Programmatic Usage](#programmatic-usage).
1505
+
1506
+ ### Builder method → object key
1507
+
1508
+ | Removed builder API | New `workflow({ ... })` key |
1509
+ | --- | --- |
1510
+ | `defineWorkflow("name")` argument | `name: "name"` (optional; derived from the filename when omitted) |
1511
+ | `.description(text)` | `description: text` |
1512
+ | `.input(key, schema)` (repeatable) | `inputs: { key: schema, ... }` |
1513
+ | `.output(key, schema)` (repeatable) | `outputs: { key: schema, ... }` (required, even if `{}`) |
1514
+ | `.worktreeFromInputs(binding)` | `worktreeFromInputs: binding` (binding shape unchanged) |
1515
+ | `.run(fn)` callback | `run: fn` |
1516
+ | `.compile()` terminal | delete — `workflow({ ... })` returns the definition |
1517
+
1518
+ `ctx` and every primitive (`ctx.task`, `ctx.chain`, `ctx.parallel`, `ctx.stage`, `ctx.workflow`, `ctx.exit`, `ctx.ui`) are unchanged, so workflow **bodies do not need rewriting** — only the authoring wrapper changes.
1519
+
1520
+ ### Full before / after
1521
+
1522
+ Before (removed API):
1523
+
1524
+ ```ts
1525
+ import { defineWorkflow, Type } from "@bastani/workflows";
1526
+
1527
+ export default defineWorkflow("review-changes")
1528
+ .description("Run two reviewers in parallel and synthesize a decision.")
1529
+ .input("target", Type.String({ description: "Path or change target to review." }))
1530
+ .input("base_branch", Type.String({ default: "origin/main" }))
1531
+ .output("decision", Type.String())
1532
+ .output("concerns", Type.Optional(Type.Array(Type.String())))
1533
+ .worktreeFromInputs({ baseBranch: "base_branch" })
1534
+ .run(async (ctx) => {
1535
+ const target = String(ctx.inputs.target);
1536
+ const [quality, runtime] = await ctx.parallel(
1537
+ [
1538
+ { name: "quality", prompt: `Review quality of ${target}` },
1539
+ { name: "runtime", prompt: `Review runtime behavior of ${target}` },
1540
+ ],
1541
+ { concurrency: 2 },
1542
+ );
1543
+ return { decision: `${quality.text}\n${runtime.text}`, concerns: [] };
1544
+ })
1545
+ .compile();
1546
+ ```
1547
+
1548
+ After (current API):
1549
+
1550
+ ```ts
1551
+ import { workflow } from "@bastani/workflows";
1552
+ import { Type } from "typebox";
1553
+
1554
+ export default workflow({
1555
+ name: "review-changes", // optional — omit to derive from filename
1556
+ description: "Run two reviewers in parallel and synthesize a decision.",
1557
+ inputs: {
1558
+ target: Type.String({ description: "Path or change target to review." }),
1559
+ base_branch: Type.String({ default: "origin/main" }),
1560
+ },
1561
+ outputs: {
1562
+ decision: Type.String(),
1563
+ concerns: Type.Optional(Type.Array(Type.String())),
1564
+ },
1565
+ worktreeFromInputs: { baseBranch: "base_branch" },
1566
+ run: async (ctx) => {
1567
+ const target = String(ctx.inputs.target);
1568
+ const [quality, runtime] = await ctx.parallel(
1569
+ [
1570
+ { name: "quality", prompt: `Review quality of ${target}` },
1571
+ { name: "runtime", prompt: `Review runtime behavior of ${target}` },
1572
+ ],
1573
+ { concurrency: 2 },
1574
+ );
1575
+ return { decision: `${quality.text}\n${runtime.text}`, concerns: [] };
1576
+ },
1577
+ });
1578
+ ```
1579
+
1580
+ ### Conversion checklist
1581
+
1582
+ For each `.atomic/workflows/*.ts` (or workflow-package) file:
1583
+
1584
+ 1. Swap the import to `import { workflow } from "@bastani/workflows"` and add `import { Type } from "typebox"`. Drop `defineWorkflow` from the `@bastani/workflows` import. `import type { Static, TSchema }` can stay on the `@bastani/workflows` import if you use those types.
1585
+ 2. Replace `defineWorkflow("<name>")` with `workflow({`. You may keep `name: "<name>"` or drop the key entirely to derive the name from the filename.
1586
+ 3. Move `.description("<text>")` to a `description: "<text>",` property.
1587
+ 4. Collect every `.input(key, schema)` into one `inputs: { key: schema, ... },` map.
1588
+ 5. Collect every `.output(key, schema)` into one `outputs: { key: schema, ... },` map. If there were no `.output(...)` calls, add `outputs: {},` — it is now required.
1589
+ 6. Move `.worktreeFromInputs(binding)` to a `worktreeFromInputs: binding,` property (same binding shape, unchanged).
1590
+ 7. Move the `.run(fn)` callback to a `run: fn,` property; the body stays byte-for-byte the same.
1591
+ 8. Delete the trailing `.compile()`, close the object with `})`, and keep `export default`.
1592
+ 9. Run `/workflow reload` (or restart Atomic) and `/workflow list` to confirm the file loads. Because `ctx` and its primitives are unchanged, stage behavior, graph layout, resume/kill, and human-input prompts are unaffected.
1593
+
1594
+ ### Gotchas
1595
+
1596
+ - **`outputs` is required.** The old `.output(...)` calls were optional, and a workflow with none compiled fine. The new object form throws `workflow: outputs must be a schema map` when `outputs` is missing, so declare `outputs: {}` for outputless workflows.
1597
+ - **`Type` is no longer re-exported.** `import { Type } from "@bastani/workflows"` fails type-checking; import it from `typebox` instead. (`Static` and `TSchema` *types* are still re-exported from `@bastani/workflows`, so those imports do not need to change.)
1598
+ - **`.compile()` does not exist.** Leaving a trailing `.compile()` call in place produces a runtime `TypeError`; delete it, because `workflow({ ... })` already returns the frozen, branded definition.
1599
+ - **`name` is derived from the filename when omitted.** `review-changes.ts` becomes the `review-changes` workflow, so an explicit `name` is only needed when it should differ from the basename.
1600
+ - **No hand-rolled definitions.** Objects carrying `__piWorkflow: true` that you construct by hand are rejected by discovery and by `ctx.workflow(...)`. Only definitions minted by `workflow({ ... })` are accepted.
1601
+ - **The imperative `runWorkflow` runner is gone.** It is now a `never` placeholder that throws on access; use the exported `run(def, inputs)` helper or a registry for programmatic execution.
1602
+ - **Keep `outputs` inline for the strictest type checking.** The old builder rejected undeclared output keys through a `NoExtraOutputs` generic on `.run(fn)`; the object form re-creates that check for inline `outputs` maps, but cannot recover the exact output keys when a schema map is widened or built up before being passed to `workflow({ ... })`. Keep the `outputs` literal inline so the declared-key check stays exact.
1603
+
1604
+ Everything else — stage primitives, `ctx.inputs` typing, runtime validation, DAG inference, MCP scoping, resume/kill, worktree binding, model fallback, and the `/workflow` tool contract — is unchanged.
1605
+
1441
1606
  ## Workflow Primitives
1442
1607
 
1443
1608
  Prefer high-level primitives because they create tracked graph nodes, provide consistent handoff semantics, and keep workflow definitions easier to read.
@@ -1449,7 +1614,7 @@ Prefer high-level primitives because they create tracked graph nodes, provide co
1449
1614
  | Independent concurrent branches | `ctx.parallel(steps, options?)` |
1450
1615
  | Reusable child workflow | Call `ctx.workflow(workflowDefinition, options?)` |
1451
1616
  | Human input during a workflow run | `ctx.ui.input/confirm/select/editor/custom` |
1452
- | Pure deterministic computation, parsing, or file I/O | Plain TypeScript in `.run()` or helpers |
1617
+ | Pure deterministic computation, parsing, or file I/O | Plain TypeScript in `run` or helpers |
1453
1618
  | Fine-grained session control | `ctx.stage(name, options?)` |
1454
1619
 
1455
1620
  Use `previous` and `{previous}` for compact handoffs only. If no placeholder is present, the runtime appends context, so a large `previous` payload can silently bloat the next model prompt. Chain defaults are:
@@ -1483,7 +1648,7 @@ Common task/stage options include:
1483
1648
  - `context: "fresh" | "fork"`, `forkFromSessionFile`
1484
1649
  - `model`, `fallbackModels`, `thinkingLevel`, `scopedModels`, `modelRegistry` — `model` and each `fallbackModels` entry accept a `model_name:thinking_effort` reasoning suffix and an optional parenthesized context-window token such as `model (1m)` (see [Reasoning levels](#reasoning-levels) and [Context windows](#context-windows)); the standalone `thinkingLevel` is deprecated
1485
1650
  - `contextWindow`, `contextWindowStrict` — stage-wide context-window budget mapped to the SDK `createAgentSession` options of the same name (non-strict by default)
1486
- - `tools`, `noTools`, `customTools`, `mcp: { allow?: string[], deny?: string[] }`, `bashPolicy`
1651
+ - `tools`, `noTools`, `customTools`, `mcp: { allow?: string[], deny?: string[] }`
1487
1652
  - `schema` for a structured final answer from this workflow item
1488
1653
  - `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, `agentDir`
1489
1654
  - advanced host-supplied SDK seams: `authStorage`, `resourceLoader`, `sessionManager`, `settingsManager`, `sessionStartEvent`
@@ -1494,43 +1659,30 @@ Workflow stages inherit the active host session directory only when the host is
1494
1659
 
1495
1660
  `subagent` is available as a default workflow-stage tool, with the same default two-hop nesting budget as main chat: a workflow stage can launch a subagent, and that subagent can launch one nested subagent before the guard blocks further delegation. `tools` remains an allowlist across built-in tools and bundled extension tools; if you set `tools`, list every tool the stage should see. Explicitly listing tools such as `subagent`, `web_search`, `fetch_content`, or `intercom` exposes those tools to the stage, while `excludedTools` and `noTools: "all"` still win. The bundled subagent definitions from `@bastani/subagents` are available to the `subagent` tool in workflow stages; when a workflow is itself running inside a subagent child process, Atomic isolates stage resource discovery from the parent child-process flags so `subagent` remains available while workflow-stage nested-depth guards remain in force.
1496
1661
 
1497
- `bashPolicy` scopes the built-in `bash` tool for one stage or task. `tools` must still include `"bash"` (or leave it available by default); the policy only narrows command text after the shell tool is exposed. It supports exact strings, `{ prefix }`, command-string `{ glob }`, and `{ regex, flags? }` rules, `default: "allow" | "deny"` (default `"allow"`), `deny` precedence, and `match: "segments" | "whole"` (default `"segments"`). Omitting `bashPolicy`, passing `{}`, or passing a default-allow policy with no `allow`/`deny` rules (including empty arrays or match-only default-allow policies) preserves legacy behavior and does not parse commands; malformed policy shapes such as unknown top-level keys (`denny`, `extra`), non-array `allow`/`deny`, invalid rule objects, invalid regexes, invalid glob bracket ranges, or stateful `g`/`y` regex flags fail closed as `invalid-policy`. Segment mode checks each command in pipelines/chains/substitutions before execution, treats unquoted LF, CRLF, and bare CR as command separators, keeps non-leading Bash `>|` noclobber redirections inside the current command segment, and rejects reserved/compound shell heads, leading redirections, attached command-head redirections, and command heads that are not literal words.
1498
-
1499
- ```ts
1500
- await ctx.task("browser-preview", {
1501
- tools: ["bash"],
1502
- bashPolicy: {
1503
- default: "deny",
1504
- allow: [
1505
- "which playwright-cli",
1506
- { prefix: "playwright-cli open " },
1507
- { prefix: "playwright-cli snapshot" },
1508
- { prefix: "grep " },
1509
- ],
1510
- deny: [{ regex: "\\brm\\b" }],
1511
- },
1512
- prompt: "Open the preview with playwright-cli, then summarize the visible state.",
1513
- });
1514
- ```
1515
-
1516
- A command such as `playwright-cli snapshot | grep title` passes only when both segments are allowed, and `playwright-cli snapshot\nrm -rf /tmp/proof` cannot be hidden behind a `{ prefix: "playwright-cli " }` rule because the newline starts a new segment. Glob rules match command strings rather than filesystem path segments: `*` and `?` may span `/`, so `{ glob: "playwright-cli *" }` matches URLs and slash-bearing paths such as `playwright-cli http://localhost:3000`, `playwright-cli docs/index.html`, and `playwright-cli ./preview/output.html` while still matching the whole target rather than `echo playwright-cli ...`; escaped bracket-class metacharacters such as `\-`, `\^`, `\]`, `\[`, and `\\` stay literal, while malformed glob ranges such as `{ glob: "echo [z-a]" }` become `invalid-policy` denials. Segment mode accepts literal heads such as `grep`, `./script`, `/usr/bin/env`, `bun`, and `playwright-cli`, and treats non-leading `>|` as redirection syntax so `echo ok >|/tmp/out` stays one segment, but conservatively rejects reserved or compound heads (`coproc`, `if`, `for`, `while`, `case`, `{`, `}`, `!`), leading redirections (`>file cmd`, `2>file cmd`, `<file cmd`, `&>file cmd`, `&>>file cmd`, `>|file cmd`, `<&0 cmd`, `>&2 cmd`), redirections attached to the command-head word (`cmd>file`, `cmd>>file`, `cmd>|file`, `cmd2>file`, `cmd>&2`, `cmd</tmp/in`), leading environment assignments (`PATH=/tmp:$PATH playwright-cli snapshot`, `LD_PRELOAD=/tmp/x playwright-cli snapshot`, `FOO=bar`), dynamic heads such as `$cmd`, `${cmd}`, `r''m`, `r\m`, `~/bin/rm`, `r*m`, `{rm,echo}`, `r$(printf m)`, or backtick-built command names. A single denied, redirection-prefixed, attached-redirection, assignment-prefixed, dynamic, or unrecognized segment blocks the whole command with a model-readable tool error and no UI prompt, so the behavior works in headless workflow runs. Use `match: "whole"` only when raw-command matching is intentional.
1662
+ Workflow stages use the same upstream-compatible `bash` tool as normal Atomic sessions. If `bash` is enabled for a stage, commands run through the configured shell with the stage process's permissions; workflow options no longer include a command-level allow/deny field for shell text. Use `tools`/`noTools` to expose or hide shell access, prefer narrower custom tools for repeatable operations, and run workflows inside a container, VM, or other sandbox when command allowlisting or stronger isolation is required.
1517
1663
 
1518
1664
  `gitWorktreeDir` selects a reusable Git worktree root for `ctx.stage`, `ctx.task`, `ctx.chain`, and `ctx.parallel`. If the path is missing, Atomic creates it with `git worktree add --detach <path> <baseBranch>`; if it exists, it must be a same-repository worktree root. The default stage cwd becomes the matching cwd inside the worktree and preserves the invoking repo-relative subdirectory. Explicit `cwd` still wins; relative `cwd` values resolve from the worktree cwd, while absolute `cwd` values are used as provided. `gitWorktreeDir` is mutually exclusive with `worktree: true`: use `gitWorktreeDir` for named/reusable worktrees and `worktree: true` for temporary direct-mode worktrees that are cleaned up after the run.
1519
1665
 
1520
- To bind user inputs to a workflow-wide worktree default, use the builder method:
1666
+ To bind user inputs to a workflow-wide worktree default, set `worktreeFromInputs` in `workflow({...})`:
1521
1667
 
1522
1668
  ```ts
1523
- export default defineWorkflow("safe-implementation")
1524
- .input("task", Type.String())
1525
- .input("git_worktree_dir", Type.String({ default: "" }))
1526
- .input("base_branch", Type.String({ default: "origin/main" }))
1527
- .worktreeFromInputs({ gitWorktreeDir: "git_worktree_dir", baseBranch: "base_branch" })
1528
- .output("result", Type.String({ description: "Implementation result text." }))
1529
- .run(async (ctx) => {
1669
+ export default workflow({
1670
+ name: "safe-implementation",
1671
+ description: "",
1672
+ inputs: {
1673
+ task: Type.String(),
1674
+ git_worktree_dir: Type.String({ default: "" }),
1675
+ base_branch: Type.String({ default: "origin/main" }),
1676
+ },
1677
+ outputs: {
1678
+ result: Type.String({ description: "Implementation result text." }),
1679
+ },
1680
+ worktreeFromInputs: { gitWorktreeDir: "git_worktree_dir", baseBranch: "base_branch" },
1681
+ run: async (ctx) => {
1530
1682
  const result = await ctx.task("implement", { task: String(ctx.inputs.task) });
1531
1683
  return { result: result.text };
1532
- })
1533
- .compile();
1684
+ },
1685
+ });
1534
1686
  ```
1535
1687
 
1536
1688
  For lower-level integrations, `@bastani/workflows` also exports `setupGitWorktree({ gitWorktreeDir, baseBranch, cwd })`, returning `{ worktreeRoot, cwd, repositoryRoot, created }` with the same validation, symlink-preserving path handling, and cwd-preservation behavior used by workflow stages.
@@ -1593,20 +1745,26 @@ The budget applies only to the candidate that carries the token; other primary a
1593
1745
  - `/workflow <name> key=value ...` for interactive named runs
1594
1746
  - `/workflow connect|attach|pause|interrupt|resume|status|inputs|reload` for live control, inspection, and rediscovery
1595
1747
  - the `workflow` tool for agent-initiated orchestration and direct one-off runs
1596
- Workflow definition files must export definitions produced by `defineWorkflow(...).compile()`. Keep non-workflow runtime helpers (widget factories, shared utilities) in a subdirectory the discovery scan ignores, such as `.atomic/workflows/lib/` — see [Workflow Locations](#workflow-locations). The former imperative object-form runner is not part of the public SDK, and authored workflow files cannot import `runWorkflow` from `@bastani/workflows`.
1748
+ Workflow definition files must export definitions produced by `workflow({...})`. Keep non-workflow runtime helpers (widget factories, shared utilities) in a subdirectory the discovery scan ignores, such as `.atomic/workflows/lib/` — see [Workflow Locations](#workflow-locations). The former imperative object-form runner is not part of the public SDK, and authored workflow files cannot import `runWorkflow` from `@bastani/workflows`.
1597
1749
 
1598
1750
  Standalone TypeScript workflow packages type-check the SDK import with no hand-authored `.d.ts`, no `declare module` shim, and no `tsconfig` `paths` alias. The SDK types ship with `@bastani/atomic`, so a workflow package depends only on `@bastani/atomic` (plus a `typebox` peer):
1599
1751
 
1600
1752
  ```ts
1601
- import { defineWorkflow, Type } from "@bastani/workflows";
1602
-
1603
- export default defineWorkflow("map-workflow-sdk")
1604
- .input("prompt", Type.String({ default: "map workflow sdk" }))
1605
- .run(async (ctx) => {
1753
+ import { workflow } from "@bastani/workflows";
1754
+ import { Type } from "typebox";
1755
+
1756
+ export default workflow({
1757
+ name: "map-workflow-sdk",
1758
+ description: "Map the workflow SDK.",
1759
+ inputs: {
1760
+ prompt: Type.String({ default: "map workflow sdk" }),
1761
+ },
1762
+ outputs: {},
1763
+ run: async (ctx) => {
1606
1764
  await ctx.task("map", { prompt: ctx.inputs.prompt });
1607
1765
  return {};
1608
- })
1609
- .compile();
1766
+ },
1767
+ });
1610
1768
  ```
1611
1769
 
1612
1770
  How those types resolve depends on what else the package imports:
@@ -1630,22 +1788,29 @@ How those types resolve depends on what else the package imports:
1630
1788
  /// <reference types="@bastani/atomic/workflows/ambient" />
1631
1789
  ```
1632
1790
 
1633
- Either form makes `import { defineWorkflow, Type } from "@bastani/workflows"` and the `@bastani/workflows/builtin/*` composition imports resolve under `tsc` (`moduleResolution: NodeNext`) with no hand-authored `.d.ts`, no `declare module` shim, and no `paths` alias. `@bastani/workflows` is not a separate npm package — its types ship with `@bastani/atomic` — so list both `@bastani/atomic` and `typebox` (the SDK's emitted types reference TypeBox) in `peerDependencies`. Runtime discovery and loading via `atomic.workflows` are unchanged: Atomic's loader still supplies the SDK when workflow files execute.
1791
+ Either form makes `import { workflow } from "@bastani/workflows"`,
1792
+ `import { Type } from "typebox"`, and the `@bastani/workflows/builtin/*` composition imports resolve under `tsc` (`moduleResolution: NodeNext`) with no hand-authored `.d.ts`, no `declare module` shim, and no `paths` alias. `@bastani/workflows` is not a separate npm package — its types ship with `@bastani/atomic` — so list both `@bastani/atomic` and `typebox` (workflow files import `Type` from `typebox`) in `peerDependencies`. Runtime discovery and loading via `atomic.workflows` are unchanged: Atomic's loader still supplies the SDK when workflow files execute.
1634
1793
 
1635
1794
  The `workflow` tool still supports direct one-off `task`, `tasks`, and `chain` modes. Direct chains support `chainName` for status/artifact grouping and `chainDir` as a shared directory for relative reads, outputs, and worktree diffs.
1636
1795
 
1637
1796
  Use `createRegistry()` when code needs to group definitions explicitly:
1638
1797
 
1639
1798
  ```ts
1640
- import { createRegistry, defineWorkflow, Type } from "@bastani/workflows";
1641
-
1642
- const alpha = defineWorkflow("alpha")
1643
- .output("text", Type.String({ description: "Alpha task output text." }))
1644
- .run(async (ctx) => {
1799
+ import { createRegistry, workflow } from "@bastani/workflows";
1800
+ import { Type } from "typebox";
1801
+
1802
+ const alpha = workflow({
1803
+ name: "alpha",
1804
+ description: "",
1805
+ inputs: {},
1806
+ outputs: {
1807
+ text: Type.String({ description: "Alpha task output text." }),
1808
+ },
1809
+ run: async (ctx) => {
1645
1810
  const result = await ctx.task("alpha", { prompt: "Run alpha." });
1646
1811
  return { text: result.text };
1647
- })
1648
- .compile();
1812
+ },
1813
+ });
1649
1814
 
1650
1815
  const registry = createRegistry().register(alpha);
1651
1816
  registry.names();
@@ -1816,7 +1981,7 @@ Before implementing or shipping a non-trivial workflow, answer these questions:
1816
1981
  - **Stage decomposition:** For each stage, what question does it answer, what context does it need, what output should it return, and what model/tool/MCP requirements does it have?
1817
1982
  - **Local stage contract:** Can this stage prompt stand alone with its current objective, inputs/artifacts, expected outputs, tools/checks, and success criteria, without unexplained workflow internals or future-stage assumptions?
1818
1983
  - **Information flow:** For every edge between stages, is `previous` enough, or should the handoff use structured returns, files, `reads`, `output`, or `outputMode`?
1819
- - **Output contract:** Which outputs should be declared with `.output(...)`, which stage/task/child results should `.run()` return for those keys, and what runtime type must each value have? If another workflow may call this workflow as a child, which non-default outputs should the parent rely on?
1984
+ - **Output contract:** Which outputs should be declared in `outputs`, which stage/task/child results should `run` return for those keys, and what runtime type must each value have? If another workflow may call this workflow as a child, which non-default outputs should the parent rely on?
1820
1985
  - **Context size:** Can downstream stages succeed from the handoff alone? Should large transcripts, logs, or research bundles be summarized or saved as artifacts?
1821
1986
  - **Control flow:** Should the workflow use `ctx.chain`, `ctx.parallel`, `ctx.ui`, bounded loops, `failFast`, or `fallbackModels`?
1822
1987
  - **User experience:** Are stage names readable in status and graph views? Is the final output compact? Are important artifacts saved with stable paths?
@@ -1830,8 +1995,8 @@ Good workflows are information-flow systems, not just prompt sequences. Keep sta
1830
1995
  - Do not guess input keys; inspect with `inputs` or `get` first.
1831
1996
  - Do not call `create`, `update`, or `delete` on the workflow tool; definitions are code-authored.
1832
1997
  - Do not use legacy workflow tool fields like `agent`, `stage`, or run-control `name`.
1833
- - Do not pass strings such as `"goal"` or path objects to `ctx.workflow(...)`; import the compiled workflow definition from `@bastani/workflows/builtin` or another TypeScript module first.
1834
- - Do not rely on undeclared child outputs; returning a key that is not declared with `.output(...)` fails the run. Declare `.output(...)` for every child-workflow field you expose — including `result` — and return values matching those schemas from `.run()`.
1998
+ - Do not pass strings such as `"goal"` or path objects to `ctx.workflow(...)`; import the workflow definition from `@bastani/workflows/builtin` or another TypeScript module first.
1999
+ - Do not rely on undeclared child outputs; returning a key that is not declared in `outputs` fails the run. Declare every child-workflow field you expose in `outputs` — including `result` — and return values matching those schemas from `run`.
1835
2000
  - Do not expect to select or rename child outputs at the call site; parent workflows receive the child's declared output contract as `child.outputs` after checking `child.exited === false`, and a partial declared-output map when `child.exited === true`.
1836
2001
  - Do not expect named workflow runs to block the chat turn; they are background tasks.
1837
2002
  - Do not call `kill` when the user asks to interrupt or pause resumably.
@@ -1839,3 +2004,653 @@ Good workflows are information-flow systems, not just prompt sequences. Keep sta
1839
2004
  - Do not write stage prompts that depend on hidden workflow-wide awareness; make each model stage locally scoped and self-described.
1840
2005
  - Do not parse model gate decisions from ad-hoc prose with regular expressions; configure `schema` on a focused workflow item and consume `result.structured`.
1841
2006
  - Return compact structured decisions and save large artifacts to files; artifact handoffs should still use files when the next stage does not need the whole payload in context.
2007
+
2008
+ ## Workflow Best Practices
2009
+
2010
+ This is the playbook I use to get consistently better results from coding agents and workflow systems.
2011
+
2012
+ The core idea is simple: do not treat an agent like a magic box. Treat it like a capable engineering partner that needs a clear objective, tight scope, explicit validation, and occasional steering.
2013
+
2014
+ Most weak agent runs fail for predictable reasons: the goal is vague, the scope is too broad, validation is missing, or the agent keeps following the wrong signal. This playbook is about avoiding those failure modes.
2015
+
2016
+ The examples below are synthetic and intentionally generic. Replace placeholders like `[component]`, `[test command]`, and `[workflow]` with your own project details.
2017
+
2018
+ ---
2019
+
2020
+ ### The core loop
2021
+
2022
+ The workflow pattern I rely on most often is:
2023
+
2024
+ ```text
2025
+ Objective -> Scope -> Done criteria -> Run -> Inspect -> Steer -> Validate -> Summarize
2026
+ ```
2027
+
2028
+ In practice, that means:
2029
+
2030
+ 1. Define the end state.
2031
+ 2. Constrain the blast radius.
2032
+ 3. State what counts as done.
2033
+ 4. Let the agent or workflow work.
2034
+ 5. Inspect status before reading details.
2035
+ 6. Steer only when the run is off track, blocked, or missing criteria.
2036
+ 7. Require evidence before accepting the result.
2037
+ 8. Ask for a summary, handoff, or next-step plan.
2038
+
2039
+ A good workflow prompt does not just say what to try. It says what success looks like.
2040
+
2041
+ ---
2042
+
2043
+ ### Prompt anatomy
2044
+
2045
+ A strong workflow prompt usually has these parts:
2046
+
2047
+ #### Objective
2048
+
2049
+ What should be true when the work is complete?
2050
+
2051
+ ```text
2052
+ Implement `[specific behavior]` in `[component]`.
2053
+ ```
2054
+
2055
+ #### Context
2056
+
2057
+ What does the agent need to know before acting?
2058
+
2059
+ ```text
2060
+ This is needed because `[reason]`. The relevant code likely lives near `[area]`.
2061
+ ```
2062
+
2063
+ #### Scope
2064
+
2065
+ What is the agent allowed to change?
2066
+
2067
+ ```text
2068
+ Only touch files directly required for `[behavior]`.
2069
+ ```
2070
+
2071
+ #### Non-goals
2072
+
2073
+ What should the agent avoid?
2074
+
2075
+ ```text
2076
+ Do not redesign `[subsystem]`, refactor unrelated code, or change public behavior outside `[case]`.
2077
+ ```
2078
+
2079
+ #### Done criteria
2080
+
2081
+ How will we know the work is complete?
2082
+
2083
+ ```text
2084
+ Done means:
2085
+ - `[new behavior]` works.
2086
+ - `[existing behavior]` is unchanged.
2087
+ - `[test command]` passes.
2088
+ - The final response includes changed files, validation results, and remaining risks.
2089
+ ```
2090
+
2091
+ #### Stop conditions
2092
+
2093
+ When should the agent stop and ask instead of guessing?
2094
+
2095
+ ```text
2096
+ If this requires changing `[public API/security behavior/data migration]`, stop and ask first.
2097
+ ```
2098
+
2099
+ ---
2100
+
2101
+ ### Core principles
2102
+
2103
+ #### 1. Start with the end state
2104
+
2105
+ I try to describe what should be true at the end, not just what the agent should investigate.
2106
+
2107
+ Bad:
2108
+
2109
+ ```text
2110
+ Look into the login issue.
2111
+ ```
2112
+
2113
+ Better:
2114
+
2115
+ ```text
2116
+ Fix the login redirect regression. Done means users who sign in from `[page]` return to `[expected destination]`, and `[test command]` passes.
2117
+ ```
2118
+
2119
+ #### 2. Keep scope tight
2120
+
2121
+ Agents are often tempted to clean up nearby code. Sometimes that is useful, but most workflow runs should be bounded.
2122
+
2123
+ Use phrases like:
2124
+
2125
+ - `Only touch files required for this behavior.`
2126
+ - `Do not refactor unrelated code.`
2127
+ - `Preserve existing behavior for [case].`
2128
+ - `Make the smallest correct change.`
2129
+
2130
+ #### 3. Separate implementation from validation
2131
+
2132
+ A change is not done because the agent says it is done. It is done when the relevant evidence supports it.
2133
+
2134
+ That evidence can be:
2135
+
2136
+ - a targeted test,
2137
+ - a broader regression test,
2138
+ - a smoke command,
2139
+ - a typecheck or lint command,
2140
+ - a structured output contract check,
2141
+ - or a clear manual verification step.
2142
+
2143
+ #### 4. Prefer evidence over speculation
2144
+
2145
+ When something fails, I steer the agent back to the observable signal: the error, failing test, log line, user behavior, or broken contract.
2146
+
2147
+ ```text
2148
+ Treat the failing assertion as the source of truth. Do not guess from nearby code alone.
2149
+ ```
2150
+
2151
+ #### 5. Use staged thinking
2152
+
2153
+ For ambiguous work, I usually separate the flow into stages:
2154
+
2155
+ ```text
2156
+ Investigate -> identify root cause -> propose fix -> implement -> validate -> summarize
2157
+ ```
2158
+
2159
+ If the cause is not clear, I do not want the agent making broad changes just to see what happens.
2160
+
2161
+ #### 6. Steer, do not micromanage
2162
+
2163
+ The best steering messages are short and corrective. They add constraints, redirect attention, or provide a decision.
2164
+
2165
+ You usually do not need to rewrite the whole prompt. You need to say what changed.
2166
+
2167
+ #### 7. Treat failed validation as the next task
2168
+
2169
+ A failed test is not a footnote. It becomes the next objective.
2170
+
2171
+ ```text
2172
+ Validation failed on `[command]`. Treat that as the source of truth. Fix the root cause only, rerun the failing check, then report the result.
2173
+ ```
2174
+
2175
+ #### 8. Interrupt stale or wrong work
2176
+
2177
+ If a run is solving the wrong problem, based on outdated assumptions, or duplicating another run, stop it. Letting it continue usually creates more cleanup later.
2178
+
2179
+ #### 9. Inspect at the right level
2180
+
2181
+ For long-running workflows, I do not start by reading every log. I check:
2182
+
2183
+ 1. overall status,
2184
+ 2. current stage,
2185
+ 3. blocker or failure reason,
2186
+ 4. relevant stage details only if needed.
2187
+
2188
+ #### 10. Ask for synthesis before handoff
2189
+
2190
+ Before switching from investigation to implementation, or from implementation to review, I often ask for a concise synthesis:
2191
+
2192
+ ```text
2193
+ Summarize root cause, proposed fix, files involved, validation plan, and remaining risks.
2194
+ ```
2195
+
2196
+ ---
2197
+
2198
+ ### Common workflow patterns
2199
+
2200
+ #### Scoped implementation sprint
2201
+
2202
+ **Use when:** You have a clear feature, bug fix, or issue to delegate.
2203
+
2204
+ **Prompt shape:**
2205
+
2206
+ ```text
2207
+ Implement `[feature]` in `[component]`. Only touch files directly needed for this behavior. Done means the new behavior works, existing behavior is unchanged, and `[test command]` passes.
2208
+ ```
2209
+
2210
+ **Why it works:** The agent gets autonomy, but the objective and blast radius are bounded.
2211
+
2212
+ **Validation:** Run the most relevant targeted check first, then a broader nearby check if the change is risky.
2213
+
2214
+ ---
2215
+
2216
+ #### Regression repair loop
2217
+
2218
+ **Use when:** CI, tests, typecheck, lint, or smoke validation fails.
2219
+
2220
+ **Prompt shape:**
2221
+
2222
+ ```text
2223
+ Fix the failing `[test suite]` regression. Treat the failure output as the source of truth. Do not refactor unrelated code. Done means the failing test passes and no nearby tests regress.
2224
+ ```
2225
+
2226
+ **Why it works:** It anchors the run to observable evidence instead of speculation.
2227
+
2228
+ **Validation:** Reproduce the failure, fix the root cause, rerun the failing check, then run a nearby or broader check.
2229
+
2230
+ ---
2231
+
2232
+ #### Workflow or tooling smoke test
2233
+
2234
+ **Use when:** You changed a workflow definition, prompt contract, structured output, CLI behavior, or developer tool.
2235
+
2236
+ **Prompt shape:**
2237
+
2238
+ ```text
2239
+ Validate `[workflow/tool]` after the change. Run a minimal smoke case, confirm required outputs are present, and report whether it can be invoked with expected inputs.
2240
+ ```
2241
+
2242
+ **Why it works:** Workflow and tooling changes often fail at integration boundaries. A small smoke case catches those failures early.
2243
+
2244
+ **Validation:** Reload or rerun the tool, check the output shape, and report contract mismatches.
2245
+
2246
+ ---
2247
+
2248
+ #### Human-in-the-loop checkpoint
2249
+
2250
+ **Use when:** The workflow might need a product decision, API decision, migration choice, or risky approval.
2251
+
2252
+ **Prompt shape:**
2253
+
2254
+ ```text
2255
+ If blocked, ask before changing public API behavior. Otherwise proceed with the smallest compatible fix.
2256
+ ```
2257
+
2258
+ **Why it works:** The agent keeps moving where it can, but does not guess on high-impact decisions.
2259
+
2260
+ **Validation:** Confirm the decision is reflected in the final behavior and summary.
2261
+
2262
+ ---
2263
+
2264
+ #### Release gate
2265
+
2266
+ **Use when:** Preparing a release, version bump, changelog, publish step, migration, or deployment-adjacent task.
2267
+
2268
+ **Prompt shape:**
2269
+
2270
+ ```text
2271
+ Prepare a `[release kind]` release for `[version]`. Do not publish unless validation passes. Report the exact checks performed and any unresolved blockers.
2272
+ ```
2273
+
2274
+ **Why it works:** Release work needs explicit gates and stop conditions.
2275
+
2276
+ **Validation:** Require changelog review, tests, build/package checks, and a clear publish/no-publish decision.
2277
+
2278
+ ---
2279
+
2280
+ #### Monitor-and-steer long run
2281
+
2282
+ **Use when:** A workflow runs asynchronously, has multiple stages, or may need supervision.
2283
+
2284
+ **Prompt shape:**
2285
+
2286
+ ```text
2287
+ Show the current stage and blocker. If implementation is complete, summarize validation status and remaining risks.
2288
+ ```
2289
+
2290
+ **Why it works:** It avoids both blind trust and excessive log-reading.
2291
+
2292
+ **Validation:** Inspect status first, then stages, then only the relevant details.
2293
+
2294
+ ---
2295
+
2296
+ #### Investigate before implementing
2297
+
2298
+ **Use when:** A bug or request is ambiguous.
2299
+
2300
+ **Prompt shape:**
2301
+
2302
+ ```text
2303
+ Investigate `[bug]`, identify root cause, and propose the smallest fix. Do not implement until the cause is clear.
2304
+ ```
2305
+
2306
+ **Why it works:** It prevents the agent from making changes before it understands the failure mode.
2307
+
2308
+ **Validation:** Ask for a reproduction, root-cause explanation, proposed fix, and test plan before implementation.
2309
+
2310
+ ---
2311
+
2312
+ ### Steering patterns
2313
+
2314
+ #### Tighten scope
2315
+
2316
+ **Signal:** The agent starts expanding into adjacent cleanup, unrelated files, or broad refactors.
2317
+
2318
+ **Steer:**
2319
+
2320
+ ```text
2321
+ Narrow this to `[specific behavior]` in `[component]`. Do not refactor unrelated code or change `[adjacent area]`. Done means `[specific acceptance criteria]`.
2322
+ ```
2323
+
2324
+ **Why:** Prevents risky changes and keeps the run reviewable.
2325
+
2326
+ ---
2327
+
2328
+ #### Add missing done criteria
2329
+
2330
+ **Signal:** The agent has a plan, but no clear finish line.
2331
+
2332
+ **Steer:**
2333
+
2334
+ ```text
2335
+ Use these done criteria:
2336
+ 1. `[behavior]` works.
2337
+ 2. `[regression]` remains unchanged.
2338
+ 3. `[test command]` passes.
2339
+ 4. Report files changed and validation results.
2340
+ ```
2341
+
2342
+ **Why:** Makes completion verifiable.
2343
+
2344
+ ---
2345
+
2346
+ #### Redirect an off-track stage
2347
+
2348
+ **Signal:** The workflow is investigating the wrong area or solving the wrong problem.
2349
+
2350
+ **Steer:**
2351
+
2352
+ ```text
2353
+ Stop pursuing `[wrong direction]`. The relevant signal is `[error/test/user behavior]`. Re-focus on `[target area]` and continue from there.
2354
+ ```
2355
+
2356
+ **Why:** Saves time and prevents wrong assumptions from compounding.
2357
+
2358
+ ---
2359
+
2360
+ #### Respond to a blocked prompt
2361
+
2362
+ **Signal:** The workflow asks for approval, a choice, or clarification.
2363
+
2364
+ **Steer:**
2365
+
2366
+ ```text
2367
+ Choose `[option]`. Continue only if `[condition]`; otherwise stop and report the blocker.
2368
+ ```
2369
+
2370
+ **Why:** Keeps the workflow unblocked without adding ambiguity.
2371
+
2372
+ ---
2373
+
2374
+ #### Turn failed validation into the next task
2375
+
2376
+ **Signal:** Tests, typecheck, lint, build, or smoke checks fail.
2377
+
2378
+ **Steer:**
2379
+
2380
+ ```text
2381
+ Validation failed on `[command]`. Treat that as the source of truth. Fix the root cause only, rerun the failing check, then report the result.
2382
+ ```
2383
+
2384
+ **Why:** Prevents accepting partially working output.
2385
+
2386
+ ---
2387
+
2388
+ #### Ask for synthesis
2389
+
2390
+ **Signal:** The workflow has gathered information, but the next action is unclear.
2391
+
2392
+ **Steer:**
2393
+
2394
+ ```text
2395
+ Synthesize the current findings into: root cause, proposed fix, files likely involved, validation plan, and remaining risks.
2396
+ ```
2397
+
2398
+ **Why:** Converts exploration into a usable plan.
2399
+
2400
+ ---
2401
+
2402
+ #### Pause, kill, or rerun
2403
+
2404
+ **Signal:** A run is stale, duplicated, superseded, or based on outdated assumptions.
2405
+
2406
+ **Steer:**
2407
+
2408
+ ```text
2409
+ Pause this run; it has been superseded by `[new context]`. Resume only with `[updated objective]`, or stop and summarize current state.
2410
+ ```
2411
+
2412
+ **Why:** Avoids conflicting changes and wasted work.
2413
+
2414
+ ---
2415
+
2416
+ ### Copy-paste templates
2417
+
2418
+ #### Start a workflow
2419
+
2420
+ ```text
2421
+ Objective:
2422
+ Implement/fix `[specific behavior]` in `[component]`.
2423
+
2424
+ Context:
2425
+ `[short context about why this matters or where to look]`
2426
+
2427
+ Scope:
2428
+ - Only touch files required for `[behavior]`.
2429
+ - Do not refactor unrelated code.
2430
+ - Preserve existing behavior for `[existing case]`.
2431
+
2432
+ Done criteria:
2433
+ - `[new behavior]` works.
2434
+ - `[regression case]` still works.
2435
+ - `[test command]` passes.
2436
+ - Report changed files, validation results, and any risks.
2437
+
2438
+ Stop conditions:
2439
+ - If this requires `[risky decision]`, stop and ask first.
2440
+ ```
2441
+
2442
+ #### Tighten scope
2443
+
2444
+ ```text
2445
+ Tighten scope to `[specific target]`.
2446
+
2447
+ Do not work on:
2448
+ - `[excluded area 1]`
2449
+ - `[excluded area 2]`
2450
+ - broad cleanup or unrelated refactors
2451
+
2452
+ Continue only on the path needed to satisfy:
2453
+ `[acceptance criterion]`.
2454
+ ```
2455
+
2456
+ #### Add acceptance criteria
2457
+
2458
+ ```text
2459
+ Add these acceptance criteria before continuing:
2460
+
2461
+ 1. User can `[action]`.
2462
+ 2. System handles `[edge case]`.
2463
+ 3. Existing behavior `[existing behavior]` is unchanged.
2464
+ 4. `[test command]` passes.
2465
+ 5. Final response includes validation evidence.
2466
+ ```
2467
+
2468
+ #### Redirect a stage
2469
+
2470
+ ```text
2471
+ This stage is off track.
2472
+
2473
+ Stop investigating `[wrong area]`.
2474
+ The relevant signal is `[error/output/requirement]`.
2475
+ Refocus on `[correct area]`.
2476
+
2477
+ Next:
2478
+ 1. Reproduce or inspect `[signal]`.
2479
+ 2. Identify root cause.
2480
+ 3. Make the smallest fix.
2481
+ 4. Run `[validation command]`.
2482
+ ```
2483
+
2484
+ #### Handle failed validation
2485
+
2486
+ ```text
2487
+ Validation failed:
2488
+
2489
+ Command:
2490
+ `[command]`
2491
+
2492
+ Failure:
2493
+ `[short sanitized failure summary]`
2494
+
2495
+ Treat this as the source of truth.
2496
+ Fix only the root cause.
2497
+ Rerun the failing command.
2498
+ If it still fails, summarize the blocker and stop.
2499
+ ```
2500
+
2501
+ #### Ask for synthesis
2502
+
2503
+ ```text
2504
+ Synthesize current progress into:
2505
+
2506
+ - What was attempted
2507
+ - What changed
2508
+ - What evidence supports the result
2509
+ - What remains uncertain
2510
+ - Recommended next steps
2511
+ - Exact validation commands run
2512
+ ```
2513
+
2514
+ #### Turn findings into implementation steps
2515
+
2516
+ ```text
2517
+ Convert the findings into an implementation plan:
2518
+
2519
+ 1. Files/components to change
2520
+ 2. Order of changes
2521
+ 3. Tests to add or update
2522
+ 4. Validation commands
2523
+ 5. Risks or edge cases
2524
+ 6. Stop conditions
2525
+ ```
2526
+
2527
+ #### Prepare a release gate
2528
+
2529
+ ```text
2530
+ Prepare `[version]` as a `[release kind]` release.
2531
+
2532
+ Requirements:
2533
+ - Verify changelog entries are complete.
2534
+ - Run `[test command]`.
2535
+ - Run `[build/package command]`.
2536
+ - Do not publish unless all validation passes.
2537
+ - If any gate fails, stop and report blockers.
2538
+
2539
+ Final response should include:
2540
+ - Version
2541
+ - Checks run
2542
+ - Results
2543
+ - Files changed
2544
+ - Publish readiness
2545
+ ```
2546
+
2547
+ ---
2548
+
2549
+ ### Concrete examples
2550
+
2551
+ #### Example 1: Fixing a failing test
2552
+
2553
+ **Scenario:** A package has one failing unit test after a recent change.
2554
+
2555
+ **Initial objective:**
2556
+
2557
+ ```text
2558
+ Fix the failing `[unit test]`. Do not rewrite the module. Done means the test passes and nearby tests still pass.
2559
+ ```
2560
+
2561
+ **Steering message:**
2562
+
2563
+ ```text
2564
+ Stop exploring unrelated failures. Focus only on the assertion mismatch in `[test file]`.
2565
+ ```
2566
+
2567
+ **Validation:** Run `[targeted test command]`, then `[nearby test command]`.
2568
+
2569
+ **Outcome:** A small fix is applied, the previously failing test and nearby tests pass, and the workflow reports the exact commands and results.
2570
+
2571
+ ---
2572
+
2573
+ #### Example 2: Repairing a workflow definition
2574
+
2575
+ **Scenario:** A custom workflow no longer returns the expected structured output.
2576
+
2577
+ **Initial objective:**
2578
+
2579
+ ```text
2580
+ Validate `[workflow]` and fix its output contract. Done means the smoke run returns `[required fields]`.
2581
+ ```
2582
+
2583
+ **Steering message:**
2584
+
2585
+ ```text
2586
+ Treat the missing output field as the root issue. Do not change unrelated stage prompts.
2587
+ ```
2588
+
2589
+ **Validation:** Reload the workflow, run a minimal smoke input, and inspect the structured result.
2590
+
2591
+ **Outcome:** Contract fixed, smoke test passes, and the workflow can be reused safely.
2592
+
2593
+ ---
2594
+
2595
+ #### Example 3: Investigating before implementing
2596
+
2597
+ **Scenario:** A user-reported bug is ambiguous.
2598
+
2599
+ **Initial objective:**
2600
+
2601
+ ```text
2602
+ Investigate `[bug]`, identify root cause, and propose the smallest fix. Do not implement until the cause is clear.
2603
+ ```
2604
+
2605
+ **Steering message:**
2606
+
2607
+ ```text
2608
+ Synthesize findings first: root cause, affected path, proposed fix, and validation plan.
2609
+ ```
2610
+
2611
+ **Validation:** Add or run a reproduction test before changing code.
2612
+
2613
+ **Outcome:** Clear implementation plan produced, then delegated as a scoped fix.
2614
+
2615
+ ---
2616
+
2617
+ ### Anti-patterns
2618
+
2619
+ | Anti-pattern | Better approach |
2620
+ | --- | --- |
2621
+ | `Fix this.` | `Fix [specific failure]; done means [test command] passes.` |
2622
+ | No validation step | Require tests, smoke checks, typecheck, or explicit manual verification. |
2623
+ | Broad refactors | Constrain the run to the files needed for the objective. |
2624
+ | Letting a wrong stage continue | Redirect or interrupt as soon as the agent follows the wrong signal. |
2625
+ | Accepting unverified summaries | Ask for changed files, commands run, results, and remaining risks. |
2626
+ | Mixing investigation and implementation too early | Ask for root cause and proposed fix before code changes. |
2627
+ | Ignoring blocked stages | Answer directly with one decision and any constraints. |
2628
+ | Continuing stale runs | Pause, kill, or rerun with updated context. |
2629
+ | Reading every log | Inspect status, then stages, then only relevant details. |
2630
+ | Publishing without gates | Require release validation and explicit stop conditions. |
2631
+
2632
+ ---
2633
+
2634
+ ### Quick reference
2635
+
2636
+ Before starting a workflow, include:
2637
+
2638
+ - [ ] Objective
2639
+ - [ ] Context
2640
+ - [ ] Scope
2641
+ - [ ] Non-goals
2642
+ - [ ] Done criteria
2643
+ - [ ] Validation command
2644
+ - [ ] Reporting requirements
2645
+ - [ ] Stop conditions
2646
+
2647
+ Before accepting a workflow result, ask:
2648
+
2649
+ - [ ] What changed?
2650
+ - [ ] Why was this the right fix?
2651
+ - [ ] What evidence supports it?
2652
+ - [ ] Which commands were run?
2653
+ - [ ] What still might be risky?
2654
+ - [ ] Is anything blocked or unresolved?
2655
+
2656
+ The better the prompt defines the game, the better the agent can play it.