@bastani/atomic 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/.agents/skills/create-spec/SKILL.md +6 -3
  2. package/.agents/skills/tdd/SKILL.md +107 -0
  3. package/.agents/skills/tdd/deep-modules.md +33 -0
  4. package/.agents/skills/tdd/interface-design.md +31 -0
  5. package/.agents/skills/tdd/mocking.md +59 -0
  6. package/.agents/skills/tdd/refactoring.md +10 -0
  7. package/.agents/skills/tdd/tests.md +61 -0
  8. package/.agents/skills/workflow-creator/SKILL.md +550 -0
  9. package/.agents/skills/workflow-creator/references/agent-sessions.md +891 -0
  10. package/.agents/skills/workflow-creator/references/agent-setup-recipe.md +266 -0
  11. package/.agents/skills/workflow-creator/references/computation-and-validation.md +201 -0
  12. package/.agents/skills/workflow-creator/references/control-flow.md +470 -0
  13. package/.agents/skills/workflow-creator/references/failure-modes.md +1014 -0
  14. package/.agents/skills/workflow-creator/references/getting-started.md +392 -0
  15. package/.agents/skills/workflow-creator/references/registry-and-validation.md +141 -0
  16. package/.agents/skills/workflow-creator/references/running-workflows.md +418 -0
  17. package/.agents/skills/workflow-creator/references/session-config.md +384 -0
  18. package/.agents/skills/workflow-creator/references/state-and-data-flow.md +356 -0
  19. package/.agents/skills/workflow-creator/references/user-input.md +234 -0
  20. package/.agents/skills/workflow-creator/references/workflow-inputs.md +392 -0
  21. package/.claude/agents/debugger.md +2 -2
  22. package/.claude/agents/reviewer.md +1 -1
  23. package/.claude/agents/worker.md +2 -2
  24. package/.github/agents/debugger.md +1 -1
  25. package/.github/agents/worker.md +1 -1
  26. package/.mcp.json +5 -1
  27. package/.opencode/agents/debugger.md +1 -1
  28. package/.opencode/agents/worker.md +1 -1
  29. package/README.md +236 -201
  30. package/dist/sdk/define-workflow.d.ts +11 -6
  31. package/dist/sdk/define-workflow.d.ts.map +1 -1
  32. package/dist/sdk/errors.d.ts +10 -0
  33. package/dist/sdk/errors.d.ts.map +1 -1
  34. package/dist/sdk/index.d.ts +21 -9
  35. package/dist/sdk/index.d.ts.map +1 -1
  36. package/dist/sdk/primitives/inputs.d.ts +36 -0
  37. package/dist/sdk/primitives/inputs.d.ts.map +1 -0
  38. package/dist/sdk/primitives/metadata.d.ts +40 -0
  39. package/dist/sdk/primitives/metadata.d.ts.map +1 -0
  40. package/dist/sdk/primitives/run.d.ts +57 -0
  41. package/dist/sdk/primitives/run.d.ts.map +1 -0
  42. package/dist/sdk/primitives/sessions.d.ts +128 -0
  43. package/dist/sdk/primitives/sessions.d.ts.map +1 -0
  44. package/dist/sdk/runtime/executor.d.ts +24 -56
  45. package/dist/sdk/runtime/executor.d.ts.map +1 -1
  46. package/dist/sdk/runtime/orchestrator-entry.d.ts +26 -0
  47. package/dist/sdk/runtime/orchestrator-entry.d.ts.map +1 -0
  48. package/dist/sdk/runtime/tmux.d.ts +20 -0
  49. package/dist/sdk/runtime/tmux.d.ts.map +1 -1
  50. package/dist/sdk/types.d.ts +26 -86
  51. package/dist/sdk/types.d.ts.map +1 -1
  52. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
  53. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +1 -1
  54. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +1 -1
  55. package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts.map +1 -1
  56. package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts.map +1 -1
  57. package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts.map +1 -1
  58. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +1 -1
  59. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts.map +1 -1
  60. package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts.map +1 -1
  61. package/dist/sdk/workflows/index.d.ts +20 -12
  62. package/dist/sdk/workflows/index.d.ts.map +1 -1
  63. package/dist/services/config/additional-instructions.d.ts +1 -1
  64. package/dist/services/config/additional-instructions.d.ts.map +1 -1
  65. package/package.json +4 -4
  66. package/src/cli.ts +39 -56
  67. package/src/commands/builtin-registry.ts +37 -0
  68. package/src/commands/cli/chat/index.ts +1 -3
  69. package/src/{sdk → commands/cli}/management-commands.ts +15 -55
  70. package/src/commands/cli/session.ts +1 -1
  71. package/src/commands/cli/workflow-command.test.ts +250 -16
  72. package/src/commands/cli/workflow-inputs.test.ts +1 -0
  73. package/src/commands/cli/workflow-inputs.ts +13 -3
  74. package/src/commands/cli/workflow-list.test.ts +1 -0
  75. package/src/commands/cli/workflow-list.ts +0 -0
  76. package/src/commands/cli/workflow-status.ts +1 -1
  77. package/src/commands/cli/workflow.ts +191 -11
  78. package/src/sdk/define-workflow.test.ts +47 -16
  79. package/src/sdk/define-workflow.ts +24 -6
  80. package/src/sdk/errors.test.ts +11 -0
  81. package/src/sdk/errors.ts +13 -0
  82. package/src/sdk/index.test.ts +92 -0
  83. package/src/sdk/index.ts +71 -15
  84. package/src/sdk/primitives/inputs.ts +48 -0
  85. package/src/sdk/primitives/metadata.ts +63 -0
  86. package/src/sdk/primitives/run.ts +81 -0
  87. package/src/sdk/primitives/sessions.test.ts +594 -0
  88. package/src/sdk/primitives/sessions.ts +328 -0
  89. package/src/sdk/runtime/executor.ts +36 -115
  90. package/src/sdk/runtime/orchestrator-entry.ts +110 -0
  91. package/src/sdk/runtime/tmux.ts +33 -0
  92. package/src/sdk/types.ts +26 -91
  93. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +1 -0
  94. package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +1 -0
  95. package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +1 -0
  96. package/src/sdk/workflows/builtin/open-claude-design/claude/index.ts +1 -0
  97. package/src/sdk/workflows/builtin/open-claude-design/copilot/index.ts +1 -0
  98. package/src/sdk/workflows/builtin/open-claude-design/opencode/index.ts +1 -0
  99. package/src/sdk/workflows/builtin/ralph/claude/index.ts +1 -0
  100. package/src/sdk/workflows/builtin/ralph/copilot/index.ts +1 -0
  101. package/src/sdk/workflows/builtin/ralph/opencode/index.ts +1 -0
  102. package/src/sdk/workflows/index.ts +68 -51
  103. package/src/services/config/additional-instructions.ts +1 -1
  104. package/.agents/skills/test-driven-development/SKILL.md +0 -371
  105. package/.agents/skills/test-driven-development/testing-anti-patterns.md +0 -299
  106. package/dist/commands/cli/session.d.ts +0 -67
  107. package/dist/commands/cli/session.d.ts.map +0 -1
  108. package/dist/commands/cli/workflow-status.d.ts +0 -63
  109. package/dist/commands/cli/workflow-status.d.ts.map +0 -1
  110. package/dist/sdk/commander.d.ts +0 -74
  111. package/dist/sdk/commander.d.ts.map +0 -1
  112. package/dist/sdk/management-commands.d.ts +0 -42
  113. package/dist/sdk/management-commands.d.ts.map +0 -1
  114. package/dist/sdk/workflow-cli.d.ts +0 -103
  115. package/dist/sdk/workflow-cli.d.ts.map +0 -1
  116. package/dist/sdk/workflows/builtin-registry.d.ts +0 -113
  117. package/dist/sdk/workflows/builtin-registry.d.ts.map +0 -1
  118. package/src/sdk/commander.ts +0 -161
  119. package/src/sdk/workflow-cli.ts +0 -409
  120. package/src/sdk/workflows/builtin-registry.ts +0 -23
@@ -0,0 +1,550 @@
1
+ ---
2
+ name: workflow-creator
3
+ description: Create AND run Atomic CLI workflows (`defineWorkflow().run().compile()` with `ctx.stage()`) across Claude, Copilot, and OpenCode SDKs. Use for **authoring** when the user wants to build, edit, debug, or design agent pipelines — multi-stage automations, review/fix loops, parallel fan-out, headless/background stages, `defineWorkflow`, `ctx.stage`, `ctx.inputs`, declared `WorkflowInput` schemas, `runWorkflow`, `createRegistry`, `listWorkflows`, the SDK's metadata accessors (`getName`, `getInputSchema`, `getAgent`), `validateInputs`, the interactive workflow picker (`WorkflowPicker` from `@bastani/atomic/workflows/components`), single or multi-workflow composition roots. Use for **running** when the user wants to kick off, execute, monitor, or tear down an existing workflow — "run the ralph workflow", "start gen-spec", "is it done yet?", "what's the status?", "kill the session", or any mention of `atomic workflow -n`, `atomic workflow inputs`, `atomic workflow status`, the picker, or `atomic session kill`.
4
+ ---
5
+
6
+ # Workflow Creator
7
+
8
+ You are a workflow architect specializing in the Atomic CLI `defineWorkflow().run().compile()` API. You translate user intent into well-structured workflow files that orchestrate multiple coding agent sessions using **programmatic SDK code** — Claude Agent SDK, Copilot SDK, and OpenCode SDK. Sessions are spawned dynamically via `ctx.stage(stageOpts, clientOpts, sessionOpts, callback)` inside the `.run()` callback, using native TypeScript control flow (loops, conditionals, `Promise.all()`) for orchestration. The runtime auto-creates the SDK client and session, injects them as `s.client` and `s.session`, runs the callback, then auto-cleans up.
9
+
10
+ You also serve as a **context engineering advisor** — use the design skills listed under "Design Advisory Skills" to make informed architectural decisions about session structure, data flow, prompt composition, and quality assurance.
11
+
12
+ Two user journeys live in this skill:
13
+
14
+ - **Authoring** a new workflow (or editing/debugging an existing one) → read on below.
15
+ - **Running** a workflow on the user's behalf ("run ralph on this spec", "is it done yet?", "kill it") → go to `references/running-workflows.md`.
16
+
17
+ ## Reference Files
18
+
19
+ Load references on demand. **Only `getting-started.md` is always-load.** Everything else is conditional — pull it in when the task matches the trigger column.
20
+
21
+ | File | Load when |
22
+ |---|---|
23
+ | `getting-started.md` | **Always** — quick-start examples for all 3 SDKs, SDK exports, `SessionContext` field reference |
24
+ | `agent-setup-recipe.md` | When the user is starting from zero (empty terminal, no project, "set me up", "how do I get started"). Deterministic env-detect → install → scaffold → smoke-test playbook with typed-error recovery hints |
25
+ | `failure-modes.md` | Before shipping any multi-session workflow. 16 catalogued failures (silent + loud) with wrong-vs-right patterns and a pre-ship design checklist |
26
+ | `workflow-inputs.md` | When declaring structured inputs or documenting how a workflow is invoked — `WorkflowInput` schema, field-type selection, picker + CLI flag semantics, builtin-protection rules |
27
+ | `agent-sessions.md` | When writing SDK calls — `s.session.query()` (Claude), `s.session.send()` (Copilot), `s.client.session.prompt()` (OpenCode); includes session-lifecycle pitfalls and when to use `sendAndWait` with explicit timeouts |
28
+ | `control-flow.md` | When using loops, conditionals, parallel execution (`Promise.all`), headless fan-out, or review/fix patterns |
29
+ | `state-and-data-flow.md` | When passing data between sessions — `s.save()`, `s.transcript()`, `s.getMessages()`, file persistence, transcript compression |
30
+ | `running-workflows.md` | When the user asks you to **run** an existing workflow rather than author one |
31
+ | `computation-and-validation.md` | When adding deterministic computation, response parsing, validation, quality gates, or file I/O |
32
+ | `session-config.md` | When configuring model, tools, permissions, hooks, or structured output per SDK |
33
+ | `user-input.md` | When collecting user input **mid-workflow** (not at invocation time — use `workflow-inputs.md` for that) |
34
+ | `registry-and-validation.md` | When setting up `createRegistry()` and iterating it via `listWorkflows`, or when you need the key scheme, validate-on-register rules, or same-name collision detection (only relevant for a multi-workflow CLI) |
35
+
36
+ ## Scaffold a new workflow from scratch
37
+
38
+ When the user asks you to build a new workflow — and especially when they're starting from an empty terminal — **load `references/agent-setup-recipe.md` and follow it as your playbook.** That reference is the deterministic, agent-prescriptive version of the steps below: env detection, dependency install, scaffold, smoke-test, and typed-error recovery. It is the single source of truth for setup; the summary here exists so you remember the shape without leaving SKILL.md.
39
+
40
+ The shape:
41
+
42
+ ```
43
+ <repo>/
44
+ ├── package.json
45
+ ├── tsconfig.json
46
+ └── src/
47
+ ├── workflows/
48
+ │ └── <workflow-name>/
49
+ │ ├── claude.ts # one file per agent you target
50
+ │ ├── copilot.ts
51
+ │ └── opencode.ts
52
+ └── <agent>-worker.ts # one composition root per agent
53
+ ```
54
+
55
+ One workflow per directory, one file per agent, one composition-root file per agent. The convention is fixed because every atomic project looks the same — users, agents, and this skill all locate files the same way. Improvising paths makes the next agent's job harder.
56
+
57
+ The five-step rhythm is the same regardless of workflow complexity:
58
+
59
+ 1. **Verify prerequisites** — Bun, tmux/psmux, an authenticated agent CLI. Surface missing pieces *before* writing code; the first `bun run` will fail otherwise. Devcontainers using `ghcr.io/flora131/atomic/<agent>:1` bundle all three.
60
+ 2. **Bootstrap** — `bun init -y` (skip if `package.json` exists) + `bun add @bastani/atomic` + the provider SDK(s) the user targets.
61
+ 3. **Write the workflow** at `src/workflows/<name>/<agent>.ts` using `defineWorkflow({ ... source: import.meta.path }).for(agent).run(...).compile()`. The `source: import.meta.path` is mandatory — the SDK re-imports this path inside the orchestrator child process. Per-agent skeletons live in `references/getting-started.md` §"Quick-start example".
62
+ 4. **Write the composition root** at `src/<agent>-worker.ts` (single workflow) or `src/cli.ts` (multiple workflows). The SDK exposes pure primitives — Commander/citty/yargs/etc. is the dev's choice; `runWorkflow({ workflow, inputs })` is the action body. Catch `MissingDependencyError` and `SessionNotFoundError` for friendly CLI messages.
63
+ 5. **Verify** — `bunx tsc --noEmit`, then a real `bun run src/<agent>-worker.ts --prompt "..."` smoke test. Watch the tmux pane spawn, the agent reply, the session end cleanly.
64
+
65
+ The when-in-doubt rules:
66
+
67
+ - **Single agent, single workflow** — the 90% case. One `<agent>.ts` + one `<agent>-worker.ts`. Done.
68
+ - **Same workflow across agents** — three `<agent>.ts` files that share helpers from `src/workflows/<name>/helpers/`; three `<agent>-worker.ts` files.
69
+ - **Multiple workflows in one CLI** — build a `createRegistry().register(...)` pipeline and iterate it via `listWorkflows(registry)` to mount one Commander subcommand per workflow. Use a `src/cli.ts` composition root instead of per-agent workers.
70
+
71
+ If the user's need doesn't match any of these, ask before scaffolding — picking wrong here means rewriting 100% of the scaffold.
72
+
73
+ For monitoring and lifecycle management after a run is live, the global `atomic` CLI (`atomic session list`, `atomic workflow status`, `atomic session kill -y`) and the SDK session primitives (`listSessions`, `getSession`, `getSessionStatus`, `attachSession`, `detachSession`, `stopSession`, `nextWindow`, `previousWindow`, `gotoOrchestrator`) both operate on the shared `atomic` tmux socket — workflows started either way show up in both surfaces. See `references/running-workflows.md` for the `needs_review` state and worked teardown examples, and `examples/pane-navigation/` for a reference driver CLI exercising the navigation primitives.
74
+
75
+ ## Information Flow Is a First-Class Design Concern
76
+
77
+ **A workflow is an information flow problem, not a sequence of prompts.**
78
+ Before writing any `ctx.stage()` call, answer for every session boundary:
79
+
80
+ - What context does this session need, how will it reach the session
81
+ (prompt handoff, file, single multi-turn stage), and what happens if the
82
+ context window fills up?
83
+
84
+ For Copilot and OpenCode, every `ctx.stage()` is a fresh conversation;
85
+ Claude reuses a tmux pane per stage. Read these before shipping any
86
+ multi-session workflow:
87
+
88
+ - `references/agent-sessions.md` §"Critical pitfall: session lifecycle
89
+ controls what context is available" — lifecycle table, context-loss
90
+ patterns, and per-SDK details.
91
+ - `references/failure-modes.md` — silent + loud failures with wrong-vs-right
92
+ patterns and the pre-ship design checklist.
93
+ - `references/state-and-data-flow.md` — `s.save()`, `s.transcript()`, and
94
+ file-based handoff patterns.
95
+
96
+ ## Design Advisory Skills
97
+
98
+ Workflow quality depends on two disciplines: **prompt engineering** (crafting
99
+ clear, structured prompts each session receives) and **context engineering**
100
+ (ensuring the right information reaches each session without exceeding token
101
+ budgets). Use `prompt-engineer` to improve individual session prompts —
102
+ clarity, XML structure, few-shot examples, chain-of-thought — and the
103
+ context engineering skills below to design information flow between sessions.
104
+
105
+ | Design Concern | Skill | Trigger |
106
+ |---|---|---|
107
+ | Prompt clarity and structure | `prompt-engineer` | Every workflow — clear instructions, XML tags, examples, chain-of-thought |
108
+ | Session prompt structure | `context-fundamentals` | Every workflow — token budgeting, prompt positioning, progressive disclosure |
109
+ | Context failure prevention | `context-degradation` | Long conversations, accumulated state, multi-turn loops |
110
+ | Transcript compression | `context-compression` | Passing large transcripts between sessions |
111
+ | Multi-session architecture | `multi-agent-patterns` | Coordination topology, handoff protocols, error propagation |
112
+ | Cross-run persistence | `memory-systems` | Retaining knowledge across separate executions |
113
+ | Custom tools and capabilities | `tool-design` | Sessions exposing custom tools |
114
+ | File-based coordination | `filesystem-context` | Sessions sharing state via files |
115
+ | Remote execution | `hosted-agents` | Sandboxed or remote environments |
116
+ | Token efficiency | `context-optimization` | Compaction triggers, observation masking, cache-friendly ordering |
117
+ | Quality gates | `evaluation` | Review loops or quality checkpoints |
118
+ | LLM-as-judge review | `advanced-evaluation` | Automated review sessions judging other sessions' output |
119
+ | Task-model fit | `project-development` | Validating whether a task is viable for agent automation |
120
+ | Deliberative reasoning | `bdi-mental-states` | Explainable reasoning chains or formal cognitive models |
121
+
122
+ ## How Workflows Work
123
+
124
+ A workflow is a TypeScript file with a single `.run()` callback that
125
+ orchestrates agent sessions dynamically. Inside the callback, `ctx.stage()`
126
+ spawns sessions — each gets its own tmux window and graph node (unless
127
+ running in headless mode). Native TypeScript handles all control flow:
128
+ loops, conditionals, `Promise.all()`, `try`/`catch`.
129
+
130
+ ```ts
131
+ import { defineWorkflow, extractAssistantText } from "@bastani/atomic/workflows";
132
+
133
+ export default defineWorkflow({
134
+ name: "my-workflow",
135
+ source: import.meta.path,
136
+ description: "...",
137
+ inputs: [
138
+ { name: "prompt", type: "text", required: true, description: "task to perform" },
139
+ ],
140
+ })
141
+ .for("claude")
142
+ .run(async (ctx) => {
143
+ const step1 = await ctx.stage({ name: "step-1" }, {}, {}, async (s) => { /* s.client, s.session */ });
144
+ await ctx.stage({ name: "step-2" }, {}, {}, async (s) => { /* s.client, s.session */ });
145
+ })
146
+ .compile();
147
+ ```
148
+
149
+ The runtime manages the full session lifecycle — callback return marks
150
+ completion; throws mark errors. `.compile()` produces a branded
151
+ `WorkflowDefinition` consumed by the CLI.
152
+
153
+ ### Background (headless) stages
154
+
155
+ Pass `{ headless: true }` in `stageOpts` to run a stage in-process with no
156
+ tmux window or graph node. The callback interface is identical
157
+ (`s.client`, `s.session`, `s.save()`, `s.transcript()` all work). For
158
+ mechanics, fan-out patterns, and graph topology see
159
+ `references/control-flow.md` §"Headless stages" and
160
+ `references/agent-sessions.md` per-SDK "Headless mode" sections.
161
+
162
+ ### Installing the workflow SDK
163
+
164
+ Install `@bastani/atomic` plus the native SDK(s) you target
165
+ (`@anthropic-ai/claude-agent-sdk`, `@github/copilot-sdk`,
166
+ `@opencode-ai/sdk`).
167
+
168
+ ### Composition root
169
+
170
+ Workflows are wired into a **composition root** — a TypeScript file the
171
+ user runs with `bun`. The SDK exposes pure primitives:
172
+
173
+ - `runWorkflow({ workflow, inputs, detach? })` — spawn a workflow's tmux session.
174
+ - `createRegistry()` / `listWorkflows(reg)` / `getWorkflow(reg, agent, name)` — build and iterate a registry.
175
+ - `getName(wf) / getAgent(wf) / getDescription(wf) / getInputSchema(wf) / getSource(wf) / getMinSDKVersion(wf)` — read workflow metadata.
176
+ - `validateInputs(wf, raw)` — apply defaults and validate against the declared schema.
177
+ - **Session lifecycle** — `listSessions / getSession / stopSession / attachSession / detachSession / getSessionStatus / getSessionTranscript`. Manage running tmux sessions on the shared atomic socket.
178
+ - **Pane navigation** — `nextWindow / previousWindow / gotoOrchestrator`. Pure tmux verbs: they update the session's current-window pointer and return immediately. Never auto-attach — an attached client sees the change live; if no client is watching, the next `attachSession` call lands on the new window. Compose `nextWindow(id) + attachSession(id)` for navigate-then-attach.
179
+ - **Typed errors** (catch with `instanceof` to render friendly CLI messages) — `MissingDependencyError` (tmux/psmux/bun missing), `SessionNotFoundError` (id not on the atomic socket), `WorkflowNotCompiledError` (forgot `.compile()`), `InvalidWorkflowError` (default export not a `WorkflowDefinition`), `IncompatibleSDKError` (workflow's `minSDKVersion` newer than installed CLI). All thrown by SDK primitives; all carry the relevant payload field (`dependency`, `id`, `path`, version pair).
180
+ - `WorkflowPicker` (from `@bastani/atomic/workflows/components`) — the interactive picker `atomic workflow -a claude` uses.
181
+
182
+ You compose them into whatever CLI library you prefer. The SDK never
183
+ re-execs the dev's CLI — it ships its own orchestrator entry script and
184
+ re-execs *that* with positional args.
185
+
186
+ ```ts
187
+ // src/claude-worker.ts — single workflow with a small Commander entrypoint
188
+ import { Command } from "@commander-js/extra-typings";
189
+ import { getInputSchema, runWorkflow } from "@bastani/atomic/workflows";
190
+ import workflow from "./workflows/my-workflow/claude.ts";
191
+
192
+ const program = new Command();
193
+ for (const input of getInputSchema(workflow)) {
194
+ program.option(`--${input.name} <value>`, input.description ?? "");
195
+ }
196
+ program.action(async (rawOpts) => {
197
+ await runWorkflow({ workflow, inputs: rawOpts as Record<string, string> });
198
+ });
199
+ await program.parseAsync();
200
+
201
+ // src/cli.ts — many workflows via createRegistry + listWorkflows
202
+ import {
203
+ createRegistry,
204
+ getInputSchema,
205
+ getName,
206
+ listWorkflows,
207
+ runWorkflow,
208
+ } from "@bastani/atomic/workflows";
209
+ import claudeWorkflow from "./workflows/my-workflow/claude.ts";
210
+ import copilotWorkflow from "./workflows/my-workflow/copilot.ts";
211
+
212
+ const registry = createRegistry()
213
+ .register(claudeWorkflow)
214
+ .register(copilotWorkflow);
215
+
216
+ const program = new Command();
217
+ for (const wf of listWorkflows(registry)) {
218
+ const sub = program.command(getName(wf));
219
+ for (const input of getInputSchema(wf)) {
220
+ sub.option(`--${input.name} <value>`, input.description ?? "");
221
+ }
222
+ sub.action(async (rawOpts) => {
223
+ await runWorkflow({ workflow: wf, inputs: rawOpts as Record<string, string> });
224
+ });
225
+ }
226
+ await program.parseAsync();
227
+ ```
228
+
229
+ For programmatic invocation (no CLI at all), call `runWorkflow` directly:
230
+
231
+ ```ts
232
+ const { id, tmuxSessionName } = await runWorkflow({
233
+ workflow,
234
+ inputs: { prompt: "fix the auth bug" },
235
+ detach: true,
236
+ });
237
+ ```
238
+
239
+ For full registry mechanics, key scheme, and validate-on-register behaviour see `references/registry-and-validation.md`.
240
+
241
+ ### Two context levels
242
+
243
+ `WorkflowContext` (`ctx`) drives orchestration in `.run()`; `SessionContext`
244
+ (`s`) drives agent work inside each stage callback. Full field reference in
245
+ `references/getting-started.md` §"`SessionContext` reference".
246
+
247
+ ### Declared inputs
248
+
249
+ Workflows receive user data exclusively through `ctx.inputs` / `s.inputs`,
250
+ declared inline as `inputs: WorkflowInput[]` on `defineWorkflow()`.
251
+ TypeScript restricts `ctx.inputs` to declared keys (undeclared access is a
252
+ compile-time error). Load `references/workflow-inputs.md` for schema shape,
253
+ field types (`string` / `text` / `enum`), validation rules, picker
254
+ semantics, and the "declare your prompt input explicitly" pattern.
255
+
256
+ ### Invocation surfaces
257
+
258
+ Two invocation paths:
259
+
260
+ **User's own app** — the dev controls the CLI shape entirely. Whatever flags they declare in their Commander/citty/yargs program are the user-facing UX. A typical layout (see snippets above):
261
+
262
+ ```bash
263
+ # Single-workflow worker — flags match the workflow's declared inputs
264
+ bun run src/claude-worker.ts --prompt "fix the bug"
265
+ bun run src/claude-worker.ts --research_doc=notes.md --focus=standard
266
+
267
+ # Multi-workflow CLI — one subcommand per workflow
268
+ bun run src/cli.ts review --target_branch=main
269
+ bun run src/cli.ts spec --research_doc=notes.md
270
+ ```
271
+
272
+ To launch the interactive picker, create a `WorkflowPickerPanel` (the picker UI exported from `@bastani/atomic/workflows/components`) and wait for the user's selection:
273
+
274
+ ```ts
275
+ import { WorkflowPickerPanel } from "@bastani/atomic/workflows/components";
276
+
277
+ const panel = await WorkflowPickerPanel.create({ agent: "claude", registry });
278
+ const result = await panel.waitForSelection();
279
+ panel.destroy();
280
+ if (result) {
281
+ await runWorkflow({ workflow: result.workflow, inputs: result.inputs });
282
+ }
283
+ ```
284
+
285
+ The dev's CLI is **never** re-execed. The SDK ships an internal orchestrator entry script and re-execs that with positional args — no env-var dance, no boilerplate re-entry code in the dev's file.
286
+
287
+ **Atomic builtins** — workflows shipped inside `@bastani/atomic`, registered by atomic's internal `createBuiltinRegistry()`:
288
+
289
+ ```bash
290
+ atomic workflow -n <name> -a <agent> [inputs...]
291
+ ```
292
+
293
+ Surface | Command | When
294
+ ---|---|---
295
+ Named, with prompt | `… -n hello -a claude "fix the bug"` | Requires workflow to declare a `prompt` input
296
+ Named, structured | `… -n gen-spec -a claude --research_doc=notes.md` | Structured inputs via `--<field>` flags
297
+ Interactive picker | `atomic workflow -a claude` | Discovery — fuzzy list + form; this is the intentional no-`-n` path
298
+ List (atomic builtins) | `atomic workflow list`, `atomic workflow list -a <agent>` | Browse registered builtins, optionally filtered
299
+ List (user cli) | Iterate `listWorkflows(registry)` and add a `list` Commander subcommand yourself | No built-in `--list` flag
300
+ List (single-workflow) | Not applicable — the file *is* the workflow | —
301
+ Inspect inputs | `atomic workflow inputs <name> -a claude` | Print input schema as JSON
302
+ Status (one or all) | `atomic workflow status [<session-id>]` | Query state — `in_progress`, `error`, `completed`, `needs_review`
303
+ Kill non-interactively | `atomic session kill <id> -y` | Tear down without confirmation prompt — `-y` is mandatory for agents
304
+ Detached (background) | `… -d` / `… --detach` | Runs without attaching; reattach with `atomic workflow session connect <name>`
305
+
306
+ Any of the named shapes above (positional or structured) accepts
307
+ `-d` / `--detach` to run without attaching. Use it when you're automating
308
+ from a script and want the CLI to return as soon as the session is spawned.
309
+
310
+ ### Declaring SDK compatibility (`minSDKVersion`)
311
+
312
+ Opt-in version gate for workflows that depend on a specific SDK release.
313
+ **Default is unset — do not add it to new workflows unless you have a
314
+ concrete reason.**
315
+
316
+ ```ts
317
+ defineWorkflow({
318
+ name: "uses-new-api",
319
+ source: import.meta.path,
320
+ minSDKVersion: "0.6.0", // refuse to load on older CLI
321
+ })
322
+ ```
323
+
324
+ When set to a version newer than the installed CLI, the workflow refuses to
325
+ load and surfaces a visible row in `atomic workflow list` and the picker
326
+ (rather than silently vanishing). Set it only when the workflow calls a
327
+ newly-added SDK surface (new `stage()` option, new helper export, new
328
+ provider method); omit it for workflows on stable APIs. Full semver
329
+ semantics and the visible-diagnostic contract live in
330
+ `references/registry-and-validation.md`.
331
+
332
+ ## Structural Rules (hard constraints)
333
+
334
+ Enforced by the builder, loader, and runtime:
335
+
336
+ 1. **`.run()` required** — the builder must have a `.run(async (ctx) => { ... })` call.
337
+ 2. **`.compile()` required** — the chain must end with `.compile()`.
338
+ 3. **Every workflow is exported** — export the compiled definition from the workflow file (as a default or named export). It is then imported in the composition root and passed to `runWorkflow(...)` (single workflow) or `registry.register(...)` (multiple workflows).
339
+ 4. **Unique session names** — every `ctx.stage()` call must use a unique `name` across the workflow run.
340
+ 5. **Completed-only reads** — `transcript()` and `getMessages()` only access sessions whose callback has returned and saves have flushed. Attempting to read a still-running session throws.
341
+ 6. **Graph topology is auto-inferred** — the runtime derives parent-child edges from `await`/`Promise.all` patterns. Sequential `await` creates a chain; `Promise.all([...])` branches from the same parent; a stage after `Promise.all` receives all parallel stages as parents. Headless stages are **transparent** to the graph — they don't consume or update the execution frontier. See `references/control-flow.md` for full details.
342
+ 7. **Do not manually create clients or sessions** — the runtime auto-creates `s.client` and `s.session` from `clientOpts` and `sessionOpts`. Use the injected objects instead: `s.session.query()` (Claude), `s.session.send()` (Copilot), or `s.client.session.prompt()` (OpenCode).
343
+ 8. **Headless stages share the same callback interface** — `s.client`, `s.session`, `s.save()`, `s.transcript()`, and return values all work identically in headless mode. The only differences are: no tmux window, no graph node, and a virtual `paneId`.
344
+ 9. **Every `ctx.stage()` must contain at least one LLM interaction** — a `s.session.query()` / `s.session.send()` / `s.client.session.prompt()` call. A stage that runs only TypeScript (file I/O, git commands, HTTP calls, parsing, validation) spawns a visible tmux pane that sits idle on the agent welcome screen for the whole stage, confusing users watching the graph. See `references/failure-modes.md` §F22. Pure deterministic code belongs in `.run()` outside any stage; deterministic follow-up *paired* with a query (e.g. parse → validate → save after `s.session.query()`) belongs in the same callback.
345
+
346
+ ## Concept-to-Code Mapping
347
+
348
+ Every workflow pattern maps directly to TypeScript code:
349
+
350
+ | Workflow Concept | Programmatic Pattern |
351
+ |---|---|
352
+ | Agent session (send prompt, get response) | `ctx.stage({ name }, {}, {}, async (s) => { /* use s.client, s.session */ })` — **must** include an LLM call (Rule 9) |
353
+ | Background (headless) session | `ctx.stage({ name, headless: true }, {}, {}, async (s) => { /* same API */ })` — invisible in graph, tracked by background counter |
354
+ | Sequential execution | `await ctx.stage(...)` followed by `await ctx.stage(...)` |
355
+ | Parallel execution | `Promise.all([ctx.stage(...), ctx.stage(...)])` |
356
+ | Parallel background tasks | `Promise.all([ctx.stage({ name: "a", headless: true }, ...), ctx.stage({ name: "b", headless: true }, ...)])` |
357
+ | Conditional branching | `if (...) { await ctx.stage({ name: "fix" }, {}, {}, ...) }` |
358
+ | Bounded loops with visible graph nodes | `` for (let i = 1; i <= N; i++) { await ctx.stage({ name: `step-${i}` }, {}, {}, ...) } `` |
359
+ | Return data from session | `const h = await ctx.stage(opts, {}, {}, async (s) => { return value; }); h.result` |
360
+ | Data flow between sessions | `s.save()` to persist → `s.transcript(handle)` or `s.transcript("name")` to retrieve |
361
+ | Pure deterministic computation (no LLM call) | Plain TypeScript at the top level of `.run()`. **Never** a standalone stage — see Rule 9 and `references/failure-modes.md` §F22. |
362
+ | Deterministic work tied to an LLM call | Inside the same stage callback, before/after the query. E.g. `s.session.query(...)` → parse → validate → `s.save(parsed)`. |
363
+ | Subagent orchestration | Claude: `--agent` via `chatFlags` (interactive) or `agent` SDK option (headless); Copilot: `{ agent: "name" }` in sessionOpts; OpenCode: `agent` param in `s.client.session.prompt()` |
364
+ | Per-session configuration | Pass `clientOpts` (2nd arg) and `sessionOpts` (3rd arg) to `ctx.stage()` |
365
+
366
+ ### When to use a stage vs. plain TypeScript
367
+
368
+ Before reaching for `ctx.stage()`, ask: **does this block need an LLM?**
369
+
370
+ ```ts
371
+ // ✓ OK — query + deterministic parse in the same callback
372
+ const plan = await ctx.stage({ name: "plan" }, {}, {}, async (s) => {
373
+ const messages = await s.session.query("Produce a step-by-step plan.");
374
+ const text = extractAssistantText(messages, 0);
375
+ const parsed = parsePlan(text); // deterministic — fine here
376
+ s.save(parsed);
377
+ return parsed;
378
+ });
379
+
380
+ // ✓ OK — plain TS at the top of .run() between stages
381
+ const plannedFiles = plan.result.files.filter(f => f.endsWith(".ts"));
382
+ const startedAt = Date.now();
383
+
384
+ // ✗ NOT OK — a stage whose callback is pure code with no query
385
+ await ctx.stage({ name: "write-report" }, {}, {}, async (s) => {
386
+ await fs.writeFile("report.md", buildReport(plan.result)); // no LLM!
387
+ });
388
+ // This spawns a tmux pane that stays on the Claude/Copilot welcome
389
+ // screen for the whole stage. The user watching the graph sees an
390
+ // empty pane and wonders why no prompt ever appeared.
391
+
392
+ // ✓ OK — do the deterministic work inline in .run()
393
+ await fs.writeFile("report.md", buildReport(plan.result));
394
+ ```
395
+
396
+ Rule of thumb: **one stage, one LLM conversation.** If the block has no
397
+ `s.session.query()` / `s.session.send()` / `s.client.session.prompt()`,
398
+ it's not a stage.
399
+
400
+ For full pattern examples with code, see `references/control-flow.md`
401
+ (loops, conditionals, review/fix, graph topology, headless fan-out),
402
+ `references/state-and-data-flow.md` (data passing, file coordination,
403
+ transcript compression), and `references/computation-and-validation.md`
404
+ (parsing, validation, quality gates).
405
+
406
+ ## Authoring Process
407
+
408
+ ### 1. Understand the User's Goal
409
+
410
+ Map the user's intent to sessions and patterns:
411
+
412
+ | Question | Maps to |
413
+ |----------|---------|
414
+ | What are the distinct **LLM interactions**? | Each LLM conversation → one `ctx.stage()` call (Rule 9) |
415
+ | Can any LLM calls run in parallel? | `Promise.all([ctx.stage(...), ...])` |
416
+ | Should any parallel LLM calls run in the background? | `ctx.stage({ name, headless: true }, ...)` — invisible in graph, ideal for data-gathering |
417
+ | Does any step need **pure deterministic code** (no LLM)? | Plain TypeScript at the top of `.run()` — **not** a dedicated stage. Bundle it inside the nearest stage callback if it's directly tied to that stage's query. |
418
+ | Do any steps need to repeat? | `for`/`while` loop with `ctx.stage()` inside |
419
+ | Are there conditional paths? | `if`/`else` wrapping `ctx.stage()` calls |
420
+ | What data flows between steps? | `s.save()` → `s.transcript(handle)` / `s.getMessages(handle)` |
421
+ | Does the workflow need user input? | SDK-specific user input APIs (see `references/user-input.md`) |
422
+ | Do any steps need a specific model? | SDK-specific session config (see `references/session-config.md`) |
423
+
424
+ Then walk the **Design Advisory Skills** table above (§"Design Advisory
425
+ Skills") — for each row whose trigger applies to your workflow, pull that
426
+ skill in *before* writing code. Catching architectural and prompt-quality
427
+ issues at design time is far cheaper than catching them in the first failed
428
+ end-to-end run.
429
+
430
+ ### 2. Choose the Target Agent
431
+
432
+ Pass the agent as a runtime argument to `.for()` on the builder — this
433
+ narrows all context types and gives correct `s.client`/`s.session` types.
434
+ Call `.for()` **before** `.run()`:
435
+
436
+ | Agent | Builder Chain | Primary Session API |
437
+ |-------|---------------|---------------------|
438
+ | Claude | `defineWorkflow({...}).for("claude")` | `s.session.query(prompt)` — sends prompt to the Claude TUI pane |
439
+ | Copilot | `defineWorkflow({...}).for("copilot")` | `s.session.send({ prompt })` — the runtime wraps `send` to block until `session.idle` with no timeout (see `references/failure-modes.md` §F10); do not use `sendAndWait` in Atomic workflows |
440
+ | OpenCode | `defineWorkflow({...}).for("opencode")` | `s.client.session.prompt({ sessionID: s.session.id, parts: [...] })` |
441
+
442
+ The runtime manages client/session lifecycle automatically. For native SDK
443
+ types and advanced APIs, import directly from the provider packages
444
+ (`@github/copilot-sdk`, `@anthropic-ai/claude-agent-sdk`, `@opencode-ai/sdk/v2`).
445
+
446
+ For cross-agent support, create one workflow file per agent. Use shared
447
+ helper modules for SDK-agnostic logic. A typical layout:
448
+
449
+ ```
450
+ src/workflows/my-workflow/
451
+ ├── claude.ts # Claude-specific SDK code — exports a WorkflowDefinition
452
+ ├── copilot.ts # Copilot-specific SDK code — exports a WorkflowDefinition
453
+ ├── opencode.ts # OpenCode-specific SDK code — exports a WorkflowDefinition
454
+ └── helpers/
455
+ ├── prompts.ts # Prompt builders (SDK-agnostic)
456
+ ├── parsers.ts # Response parsers (SDK-agnostic)
457
+ └── validation.ts # Validation logic (SDK-agnostic)
458
+ ```
459
+
460
+ Register each variant in the composition root:
461
+
462
+ ```ts
463
+ import { claudeWorkflow } from "./workflows/my-workflow/claude.ts";
464
+ import { copilotWorkflow } from "./workflows/my-workflow/copilot.ts";
465
+
466
+ const registry = createRegistry()
467
+ .register(claudeWorkflow)
468
+ .register(copilotWorkflow);
469
+ ```
470
+
471
+ ### 3. Write the Workflow File
472
+
473
+ Write the workflow file using the SDK-specific patterns. See
474
+ `references/getting-started.md` for full quick-start examples for all 3
475
+ SDKs (send/save/extract patterns, idle handling), and
476
+ `references/agent-sessions.md` for per-SDK API details and lifecycle
477
+ caveats.
478
+
479
+ **Reference implementations** — two categories live in-repo:
480
+
481
+ - **Builtins** (`src/sdk/workflows/builtin/`) — production patterns,
482
+ registered via `createBuiltinRegistry()` inside the `atomic` CLI:
483
+ - `ralph` — iterative plan → orchestrate → review → debug loop.
484
+ - `deep-research-codebase` — scout → parallel explorer fan-out → aggregator.
485
+ - `open-claude-design` — design-system init flow.
486
+ - **User-app examples** (`examples/<name>/`) — minimal runnable user apps
487
+ you can copy-paste as a starting point. Each example directory contains
488
+ `claude/index.ts`, `copilot/index.ts`, `opencode/index.ts`, and one
489
+ `<agent>-worker.ts` file per agent — each worker is a small Commander
490
+ entrypoint that calls `runWorkflow({ workflow, inputs })`. Run with
491
+ `bun run examples/<name>/<agent>-worker.ts --<field>=<value>` (or a
492
+ positional prompt string if the worker declares `[prompt...]`).
493
+ Covers: `hello-world`, `sequential-describe-summarize`,
494
+ `parallel-hello-world`, `headless-test`, `hil-favorite-color`,
495
+ `hil-favorite-color-headless`, `structured-output-demo`,
496
+ `reviewer-tool-test` (copilot only), `review-fix-loop`,
497
+ `multi-workflow`, `commander-embed`, `pane-navigation` (driver CLI for
498
+ the navigation primitives).
499
+
500
+ Both sets demonstrate shared helpers, context-aware prompt building,
501
+ deterministic heuristics, and cross-SDK adaptation.
502
+
503
+ ### 4. Wire, typecheck, run
504
+
505
+ The composition root is always three lines (see §"Scaffold a new workflow from scratch" above for the exact template and multi-workflow variant). After writing it:
506
+
507
+ ```bash
508
+ bun typecheck
509
+ bun run src/<agent>-worker.ts --prompt "<test task>"
510
+ ```
511
+
512
+ Other invocation shapes you may want to demonstrate to the user once the workflow runs:
513
+
514
+ ```bash
515
+ # Single-workflow worker — flags match the workflow's declared inputs
516
+ bun run src/<agent>-worker.ts --<field>=<value> # structured inputs
517
+ bun run src/<agent>-worker.ts "free-form prompt text" # positional fallback (if wired)
518
+
519
+ # Multi-workflow CLI — one subcommand per workflow
520
+ bun run src/cli.ts <workflow-name> --<field>=<value> # structured
521
+ bun run src/cli.ts <workflow-name> "free-form prompt text" # positional fallback (if wired)
522
+
523
+ # Atomic builtins — these use -n/-a/-d (atomic CLI's own flags, not user-app flags)
524
+ atomic workflow -n <name> -a <agent> "<prompt>" # attached run
525
+ atomic workflow -n <name> -a <agent> -d "<prompt>" # detached (background)
526
+ ```
527
+
528
+ For detached user-app runs, pass `detach: true` to `runWorkflow` or wire your own `--detach` flag in your Commander entrypoint. For the atomic builtins (`ralph`, `deep-research-codebase`, `open-claude-design`), see `references/running-workflows.md` for monitoring and teardown.
529
+
530
+ ## Running an Existing Workflow
531
+
532
+ If the user asks you to **run** (or "kick off" / "start" / "execute") a
533
+ workflow — not author one — the workflow already exists and you just need
534
+ to invoke it correctly. That's a different playbook from authoring.
535
+
536
+ **Read `references/running-workflows.md`.** It covers:
537
+
538
+ - Three invocation paths: user's own app (per-input `--<flag>` flags wired
539
+ by the dev, using Commander or another CLI library), repo-shipped examples
540
+ (same pattern), and atomic builtins (`atomic workflow -n … -a …`).
541
+ - Why atomic builtins use `-n` + `-a` and how to add `-d` for background runs.
542
+ - Why you must list workflows first.
543
+ - How to handle missing workflows (offer to author, not fabricate).
544
+ - Using `atomic workflow inputs <name> -a <agent>` to discover the schema
545
+ and drive AskUserQuestion.
546
+ - The six-step invocation recipe.
547
+ - Monitoring with `atomic workflow status` — and why `needs_review` must be
548
+ surfaced immediately.
549
+ - Tearing down with `atomic session kill -y` (the `-y` is mandatory).
550
+ - Worked examples for "workflow exists" and "workflow doesn't exist".