@bastani/atomic 0.6.4 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/create-spec/SKILL.md +6 -3
- package/.agents/skills/tdd/SKILL.md +107 -0
- package/.agents/skills/tdd/deep-modules.md +33 -0
- package/.agents/skills/tdd/interface-design.md +31 -0
- package/.agents/skills/tdd/mocking.md +59 -0
- package/.agents/skills/tdd/refactoring.md +10 -0
- package/.agents/skills/tdd/tests.md +61 -0
- package/.agents/skills/workflow-creator/SKILL.md +550 -0
- package/.agents/skills/workflow-creator/references/agent-sessions.md +891 -0
- package/.agents/skills/workflow-creator/references/agent-setup-recipe.md +266 -0
- package/.agents/skills/workflow-creator/references/computation-and-validation.md +201 -0
- package/.agents/skills/workflow-creator/references/control-flow.md +470 -0
- package/.agents/skills/workflow-creator/references/failure-modes.md +1014 -0
- package/.agents/skills/workflow-creator/references/getting-started.md +392 -0
- package/.agents/skills/workflow-creator/references/registry-and-validation.md +141 -0
- package/.agents/skills/workflow-creator/references/running-workflows.md +418 -0
- package/.agents/skills/workflow-creator/references/session-config.md +384 -0
- package/.agents/skills/workflow-creator/references/state-and-data-flow.md +356 -0
- package/.agents/skills/workflow-creator/references/user-input.md +234 -0
- package/.agents/skills/workflow-creator/references/workflow-inputs.md +392 -0
- package/.claude/agents/debugger.md +2 -2
- package/.claude/agents/reviewer.md +1 -1
- package/.claude/agents/worker.md +2 -2
- package/.github/agents/debugger.md +1 -1
- package/.github/agents/worker.md +1 -1
- package/.mcp.json +5 -1
- package/.opencode/agents/debugger.md +1 -1
- package/.opencode/agents/worker.md +1 -1
- package/README.md +236 -201
- package/dist/sdk/define-workflow.d.ts +11 -6
- package/dist/sdk/define-workflow.d.ts.map +1 -1
- package/dist/sdk/errors.d.ts +10 -0
- package/dist/sdk/errors.d.ts.map +1 -1
- package/dist/sdk/index.d.ts +21 -9
- package/dist/sdk/index.d.ts.map +1 -1
- package/dist/sdk/primitives/inputs.d.ts +36 -0
- package/dist/sdk/primitives/inputs.d.ts.map +1 -0
- package/dist/sdk/primitives/metadata.d.ts +40 -0
- package/dist/sdk/primitives/metadata.d.ts.map +1 -0
- package/dist/sdk/primitives/run.d.ts +57 -0
- package/dist/sdk/primitives/run.d.ts.map +1 -0
- package/dist/sdk/primitives/sessions.d.ts +128 -0
- package/dist/sdk/primitives/sessions.d.ts.map +1 -0
- package/dist/sdk/runtime/executor.d.ts +24 -56
- package/dist/sdk/runtime/executor.d.ts.map +1 -1
- package/dist/sdk/runtime/orchestrator-entry.d.ts +26 -0
- package/dist/sdk/runtime/orchestrator-entry.d.ts.map +1 -0
- package/dist/sdk/runtime/tmux.d.ts +20 -0
- package/dist/sdk/runtime/tmux.d.ts.map +1 -1
- package/dist/sdk/types.d.ts +26 -86
- package/dist/sdk/types.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts.map +1 -1
- package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts.map +1 -1
- package/dist/sdk/workflows/index.d.ts +20 -12
- package/dist/sdk/workflows/index.d.ts.map +1 -1
- package/dist/services/config/additional-instructions.d.ts +1 -1
- package/dist/services/config/additional-instructions.d.ts.map +1 -1
- package/package.json +4 -4
- package/src/cli.ts +39 -56
- package/src/commands/builtin-registry.ts +37 -0
- package/src/commands/cli/chat/index.ts +1 -3
- package/src/{sdk → commands/cli}/management-commands.ts +15 -55
- package/src/commands/cli/session.ts +1 -1
- package/src/commands/cli/workflow-command.test.ts +250 -16
- package/src/commands/cli/workflow-inputs.test.ts +1 -0
- package/src/commands/cli/workflow-inputs.ts +13 -3
- package/src/commands/cli/workflow-list.test.ts +1 -0
- package/src/commands/cli/workflow-list.ts +0 -0
- package/src/commands/cli/workflow-status.ts +1 -1
- package/src/commands/cli/workflow.ts +191 -11
- package/src/sdk/define-workflow.test.ts +47 -16
- package/src/sdk/define-workflow.ts +24 -6
- package/src/sdk/errors.test.ts +11 -0
- package/src/sdk/errors.ts +13 -0
- package/src/sdk/index.test.ts +92 -0
- package/src/sdk/index.ts +71 -15
- package/src/sdk/primitives/inputs.ts +48 -0
- package/src/sdk/primitives/metadata.ts +63 -0
- package/src/sdk/primitives/run.ts +81 -0
- package/src/sdk/primitives/sessions.test.ts +594 -0
- package/src/sdk/primitives/sessions.ts +328 -0
- package/src/sdk/runtime/executor.ts +36 -115
- package/src/sdk/runtime/orchestrator-entry.ts +110 -0
- package/src/sdk/runtime/tmux.ts +33 -0
- package/src/sdk/types.ts +26 -91
- package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +1 -0
- package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +1 -0
- package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +1 -0
- package/src/sdk/workflows/builtin/open-claude-design/claude/index.ts +1 -0
- package/src/sdk/workflows/builtin/open-claude-design/copilot/index.ts +1 -0
- package/src/sdk/workflows/builtin/open-claude-design/opencode/index.ts +1 -0
- package/src/sdk/workflows/builtin/ralph/claude/index.ts +1 -0
- package/src/sdk/workflows/builtin/ralph/copilot/index.ts +1 -0
- package/src/sdk/workflows/builtin/ralph/opencode/index.ts +1 -0
- package/src/sdk/workflows/index.ts +68 -51
- package/src/services/config/additional-instructions.ts +1 -1
- package/.agents/skills/test-driven-development/SKILL.md +0 -371
- package/.agents/skills/test-driven-development/testing-anti-patterns.md +0 -299
- package/dist/commands/cli/session.d.ts +0 -67
- package/dist/commands/cli/session.d.ts.map +0 -1
- package/dist/commands/cli/workflow-status.d.ts +0 -63
- package/dist/commands/cli/workflow-status.d.ts.map +0 -1
- package/dist/sdk/commander.d.ts +0 -74
- package/dist/sdk/commander.d.ts.map +0 -1
- package/dist/sdk/management-commands.d.ts +0 -42
- package/dist/sdk/management-commands.d.ts.map +0 -1
- package/dist/sdk/workflow-cli.d.ts +0 -103
- package/dist/sdk/workflow-cli.d.ts.map +0 -1
- package/dist/sdk/workflows/builtin-registry.d.ts +0 -113
- package/dist/sdk/workflows/builtin-registry.d.ts.map +0 -1
- package/src/sdk/commander.ts +0 -161
- package/src/sdk/workflow-cli.ts +0 -409
- package/src/sdk/workflows/builtin-registry.ts +0 -23
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
# Agent Setup Recipe
|
|
2
|
+
|
|
3
|
+
A deterministic recipe for getting a user from "empty terminal" to "first workflow runs" in one session. This is the path to follow whenever the user signals they want to start using the workflow SDK from zero — phrases like *"set me up with the workflow SDK"*, *"I want to write workflows"*, *"bootstrap a workflow project"*, *"how do I get started"*, or any equivalent. If the user already has `@bastani/atomic` installed and a workflow file exists, jump to step 5.
|
|
4
|
+
|
|
5
|
+
## Why this recipe exists
|
|
6
|
+
|
|
7
|
+
Bootstrapping is the highest-friction moment of the SDK because three of the runtime dependencies live outside `bun add`:
|
|
8
|
+
|
|
9
|
+
- **Bun** — the SDK uses `Bun.spawn` and Bun-specific module resolution. It will not run on Node.
|
|
10
|
+
- **A terminal multiplexer** — tmux on macOS/Linux, psmux on Windows. Every `ctx.stage()` runs inside a detachable session on the `atomic` socket.
|
|
11
|
+
- **An authenticated agent CLI** — `claude`, `copilot`, or `opencode`. The runtime spawns these at each stage; if the binary is missing or unauthenticated, the first stage will fail with an error the user has no way to interpret.
|
|
12
|
+
|
|
13
|
+
A user hitting `bun add @bastani/atomic` in an empty project and then running their workflow will see one of these three blow up 30 seconds in with a stack trace that does not name the missing piece. This recipe checks all three up front and surfaces the missing one as a one-line fix. It also wires the typed errors the SDK throws (`MissingDependencyError`, `SessionNotFoundError`, `WorkflowNotCompiledError`, `InvalidWorkflowError`, `IncompatibleSDKError`) to actionable messages — so when something does fail later, the user sees a sentence, not a stack.
|
|
14
|
+
|
|
15
|
+
Treat the steps below as a checklist, not a script. Read each step before running anything; tell the user what you found and what you're about to do; only proceed when each precondition is satisfied. Skipping a step "because it probably works" is what makes setup feel flaky.
|
|
16
|
+
|
|
17
|
+
## Step 1 — Detect what's already there
|
|
18
|
+
|
|
19
|
+
Run these in parallel and read the output yourself before relaying anything to the user. If a check fails, stop the recipe and surface the fix before moving on:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bun --version # Bun
|
|
23
|
+
which tmux || where.exe psmux 2>/dev/null # multiplexer
|
|
24
|
+
claude --version 2>/dev/null # only one of these matters —
|
|
25
|
+
opencode --version 2>/dev/null # the user picks the agent in step 2
|
|
26
|
+
copilot --version 2>/dev/null
|
|
27
|
+
ls package.json 2>/dev/null # is this an existing project?
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
| Missing | Fix to recommend |
|
|
31
|
+
|---|---|
|
|
32
|
+
| Bun | `curl -fsSL https://bun.sh/install \| bash` (macOS/Linux) or `powershell -c "irm bun.sh/install.ps1 \| iex"` (Windows) |
|
|
33
|
+
| tmux/psmux | `brew install tmux` / `apt install tmux` / etc. on macOS+Linux; [psmux](https://github.com/psmux/psmux) on Windows |
|
|
34
|
+
| Agent CLI | Direct the user to the agent's install/auth page — Claude Code (`code.claude.com/docs`), OpenCode (`opencode.ai`), Copilot CLI (`github.com/features/copilot/cli`) |
|
|
35
|
+
|
|
36
|
+
Do not attempt the install yourself unless the user has explicitly approved it — `curl | bash` is a remote-exec that warrants confirmation. Print the suggested command and let the user kick it off.
|
|
37
|
+
|
|
38
|
+
If the user is on a devcontainer with `ghcr.io/flora131/atomic/<agent>:1` in `.devcontainer/devcontainer.json`, all three are already installed and authenticated — skip the prereq checks and tell them so.
|
|
39
|
+
|
|
40
|
+
## Step 2 — Pick the agent (and confirm intent)
|
|
41
|
+
|
|
42
|
+
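Ask the user which agent they're targeting and whether they want one or multiple. The answer drives steps 3 through 5: which provider packages get installed, which workflow files get scaffolded, and which composition root gets written.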
|
|
43
|
+
|
|
44
|
+
- **One agent** → scaffold one `<agent>.ts` workflow file + one `<agent>-worker.ts` composition root. This is the 90% case; recommend it unless the user pushes back.
|
|
45
|
+
- **Multiple agents, same workflow logic** → scaffold one workflow file per agent under `src/workflows/<name>/` plus a single `src/cli.ts` that uses `createRegistry()` to dispatch.
|
|
46
|
+
- **Multiple workflows, one agent** → scaffold each workflow under `src/workflows/<name>/` and either ship multiple worker files or use the registry pattern.
|
|
47
|
+
|
|
48
|
+
Don't guess. Use `AskUserQuestion` (or the equivalent) when intent is unclear — picking wrong here means rewriting 100% of the scaffold.
|
|
49
|
+
|
|
50
|
+
## Step 3 — Bootstrap the project
|
|
51
|
+
|
|
52
|
+
If `package.json` already exists, skip `bun init`. Otherwise:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
bun init -y
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Add the SDK plus only the provider package(s) the user picked:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
bun add @bastani/atomic
|
|
62
|
+
bun add @anthropic-ai/claude-agent-sdk # only if Claude
|
|
63
|
+
bun add @github/copilot-sdk # only if Copilot
|
|
64
|
+
bun add @opencode-ai/sdk # only if OpenCode
|
|
65
|
+
bun add @commander-js/extra-typings # for the worker; swap for citty/yargs if the user prefers
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
If the user has been using `npm install`, `yarn add`, or any other non-Bun command, gently redirect them to the Bun equivalent — the SDK will not work under Node, full stop.
|
|
69
|
+
|
|
70
|
+
## Step 4 — Scaffold the workflow file
|
|
71
|
+
|
|
72
|
+
Drop the workflow into `src/workflows/<name>/<agent>.ts`. The convention is one directory per workflow, one file per agent — this keeps `src/workflows/<name>/helpers/` available for SDK-agnostic logic when the user does want cross-agent support later. The naming matters because every reference doc and every agent looking at the codebase locates files by this same convention.
|
|
73
|
+
|
|
74
|
+
Always include `source: import.meta.path` — the runtime re-imports the module from this path inside the orchestrator child process. Forget it and the workflow loads fine but `runWorkflow` blows up at spawn time with `InvalidWorkflowError`.
|
|
75
|
+
|
|
76
|
+
### Claude template
|
|
77
|
+
|
|
78
|
+
```ts
|
|
79
|
+
// src/workflows/<name>/claude.ts
|
|
80
|
+
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
81
|
+
|
|
82
|
+
export default defineWorkflow({
|
|
83
|
+
name: "<workflow-name>",
|
|
84
|
+
source: import.meta.path,
|
|
85
|
+
description: "<one-line description>",
|
|
86
|
+
inputs: [
|
|
87
|
+
{ name: "prompt", type: "text", required: true, description: "what the user supplies" },
|
|
88
|
+
],
|
|
89
|
+
})
|
|
90
|
+
.for("claude")
|
|
91
|
+
.run(async (ctx) => {
|
|
92
|
+
await ctx.stage({ name: "step-1" }, {}, {}, async (s) => {
|
|
93
|
+
await s.session.query(ctx.inputs.prompt ?? "");
|
|
94
|
+
s.save(s.sessionId);
|
|
95
|
+
});
|
|
96
|
+
})
|
|
97
|
+
.compile();
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Copilot template
|
|
101
|
+
|
|
102
|
+
```ts
|
|
103
|
+
.for("copilot")
|
|
104
|
+
.run(async (ctx) => {
|
|
105
|
+
await ctx.stage({ name: "step-1" }, {}, {}, async (s) => {
|
|
106
|
+
await s.session.send({ prompt: ctx.inputs.prompt ?? "" });
|
|
107
|
+
s.save(await s.session.getMessages());
|
|
108
|
+
});
|
|
109
|
+
})
|
|
110
|
+
.compile();
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### OpenCode template
|
|
114
|
+
|
|
115
|
+
```ts
|
|
116
|
+
.for("opencode")
|
|
117
|
+
.run(async (ctx) => {
|
|
118
|
+
await ctx.stage({ name: "step-1" }, {}, { title: "step-1" }, async (s) => {
|
|
119
|
+
const result = await s.client.session.prompt({
|
|
120
|
+
sessionID: s.session.id,
|
|
121
|
+
parts: [{ type: "text", text: ctx.inputs.prompt ?? "" }],
|
|
122
|
+
});
|
|
123
|
+
s.save(result.data!);
|
|
124
|
+
});
|
|
125
|
+
})
|
|
126
|
+
.compile();
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The `s.save(...)` call shape differs per agent on purpose — see `getting-started.md` "Saving Transcripts" for the per-provider rationale.
|
|
130
|
+
|
|
131
|
+
## Step 5 — Scaffold the composition root
|
|
132
|
+
|
|
133
|
+
The SDK ships pure primitives, not a CLI wrapper. The user composes them into whatever CLI library they prefer. Default to Commander unless they say otherwise.
|
|
134
|
+
|
|
135
|
+
### Single workflow worker
|
|
136
|
+
|
|
137
|
+
```ts
|
|
138
|
+
// src/<agent>-worker.ts
|
|
139
|
+
import { Command } from "@commander-js/extra-typings";
|
|
140
|
+
import {
|
|
141
|
+
getInputSchema,
|
|
142
|
+
runWorkflow,
|
|
143
|
+
MissingDependencyError,
|
|
144
|
+
SessionNotFoundError,
|
|
145
|
+
} from "@bastani/atomic/workflows";
|
|
146
|
+
import workflow from "./workflows/<name>/<agent>.ts";
|
|
147
|
+
|
|
148
|
+
const program = new Command();
|
|
149
|
+
for (const input of getInputSchema(workflow)) {
|
|
150
|
+
program.option(`--${input.name} <value>`, input.description ?? "");
|
|
151
|
+
}
|
|
152
|
+
program.action(async (rawOpts) => {
|
|
153
|
+
try {
|
|
154
|
+
await runWorkflow({ workflow, inputs: rawOpts as Record<string, string> });
|
|
155
|
+
} catch (err) {
|
|
156
|
+
if (err instanceof MissingDependencyError) {
|
|
157
|
+
console.error(`Missing dependency: ${err.dependency}. Install it and rerun.`);
|
|
158
|
+
process.exit(1);
|
|
159
|
+
}
|
|
160
|
+
throw err;
|
|
161
|
+
}
|
|
162
|
+
});
|
|
163
|
+
await program.parseAsync();
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
The typed-error catch is small but it pays for itself the first time `tmux` is missing — the user gets one actionable line instead of an SDK stack trace. Add more `instanceof` branches as the surface grows (see Step 8).
|
|
167
|
+
|
|
168
|
+
### Multi-workflow CLI
|
|
169
|
+
|
|
170
|
+
When the user picked one of the multi-agent or multi-workflow shapes in Step 2 and wants a single CLI, swap the worker for a registry-driven composition root:
|
|
171
|
+
|
|
172
|
+
```ts
|
|
173
|
+
// src/cli.ts
|
|
174
|
+
import { Command } from "@commander-js/extra-typings";
|
|
175
|
+
import {
|
|
176
|
+
createRegistry,
|
|
177
|
+
getInputSchema,
|
|
178
|
+
getName,
|
|
179
|
+
listWorkflows,
|
|
180
|
+
runWorkflow,
|
|
181
|
+
} from "@bastani/atomic/workflows";
|
|
182
|
+
import flowA from "./workflows/<name-a>/<agent>.ts";
|
|
183
|
+
import flowB from "./workflows/<name-b>/<agent>.ts";
|
|
184
|
+
|
|
185
|
+
const registry = createRegistry().register(flowA).register(flowB);
|
|
186
|
+
const program = new Command("my-app");
|
|
187
|
+
for (const wf of listWorkflows(registry)) {
|
|
188
|
+
const sub = program.command(getName(wf)).description(wf.description);
|
|
189
|
+
for (const input of getInputSchema(wf)) {
|
|
190
|
+
sub.option(`--${input.name} <value>`, input.description ?? "");
|
|
191
|
+
}
|
|
192
|
+
sub.action(async (rawOpts) => {
|
|
193
|
+
await runWorkflow({ workflow: wf, inputs: rawOpts as Record<string, string> });
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
await program.parseAsync();
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Every `(agent, name)` key must be unique across the registry — registering a duplicate throws immediately at startup, which is intentional. Agents reading the codebase rely on stable keys.
|
|
200
|
+
|
|
201
|
+
## Step 6 — Add a `typecheck` script
|
|
202
|
+
|
|
203
|
+
The biggest payoff for catching mistakes early is `bunx tsc --noEmit`. Wire it into `package.json`:
|
|
204
|
+
|
|
205
|
+
```jsonc
|
|
206
|
+
{
|
|
207
|
+
"scripts": {
|
|
208
|
+
"typecheck": "bunx tsc --noEmit"
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Then run it once before any execution:
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
bun run typecheck
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
If this fails, fix the errors before moving on — typecheck failures here usually mean a missing `.compile()`, a mistyped `.for(...)` agent, or an `inputs` field accessed but never declared. Otherwise all three only surface later, as runtime errors.
|
|
220
|
+
|
|
221
|
+
## Step 7 — Smoke test
|
|
222
|
+
|
|
223
|
+
Run the workflow attached the first time so the user can watch a tmux pane spawn and see Claude/Copilot/OpenCode actually respond:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
bun run src/<agent>-worker.ts --prompt "Reply with the single word 'ok'"
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Three things to verify:
|
|
230
|
+
|
|
231
|
+
1. **The pane appears** — tmux opens, the agent welcome banner renders, the prompt fires. If the pane never opens, the multiplexer check from Step 1 was wrong.
|
|
232
|
+
2. **The agent replies** — within ~30s the agent prints back `ok`. If it sits idle, the agent CLI is probably not authenticated; rerun `claude` / `opencode` / `copilot` and complete the auth flow.
|
|
233
|
+
3. **The session ends cleanly** — `s.save(...)` flushes, the orchestrator exits, the user lands back on their shell. If the orchestrator hangs, see `failure-modes.md`.
|
|
234
|
+
|
|
235
|
+
After the attached run works, demonstrate the detached path:
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
bun run src/<agent>-worker.ts --prompt "..."   # same command; the run detaches once the worker passes detach: true to runWorkflow
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
For a worker that supports both, expose `--detach` as a Commander flag and pass `detach: true` to `runWorkflow`. Sessions started detached show up in `atomic session list` (and via `listSessions({ scope: "workflow" })` from your own CLI) — they keep running on the shared `atomic` tmux socket regardless of the terminal.
|
|
242
|
+
|
|
243
|
+
## Step 8 — Failure recovery (typed errors)
|
|
244
|
+
|
|
245
|
+
The SDK throws typed errors, exported from `@bastani/atomic/workflows`, so callers can pattern-match without parsing message text. When you wire user-facing CLIs, add `instanceof` branches for the ones that need a friendly message:
|
|
246
|
+
|
|
247
|
+
| Error | When | Friendly message |
|
|
248
|
+
|---|---|---|
|
|
249
|
+
| `MissingDependencyError` | tmux / psmux / bun is not on `PATH` at runtime | `Missing dependency: <dep>. Install it (see prereqs) and rerun.` |
|
|
250
|
+
| `SessionNotFoundError` | `attachSession`/`nextWindow`/`previousWindow`/`gotoOrchestrator` called with an id that's not on the atomic socket | `session not found: <id>. Run "atomic session list" or list via listSessions() to see what's running.` |
|
|
251
|
+
| `WorkflowNotCompiledError` | The dev forgot `.compile()` at the end of `defineWorkflow(...)` | The error message itself is the fix — surface as-is. |
|
|
252
|
+
| `InvalidWorkflowError` | The imported file's default export isn't a `WorkflowDefinition` | Ditto — surface the message; it tells the dev to add `defineWorkflow(...).compile()`. |
|
|
253
|
+
| `IncompatibleSDKError` | The workflow declares `minSDKVersion` newer than the installed CLI | Tell the user to either `bun update -g @bastani/atomic` or relax the workflow's `minSDKVersion`. |
|
|
254
|
+
|
|
255
|
+
Don't catch errors you don't know how to render — let them throw. A blanket `catch (err) { console.error(err) }` defeats the typed surface.
|
|
256
|
+
|
|
257
|
+
## Step 9 — Hand off
|
|
258
|
+
|
|
259
|
+
Once the smoke test passes, the user owns the project. Tell them:
|
|
260
|
+
|
|
261
|
+
- **Where the workflow lives** — `src/workflows/<name>/<agent>.ts`. Edits there change the pipeline shape.
|
|
262
|
+
- **Where the entry point lives** — `src/<agent>-worker.ts` (or `src/cli.ts` for the registry shape). Edits there change the user-facing flag surface.
|
|
263
|
+
- **How to monitor** — `atomic session list` for a system-wide view, `atomic workflow status <id>` for one run, or wire `listSessions` / `getSessionStatus` into their own CLI's subcommands. The pane-navigation primitives (`nextWindow`, `previousWindow`, `gotoOrchestrator`, `detachSession`) drive tmux directly without taking over the user's terminal — see `examples/pane-navigation/` for a reference driver CLI.
|
|
264
|
+
- **What to read next** — `references/getting-started.md` for the SDK exports table, `references/control-flow.md` for loops/parallel/headless, `references/state-and-data-flow.md` for `s.save`/`s.transcript` patterns, `references/failure-modes.md` before shipping any multi-stage workflow.
|
|
265
|
+
|
|
266
|
+
If the user is now stuck on workflow design rather than setup ("how do I do a review-fix loop?", "what's the right shape for parallel research?"), pivot to the authoring guidance in `SKILL.md` §"Authoring Process" and the `Design Advisory Skills` table. Setup is done.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# Computation and Validation
|
|
2
|
+
|
|
3
|
+
Deterministic computation — validation, data transforms, file I/O, API calls — is written as plain TypeScript inside `.run()` or session callbacks. No LLM session is needed. This is the programmatic equivalent of a `.tool()` node.
|
|
4
|
+
|
|
5
|
+
## Inline computation
|
|
6
|
+
|
|
7
|
+
Any TypeScript code inside a session callback that doesn't call an SDK prompt function is deterministic computation. Prefer handle-based lookups (`s.getMessages(plannerHandle)`) over string names when a handle is in scope — it preserves type information and survives stage renames:
|
|
8
|
+
|
|
9
|
+
```ts
|
|
10
|
+
const plannerHandle = await ctx.stage({ name: "planner" }, {}, {}, async (s) => { /* ... */ });
|
|
11
|
+
|
|
12
|
+
await ctx.stage({ name: "validate-and-fix", description: "Validate, then fix if needed" }, {}, {}, async (s) => {
|
|
13
|
+
// Step 1: Deterministic — parse prior session's output (handle-based lookup)
|
|
14
|
+
const messages = await s.getMessages(plannerHandle);
|
|
15
|
+
const planText = extractText(messages);
|
|
16
|
+
const plan = JSON.parse(planText);
|
|
17
|
+
|
|
18
|
+
// Step 2: Deterministic — validate the plan
|
|
19
|
+
const isValid = plan.tasks?.length > 0 && plan.tasks.every((t: { id: string; description: string }) => t.id && t.description);
|
|
20
|
+
|
|
21
|
+
if (!isValid) {
|
|
22
|
+
// Step 3: Agent session — ask the agent to fix the plan
|
|
23
|
+
await s.session.query("The plan is invalid. Please create a valid plan with tasks.");
|
|
24
|
+
} else {
|
|
25
|
+
// Step 4: Agent session — execute the valid plan
|
|
26
|
+
await s.session.query(`Execute this plan:\n${JSON.stringify(plan.tasks)}`);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
s.save(s.sessionId);
|
|
30
|
+
});
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Parsing SDK responses
|
|
34
|
+
|
|
35
|
+
Each SDK returns responses in a different format. Use helpers to extract text:
|
|
36
|
+
|
|
37
|
+
### Claude
|
|
38
|
+
|
|
39
|
+
`s.session.query()` returns `SessionMessage[]` — the native SDK transcript messages from this turn. Use `extractAssistantText()` to extract the plain text:
|
|
40
|
+
|
|
41
|
+
```ts
|
|
42
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
43
|
+
|
|
44
|
+
const result = await s.session.query("...");
|
|
45
|
+
const text = extractAssistantText(result, 0); // Extract text from SessionMessage[]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Copilot
|
|
49
|
+
|
|
50
|
+
`s.session.getMessages()` returns `SessionEvent[]`. Concatenate every
|
|
51
|
+
top-level assistant turn's non-empty content — picking only `.at(-1)` is a
|
|
52
|
+
silent-failure trap (see `failure-modes.md` §F1 / §F2). Use the
|
|
53
|
+
`getAssistantText` helper defined in `failure-modes.md` §F1:
|
|
54
|
+
|
|
55
|
+
```ts
|
|
56
|
+
// Usage:
|
|
57
|
+
const messages = await s.session.getMessages();
|
|
58
|
+
const text = getAssistantText(messages);
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### OpenCode
|
|
62
|
+
|
|
63
|
+
`session.prompt()` returns `{ data: { info, parts } }`. Filter for text
|
|
64
|
+
parts only — non-text parts produce `[object Object]` (see
|
|
65
|
+
`failure-modes.md` §F3). Use the `extractResponseText` helper defined
|
|
66
|
+
there:
|
|
67
|
+
|
|
68
|
+
```ts
|
|
69
|
+
// Usage:
|
|
70
|
+
const text = extractResponseText(result.data!.parts);
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Validation patterns
|
|
74
|
+
|
|
75
|
+
### JSON parsing with fallback
|
|
76
|
+
|
|
77
|
+
Use a layered fallback: direct parse → **last** fenced block (not the
|
|
78
|
+
first — prose often quotes examples earlier; see `failure-modes.md` §F8) →
|
|
79
|
+
last balanced object. Canonical helper lives in `state-and-data-flow.md`
|
|
80
|
+
§"Response parsers"; copy it into a sibling `helpers/parsers.ts` and
|
|
81
|
+
import it from there. The full three-layer implementation, including the balanced-object
|
|
82
|
+
fallback, is in `src/sdk/workflows/builtin/ralph/helpers/prompts.ts`.
|
|
83
|
+
|
|
84
|
+
### Zod validation
|
|
85
|
+
|
|
86
|
+
Import Zod directly in your workflow file for runtime validation:
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
import { z } from "zod";
|
|
90
|
+
|
|
91
|
+
const TaskSchema = z.object({
|
|
92
|
+
id: z.string(),
|
|
93
|
+
description: z.string(),
|
|
94
|
+
status: z.enum(["pending", "in_progress", "completed", "error"]),
|
|
95
|
+
blockedBy: z.array(z.string()).optional(),
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// In run():
|
|
99
|
+
const parsed = parseJsonResponse(responseText);
|
|
100
|
+
const result = TaskSchema.array().safeParse(parsed?.tasks);
|
|
101
|
+
if (!result.success) {
|
|
102
|
+
console.error("Validation failed:", result.error.issues);
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## File I/O
|
|
107
|
+
|
|
108
|
+
Read and write files directly inside `.run()` or a `ctx.stage()` callback:
|
|
109
|
+
|
|
110
|
+
```ts
|
|
111
|
+
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
112
|
+
import { join } from "path";
|
|
113
|
+
|
|
114
|
+
// Inside a ctx.stage() callback:
|
|
115
|
+
async (s) => {
|
|
116
|
+
// Write to session directory
|
|
117
|
+
const outputDir = join(s.sessionDir, "artifacts");
|
|
118
|
+
await mkdir(outputDir, { recursive: true });
|
|
119
|
+
await writeFile(join(outputDir, "report.json"), JSON.stringify(data));
|
|
120
|
+
|
|
121
|
+
// Read from project
|
|
122
|
+
const config = await readFile("./tsconfig.json", "utf-8");
|
|
123
|
+
},
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## API calls
|
|
127
|
+
|
|
128
|
+
Make HTTP requests for external integrations:
|
|
129
|
+
|
|
130
|
+
```ts
|
|
131
|
+
// Inside a ctx.stage() callback:
|
|
132
|
+
async (s) => {
|
|
133
|
+
const response = await fetch("https://api.example.com/data");
|
|
134
|
+
const data = await response.json();
|
|
135
|
+
|
|
136
|
+
// Use the data in a prompt
|
|
137
|
+
await s.session.query(`Process this data:\n${JSON.stringify(data)}`);
|
|
138
|
+
s.save(s.sessionId);
|
|
139
|
+
},
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Data transforms
|
|
143
|
+
|
|
144
|
+
Transform data between sessions:
|
|
145
|
+
|
|
146
|
+
```ts
|
|
147
|
+
// Inside a ctx.stage() callback:
|
|
148
|
+
async (s) => {
|
|
149
|
+
const raw = await s.getMessages("planner"); // or s.getMessages(handle) if a handle is in scope (preferred)
|
|
150
|
+
|
|
151
|
+
// Transform: extract only task IDs and descriptions
|
|
152
|
+
const tasks = extractTasks(raw).map(t => ({
|
|
153
|
+
id: t.id,
|
|
154
|
+
description: t.description,
|
|
155
|
+
priority: calculatePriority(t),
|
|
156
|
+
}));
|
|
157
|
+
|
|
158
|
+
// Sort by priority
|
|
159
|
+
tasks.sort((a, b) => b.priority - a.priority);
|
|
160
|
+
|
|
161
|
+
// Pass to agent
|
|
162
|
+
await s.session.query(`Execute these tasks in order:\n${JSON.stringify(tasks)}`);
|
|
163
|
+
s.save(s.sessionId);
|
|
164
|
+
},
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Quality Gate with LLM-as-Judge
|
|
168
|
+
|
|
169
|
+
Add automated quality checkpoints using evaluation rubrics. This pattern applies `evaluation` + `advanced-evaluation`:
|
|
170
|
+
|
|
171
|
+
```ts
|
|
172
|
+
.run(async (ctx) => {
|
|
173
|
+
const impl = await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
|
|
174
|
+
await s.session.query((s.inputs.prompt ?? ""));
|
|
175
|
+
s.save(s.sessionId);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
await ctx.stage({ name: "quality-gate" }, {}, {}, async (s) => {
|
|
179
|
+
const implTranscript = await s.transcript(impl);
|
|
180
|
+
const result = await s.session.query(
|
|
181
|
+
`You are a code quality judge. Score this implementation 1-5 for:
|
|
182
|
+
- **Correctness**: Does it solve the stated problem?
|
|
183
|
+
- **Completeness**: Are edge cases handled?
|
|
184
|
+
- **Style**: Does it follow project conventions?
|
|
185
|
+
|
|
186
|
+
## Implementation to judge
|
|
187
|
+
${implTranscript.content}
|
|
188
|
+
|
|
189
|
+
Respond with JSON: { "correctness": N, "completeness": N, "style": N, "pass": boolean, "issues": [...] }`,
|
|
190
|
+
);
|
|
191
|
+
|
|
192
|
+
const scores = parseJsonResponse(extractAssistantText(result, 0));
|
|
193
|
+
|
|
194
|
+
if (!scores.pass) {
|
|
195
|
+
await s.session.query(`Fix these quality issues:\n${scores.issues.join("\n")}`);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
s.save(s.sessionId);
|
|
199
|
+
});
|
|
200
|
+
})
|
|
201
|
+
```
|