@bastani/atomic 0.5.12-3 → 0.5.12-5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workflow-creator/SKILL.md +24 -17
- package/.agents/skills/workflow-creator/references/agent-sessions.md +67 -24
- package/.agents/skills/workflow-creator/references/computation-and-validation.md +5 -3
- package/.agents/skills/workflow-creator/references/control-flow.md +25 -11
- package/.agents/skills/workflow-creator/references/discovery-and-verification.md +3 -2
- package/.agents/skills/workflow-creator/references/failure-modes.md +35 -36
- package/.agents/skills/workflow-creator/references/getting-started.md +25 -12
- package/.agents/skills/workflow-creator/references/session-config.md +26 -5
- package/.agents/skills/workflow-creator/references/state-and-data-flow.md +3 -3
- package/.agents/skills/workflow-creator/references/workflow-inputs.md +52 -47
- package/README.md +63 -41
- package/package.json +2 -4
- package/src/commands/cli/workflow.ts +1 -1
- package/src/sdk/components/workflow-picker-panel.tsx +109 -47
- package/src/sdk/define-workflow.test.ts +58 -0
- package/src/sdk/define-workflow.ts +48 -30
- package/src/sdk/providers/claude.ts +234 -233
- package/src/sdk/runtime/discovery.ts +2 -3
- package/src/sdk/runtime/executor.ts +6 -1
- package/src/sdk/types.ts +24 -19
- package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +11 -30
- package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +7 -4
- package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +6 -2
- package/src/sdk/workflows/builtin/ralph/claude/index.ts +32 -38
- package/src/sdk/workflows/builtin/ralph/copilot/index.ts +5 -1
- package/src/sdk/workflows/builtin/ralph/opencode/index.ts +5 -1
- package/src/sdk/workflows/index.ts +2 -2
|
@@ -97,9 +97,16 @@ Workflow quality depends on two disciplines: **prompt engineering** (crafting cl
|
|
|
97
97
|
A workflow is a TypeScript file with a single `.run()` callback that orchestrates agent sessions dynamically. Inside the callback, `ctx.stage()` spawns sessions — each gets its own tmux window and graph node (unless running in headless mode). Native TypeScript handles all control flow: loops, conditionals, `Promise.all()`, `try`/`catch`.
|
|
98
98
|
|
|
99
99
|
```ts
|
|
100
|
-
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
101
|
-
|
|
102
|
-
export default defineWorkflow
|
|
100
|
+
import { defineWorkflow, extractAssistantText } from "@bastani/atomic/workflows";
|
|
101
|
+
|
|
102
|
+
export default defineWorkflow({
|
|
103
|
+
name: "my-workflow",
|
|
104
|
+
description: "...",
|
|
105
|
+
inputs: [
|
|
106
|
+
{ name: "prompt", type: "text", required: true, description: "task to perform" },
|
|
107
|
+
],
|
|
108
|
+
})
|
|
109
|
+
.for<"claude">()
|
|
103
110
|
.run(async (ctx) => {
|
|
104
111
|
const step1 = await ctx.stage({ name: "step-1" }, {}, {}, async (s) => { /* s.client, s.session */ });
|
|
105
112
|
await ctx.stage({ name: "step-2" }, {}, {}, async (s) => { /* s.client, s.session */ });
|
|
@@ -121,7 +128,7 @@ await ctx.stage(
|
|
|
121
128
|
async (s) => {
|
|
122
129
|
const result = await s.session.query("Analyze the codebase structure.");
|
|
123
130
|
s.save(s.sessionId);
|
|
124
|
-
return result
|
|
131
|
+
return extractAssistantText(result, 0);
|
|
125
132
|
},
|
|
126
133
|
);
|
|
127
134
|
```
|
|
@@ -182,23 +189,23 @@ Workflow files live at `.atomic/workflows/<name>/<agent>/index.ts`. Discovery so
|
|
|
182
189
|
| `WorkflowContext` (`ctx`) | `.run(async (ctx) => ...)` | No | Orchestration: spawn sessions, read transcripts, read `ctx.inputs` |
|
|
183
190
|
| `SessionContext` (`s`) | `ctx.stage(opts, clientOpts, sessionOpts, async (s) => ...)` | Yes | Agent work: use `s.client` and `s.session` for SDK calls, save output |
|
|
184
191
|
|
|
185
|
-
Both contexts expose `inputs
|
|
192
|
+
Both contexts expose typed `inputs` (keys restricted to declared input names), `stage()`, `transcript()`, and `getMessages()`. See `references/getting-started.md` for the full `SessionContext` field reference.
|
|
186
193
|
|
|
187
194
|
### Declared inputs: one API, three invocation surfaces
|
|
188
195
|
|
|
189
|
-
Workflows receive user data exclusively through `ctx.inputs` (and `s.inputs` inside stage callbacks).
|
|
196
|
+
Workflows receive user data exclusively through `ctx.inputs` (and `s.inputs` inside stage callbacks).
|
|
190
197
|
|
|
191
|
-
|
|
198
|
+
Declare `inputs: WorkflowInput[]` inline on `defineWorkflow()`. TypeScript infers literal field names from the array and restricts `ctx.inputs` to only those keys — accessing an undeclared field is a **compile-time error**. The CLI materializes one `--<field>=<value>` flag per entry and validates required fields and enum membership before launching; the picker renders a form. Three field types: `string` (single-line), `text` (multi-line), `enum` (fixed set).
|
|
192
199
|
|
|
193
|
-
|
|
200
|
+
Workflows that accept a free-form prompt should declare it explicitly: `{ name: "prompt", type: "text", required: true }`.
|
|
194
201
|
|
|
195
|
-
**Load `references/workflow-inputs.md`** for the full schema shape, validation rules,
|
|
202
|
+
**Load `references/workflow-inputs.md`** for the full schema shape, validation rules, picker semantics, and invocation cheat sheet.
|
|
196
203
|
|
|
197
204
|
### Invocation surfaces
|
|
198
205
|
|
|
199
206
|
| Surface | Command | When |
|
|
200
207
|
|---|---|---|
|
|
201
|
-
| Named,
|
|
208
|
+
| Named, with prompt | `atomic workflow -n hello -a claude "fix the bug"` | Scripted runs; requires the workflow to declare a `prompt` input |
|
|
202
209
|
| Named, structured | `atomic workflow -n gen-spec -a claude --research_doc=notes.md` | Scripted structured runs |
|
|
203
210
|
| Interactive picker | `atomic workflow -a claude` | Discovery; shows fuzzy list + form |
|
|
204
211
|
| List | `atomic workflow -l` | Browse everything by source |
|
|
@@ -274,13 +281,13 @@ Then apply **design advisory checks** — these catch architectural and prompt q
|
|
|
274
281
|
|
|
275
282
|
### 2. Choose the Target Agent
|
|
276
283
|
|
|
277
|
-
|
|
284
|
+
Use `.for<"agent">()` on the builder to narrow all context types and get correct `s.client`/`s.session` types. Call `.for()` **before** `.run()`:
|
|
278
285
|
|
|
279
|
-
| Agent |
|
|
286
|
+
| Agent | Builder Chain | Primary Session API |
|
|
280
287
|
|-------|---------------|---------------------|
|
|
281
|
-
| Claude | `defineWorkflow<"claude"
|
|
282
|
-
| Copilot | `defineWorkflow<"copilot"
|
|
283
|
-
| OpenCode | `defineWorkflow<"opencode"
|
|
288
|
+
| Claude | `defineWorkflow({...}).for<"claude">()` | `s.session.query(prompt)` — sends prompt to the Claude TUI pane |
|
|
289
|
+
| Copilot | `defineWorkflow({...}).for<"copilot">()` | `s.session.send({ prompt })` — fire-and-forget; use `sendAndWait({ prompt }, timeoutMs)` only when the user explicitly requests timeout-based waiting |
|
|
290
|
+
| OpenCode | `defineWorkflow({...}).for<"opencode">()` | `s.client.session.prompt({ sessionID: s.session.id, parts: [...] })` |
|
|
284
291
|
|
|
285
292
|
The runtime manages client/session lifecycle automatically. For native SDK types and advanced APIs, import directly from the provider packages (`@github/copilot-sdk`, `@anthropic-ai/claude-agent-sdk`, `@opencode-ai/sdk/v2`).
|
|
286
293
|
|
|
@@ -309,7 +316,7 @@ Per-SDK cheat sheet:
|
|
|
309
316
|
| Save output | `s.save(s.sessionId)` | `s.save(await s.session.getMessages())` | `s.save(result.data!)` |
|
|
310
317
|
| Timeout | No response timeout needed (idle detection); `readyTimeoutMs` and polling defaults via sessionOpts | N/A (`send` has no timeout; `sendAndWait` accepts optional timeout, default 60s) | N/A |
|
|
311
318
|
| Context model | Tmux pane (accumulates across turns) | Fresh per `ctx.stage()` | Fresh per `ctx.stage()` |
|
|
312
|
-
| Extract text | `result
|
|
319
|
+
| Extract text | `extractAssistantText(result, 0)` (uses `SessionMessage[]`) | `getAssistantText(messages)` (see `failure-modes.md` F1) | `extractResponseText(result.data!.parts)` (see `failure-modes.md` F3) |
|
|
313
320
|
|
|
314
321
|
The SDK ships two builtin workflows as production reference implementations:
|
|
315
322
|
- **`ralph`** — iterative plan → orchestrate → review → debug loop (all 3 SDKs)
|
|
@@ -326,7 +333,7 @@ bun typecheck
|
|
|
326
333
|
### 5. Test the Workflow
|
|
327
334
|
|
|
328
335
|
```bash
|
|
329
|
-
#
|
|
336
|
+
# Workflow with a declared prompt input
|
|
330
337
|
atomic workflow -n <workflow-name> -a <agent> "<your prompt>"
|
|
331
338
|
|
|
332
339
|
# Structured workflow
|
|
@@ -18,14 +18,14 @@ import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
|
18
18
|
await ctx.stage(
|
|
19
19
|
{ name: "implement", description: "Implement the feature" },
|
|
20
20
|
{}, // clientOpts: chatFlags and readyTimeoutMs go here
|
|
21
|
-
{}, // sessionOpts: query defaults (
|
|
21
|
+
{}, // sessionOpts: query defaults (pollIntervalMs, readyTimeoutMs, etc.) go here
|
|
22
22
|
async (s) => {
|
|
23
23
|
// s.client — Claude CLI wrapper (already started by runtime)
|
|
24
24
|
// s.session — session wrapper (ready to accept queries via s.session.query())
|
|
25
25
|
|
|
26
26
|
// Send queries — Claude maintains conversation context across calls
|
|
27
|
+
// Returns SessionMessage[] (native SDK type from @anthropic-ai/claude-agent-sdk)
|
|
27
28
|
const result = await s.session.query((s.inputs.prompt ?? ""));
|
|
28
|
-
// result.output contains the captured response text
|
|
29
29
|
|
|
30
30
|
// Save transcript
|
|
31
31
|
s.save(s.sessionId);
|
|
@@ -44,26 +44,32 @@ Client options (2nd arg to `ctx.stage()`):
|
|
|
44
44
|
- `readyTimeoutMs` — timeout waiting for TUI readiness (default: 30s)
|
|
45
45
|
|
|
46
46
|
Session options (3rd arg to `ctx.stage()`), applied as defaults to every `s.session.query()` call:
|
|
47
|
-
- `timeoutMs` — timeout waiting for Claude to finish responding (default: 300s)
|
|
48
47
|
- `pollIntervalMs` — polling interval (default: 2000ms)
|
|
49
48
|
- `submitPresses` — C-m presses per submit round (default: 1)
|
|
50
49
|
- `maxSubmitRounds` — max submit rounds (default: 6)
|
|
51
50
|
- `readyTimeoutMs` — timeout waiting for pane readiness before sending (default: 30s)
|
|
52
51
|
|
|
52
|
+
No manual timeout is needed — idle detection watches for the pane prompt to return, and the session transcript is used to extract the response text.
|
|
53
|
+
|
|
53
54
|
### Basic usage with `s.session.query()`
|
|
54
55
|
|
|
55
56
|
```ts
|
|
56
57
|
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
57
58
|
|
|
58
|
-
export default defineWorkflow
|
|
59
|
+
export default defineWorkflow({
|
|
60
|
+
name: "implement",
|
|
61
|
+
inputs: [{ name: "prompt", type: "text", required: true, description: "task prompt" }],
|
|
62
|
+
})
|
|
63
|
+
.for<"claude">()
|
|
59
64
|
.run(async (ctx) => {
|
|
60
65
|
await ctx.stage(
|
|
61
66
|
{ name: "implement", description: "Implement the feature" },
|
|
62
67
|
{},
|
|
63
68
|
{},
|
|
64
69
|
async (s) => {
|
|
65
|
-
const
|
|
66
|
-
//
|
|
70
|
+
const messages = await s.session.query((s.inputs.prompt ?? ""));
|
|
71
|
+
// messages is SessionMessage[] — native SDK type
|
|
72
|
+
// Use extractAssistantText(messages, 0) to get the text response
|
|
67
73
|
s.save(s.sessionId);
|
|
68
74
|
},
|
|
69
75
|
);
|
|
@@ -71,7 +77,7 @@ export default defineWorkflow<"claude">({ name: "implement" })
|
|
|
71
77
|
.compile();
|
|
72
78
|
```
|
|
73
79
|
|
|
74
|
-
`s.session.query(prompt)` sends text to the Claude pane, verifies delivery, retries if needed, and waits for output stabilization. Returns `
|
|
80
|
+
`s.session.query(prompt)` sends text to the Claude pane, verifies delivery, retries if needed, and waits for output stabilization. Returns `SessionMessage[]` (the native transcript messages from this turn, imported from `@anthropic-ai/claude-agent-sdk`). Use `extractAssistantText(messages, 0)` to extract the plain text response.
|
|
75
81
|
|
|
76
82
|
### Multi-turn conversations
|
|
77
83
|
|
|
@@ -183,43 +189,72 @@ const result = query({ prompt: "Continue...", options: { resume: sessionId } });
|
|
|
183
189
|
const result = query({ prompt: "Try a different approach", options: { resume: sessionId, forkSession: true } });
|
|
184
190
|
```
|
|
185
191
|
|
|
186
|
-
### Sub-agent delegation
|
|
192
|
+
### Sub-agent delegation
|
|
193
|
+
|
|
194
|
+
For stages that call a single sub-agent, use `--agent` (interactive) or the SDK `agent` option (headless) to route all prompts through that agent. The agent must be defined in `.claude/agents/` or `.agents/skills/`.
|
|
187
195
|
|
|
188
|
-
|
|
196
|
+
**Interactive stages** — pass `--agent` via `chatFlags` in client opts (2nd arg):
|
|
189
197
|
|
|
190
198
|
```ts
|
|
191
199
|
.run(async (ctx) => {
|
|
192
|
-
await ctx.stage(
|
|
193
|
-
|
|
194
|
-
|
|
200
|
+
await ctx.stage(
|
|
201
|
+
{ name: "plan" },
|
|
202
|
+
{ chatFlags: ["--agent", "planner", "--allow-dangerously-skip-permissions", "--dangerously-skip-permissions"] },
|
|
203
|
+
{},
|
|
204
|
+
async (s) => {
|
|
205
|
+
await s.session.query(`Create a plan for: ${(s.inputs.prompt ?? "")}`);
|
|
206
|
+
s.save(s.sessionId);
|
|
207
|
+
},
|
|
208
|
+
);
|
|
209
|
+
})
|
|
210
|
+
```
|
|
195
211
|
|
|
196
|
-
|
|
197
|
-
await s.session.query(`@"orchestrator (agent)" Execute the plan above.`);
|
|
212
|
+
**Headless stages** — pass `agent` via SDK options in the `query()` call:
|
|
198
213
|
|
|
199
|
-
|
|
200
|
-
|
|
214
|
+
```ts
|
|
215
|
+
.run(async (ctx) => {
|
|
216
|
+
const handle = await ctx.stage(
|
|
217
|
+
{ name: "locate", headless: true },
|
|
218
|
+
{}, {},
|
|
219
|
+
async (s) => {
|
|
220
|
+
const result = await s.session.query(
|
|
221
|
+
"Find all API endpoint files",
|
|
222
|
+
{ agent: "codebase-locator", permissionMode: "bypassPermissions", allowDangerouslySkipPermissions: true },
|
|
223
|
+
);
|
|
224
|
+
s.save(s.sessionId);
|
|
225
|
+
return extractAssistantText(result, 0);
|
|
226
|
+
},
|
|
227
|
+
);
|
|
201
228
|
})
|
|
202
229
|
```
|
|
203
230
|
|
|
231
|
+
> **Note:** The `@"agent-name (agent)"` prompt prefix is for multi-agent conversations in a single stage where you switch between agents mid-session. For single-agent stages, prefer `--agent` (interactive) or the `agent` SDK option (headless) as shown above.
|
|
232
|
+
|
|
204
233
|
### Headless mode (background stages)
|
|
205
234
|
|
|
206
|
-
Claude headless stages use the Agent SDK's `query()` API directly in-process instead of automating a tmux pane. Set `headless: true` in the stage options:
|
|
235
|
+
Claude headless stages use the Agent SDK's `query()` API directly in-process instead of automating a tmux pane. Set `headless: true` in the stage options. SDK options like `agent`, `permissionMode`, and `allowDangerouslySkipPermissions` can be passed directly in the `query()` call:
|
|
207
236
|
|
|
208
237
|
```ts
|
|
238
|
+
import { defineWorkflow, extractAssistantText } from "@bastani/atomic/workflows";
|
|
239
|
+
|
|
240
|
+
// ...
|
|
209
241
|
await ctx.stage(
|
|
210
242
|
{ name: "background-analysis", headless: true },
|
|
211
243
|
{}, {},
|
|
212
244
|
async (s) => {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
245
|
+
const result = await s.session.query(
|
|
246
|
+
"Analyze the codebase.",
|
|
247
|
+
{ agent: "codebase-analyzer", permissionMode: "bypassPermissions", allowDangerouslySkipPermissions: true },
|
|
248
|
+
);
|
|
216
249
|
s.save(s.sessionId);
|
|
217
|
-
return result
|
|
250
|
+
return extractAssistantText(result, 0);
|
|
218
251
|
},
|
|
219
252
|
);
|
|
220
253
|
```
|
|
221
254
|
|
|
222
|
-
The callback interface is identical to interactive stages. Internally, the runtime uses `
|
|
255
|
+
The callback interface is identical to interactive stages — `s.session.query()` returns `SessionMessage[]` in both cases. Internally, the runtime uses `HeadlessClaudeSessionWrapper` which calls `query()` from `@anthropic-ai/claude-agent-sdk` directly. No tmux pane is created, and the stage is invisible in the workflow graph.
|
|
256
|
+
|
|
257
|
+
**Design principle:** Never create custom message types. All provider return types are native SDK types — `SessionMessage[]` for Claude, `SessionEvent[]` for Copilot, `SessionPromptResponse` for OpenCode. Use `extractAssistantText()` to extract plain text from Claude's `SessionMessage[]`.
|
|
223
258
|
|
|
224
259
|
## Copilot SDK
|
|
225
260
|
|
|
@@ -230,7 +265,11 @@ Copilot uses a client-server architecture. The runtime auto-creates a `CopilotCl
|
|
|
230
265
|
```ts
|
|
231
266
|
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
232
267
|
|
|
233
|
-
export default defineWorkflow
|
|
268
|
+
export default defineWorkflow({
|
|
269
|
+
name: "implement",
|
|
270
|
+
inputs: [{ name: "prompt", type: "text", required: true, description: "task prompt" }],
|
|
271
|
+
})
|
|
272
|
+
.for<"copilot">()
|
|
234
273
|
.run(async (ctx) => {
|
|
235
274
|
await ctx.stage(
|
|
236
275
|
{ name: "implement" },
|
|
@@ -563,7 +602,11 @@ OpenCode uses a client-server model. The runtime auto-creates an `OpencodeClient
|
|
|
563
602
|
```ts
|
|
564
603
|
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
565
604
|
|
|
566
|
-
export default defineWorkflow
|
|
605
|
+
export default defineWorkflow({
|
|
606
|
+
name: "implement",
|
|
607
|
+
inputs: [{ name: "prompt", type: "text", required: true, description: "task prompt" }],
|
|
608
|
+
})
|
|
609
|
+
.for<"opencode">()
|
|
567
610
|
.run(async (ctx) => {
|
|
568
611
|
await ctx.stage(
|
|
569
612
|
{ name: "implement" },
|
|
@@ -34,11 +34,13 @@ Each SDK returns responses in different formats. Use helpers to extract text:
|
|
|
34
34
|
|
|
35
35
|
### Claude
|
|
36
36
|
|
|
37
|
-
`s.session.query()` returns `
|
|
37
|
+
`s.session.query()` returns `SessionMessage[]` — the native SDK transcript messages from this turn. Use `extractAssistantText()` to extract the plain text:
|
|
38
38
|
|
|
39
39
|
```ts
|
|
40
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
41
|
+
|
|
40
42
|
const result = await s.session.query("...");
|
|
41
|
-
const text = result
|
|
43
|
+
const text = extractAssistantText(result, 0); // Extract text from SessionMessage[]
|
|
42
44
|
```
|
|
43
45
|
|
|
44
46
|
### Copilot
|
|
@@ -212,7 +214,7 @@ ${implTranscript.content}
|
|
|
212
214
|
Respond with JSON: { "correctness": N, "completeness": N, "style": N, "pass": boolean, "issues": [...] }`,
|
|
213
215
|
);
|
|
214
216
|
|
|
215
|
-
const scores = parseJsonResponse(result
|
|
217
|
+
const scores = parseJsonResponse(extractAssistantText(result, 0));
|
|
216
218
|
|
|
217
219
|
if (!scores.pass) {
|
|
218
220
|
await s.session.query(`Fix these quality issues:\n${scores.issues.join("\n")}`);
|
|
@@ -16,6 +16,8 @@ Prefer inter-session control flow when you want the workflow graph to reflect wh
|
|
|
16
16
|
Run a triage session first, then branch at the `.run()` level to spawn a purpose-built session for each outcome. Every branch appears as a distinct node in the graph:
|
|
17
17
|
|
|
18
18
|
```ts
|
|
19
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
20
|
+
|
|
19
21
|
.run(async (ctx) => {
|
|
20
22
|
// Step 1: Classify the request
|
|
21
23
|
const triage = await ctx.stage({ name: "triage" }, {}, {}, async (s) => {
|
|
@@ -23,7 +25,7 @@ Run a triage session first, then branch at the `.run()` level to spawn a purpose
|
|
|
23
25
|
`Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
|
|
24
26
|
);
|
|
25
27
|
s.save(s.sessionId);
|
|
26
|
-
return result.
|
|
28
|
+
return extractAssistantText(result, 0).toLowerCase();
|
|
27
29
|
});
|
|
28
30
|
|
|
29
31
|
const classification = triage.result;
|
|
@@ -53,13 +55,15 @@ Run a triage session first, then branch at the `.run()` level to spawn a purpose
|
|
|
53
55
|
When the branching logic is simple and you want the agent to retain full context across both the triage and the action, do it all inside a single session callback:
|
|
54
56
|
|
|
55
57
|
```ts
|
|
58
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
59
|
+
|
|
56
60
|
.run(async (ctx) => {
|
|
57
61
|
await ctx.stage({ name: "triage-and-act" }, {}, {}, async (s) => {
|
|
58
62
|
const triageResult = await s.session.query(
|
|
59
63
|
`Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
|
|
60
64
|
);
|
|
61
65
|
|
|
62
|
-
const classification = triageResult.
|
|
66
|
+
const classification = extractAssistantText(triageResult, 0).toLowerCase();
|
|
63
67
|
|
|
64
68
|
if (classification.includes("bug")) {
|
|
65
69
|
await s.session.query("Diagnose and fix the bug described above.");
|
|
@@ -81,6 +85,8 @@ When the branching logic is simple and you want the agent to retain full context
|
|
|
81
85
|
Each iteration spawns its own session, so the graph shows exactly how many passes ran:
|
|
82
86
|
|
|
83
87
|
```ts
|
|
88
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
89
|
+
|
|
84
90
|
.run(async (ctx) => {
|
|
85
91
|
const MAX_ITERATIONS = 5;
|
|
86
92
|
|
|
@@ -88,7 +94,7 @@ Each iteration spawns its own session, so the graph shows exactly how many passe
|
|
|
88
94
|
const iteration = await ctx.stage({ name: `refine-${i}` }, {}, {}, async (s) => {
|
|
89
95
|
const result = await s.session.query(`Iteration ${i}: Improve the implementation.`);
|
|
90
96
|
s.save(s.sessionId);
|
|
91
|
-
return result
|
|
97
|
+
return extractAssistantText(result, 0);
|
|
92
98
|
});
|
|
93
99
|
|
|
94
100
|
if (iteration.result.includes("LGTM") || iteration.result.includes("no issues")) {
|
|
@@ -103,6 +109,8 @@ Each iteration spawns its own session, so the graph shows exactly how many passe
|
|
|
103
109
|
When the agent must remember every prior iteration's output to make progress, keep the loop inside one session:
|
|
104
110
|
|
|
105
111
|
```ts
|
|
112
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
113
|
+
|
|
106
114
|
.run(async (ctx) => {
|
|
107
115
|
await ctx.stage({ name: "iterative-refinement" }, {}, {}, async (s) => {
|
|
108
116
|
const MAX_ITERATIONS = 5;
|
|
@@ -110,7 +118,7 @@ When the agent must remember every prior iteration's output to make progress, ke
|
|
|
110
118
|
for (let i = 0; i < MAX_ITERATIONS; i++) {
|
|
111
119
|
const result = await s.session.query(`Iteration ${i + 1}: Improve the implementation.`);
|
|
112
120
|
|
|
113
|
-
if (result.
|
|
121
|
+
if (extractAssistantText(result, 0).includes("LGTM") || extractAssistantText(result, 0).includes("no issues")) {
|
|
114
122
|
break;
|
|
115
123
|
}
|
|
116
124
|
}
|
|
@@ -125,6 +133,8 @@ When the agent must remember every prior iteration's output to make progress, ke
|
|
|
125
133
|
The inter-session pattern is the right fit here: every review and every fix becomes its own graph node, so the executed path is fully visible. This is the production-grade approach with consecutive clean-pass detection:
|
|
126
134
|
|
|
127
135
|
```ts
|
|
136
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
137
|
+
|
|
128
138
|
.run(async (ctx) => {
|
|
129
139
|
const MAX_CYCLES = 10;
|
|
130
140
|
const CLEAN_THRESHOLD = 2;
|
|
@@ -135,7 +145,7 @@ The inter-session pattern is the right fit here: every review and every fix beco
|
|
|
135
145
|
const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
|
|
136
146
|
const result = await s.session.query(buildReviewPrompt((ctx.inputs.prompt ?? "")));
|
|
137
147
|
s.save(s.sessionId);
|
|
138
|
-
return result
|
|
148
|
+
return extractAssistantText(result, 0);
|
|
139
149
|
});
|
|
140
150
|
|
|
141
151
|
const reviewRaw = review.result;
|
|
@@ -292,12 +302,14 @@ Each iteration's stages form a natural chain because each `await` follows the pr
|
|
|
292
302
|
Headless stages (`{ headless: true }`) are **invisible in the workflow graph** — they don't consume or update the execution frontier. This means they don't affect the parent-child edges inferred for visible stages.
|
|
293
303
|
|
|
294
304
|
```ts
|
|
305
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
306
|
+
|
|
295
307
|
// ✅ Graph renders: seed → merge (headless stages are transparent)
|
|
296
308
|
.run(async (ctx) => {
|
|
297
309
|
const seed = await ctx.stage({ name: "seed" }, {}, {}, async (s) => {
|
|
298
310
|
const result = await s.session.query("Describe the project.");
|
|
299
311
|
s.save(s.sessionId);
|
|
300
|
-
return result
|
|
312
|
+
return extractAssistantText(result, 0);
|
|
301
313
|
});
|
|
302
314
|
|
|
303
315
|
// Three parallel headless stages — invisible in the graph
|
|
@@ -305,17 +317,17 @@ Headless stages (`{ headless: true }`) are **invisible in the workflow graph**
|
|
|
305
317
|
ctx.stage({ name: "gather-a", headless: true }, {}, {}, async (s) => {
|
|
306
318
|
const result = await s.session.query(`List 3 pros:\n\n${seed.result}`);
|
|
307
319
|
s.save(s.sessionId);
|
|
308
|
-
return result
|
|
320
|
+
return extractAssistantText(result, 0);
|
|
309
321
|
}),
|
|
310
322
|
ctx.stage({ name: "gather-b", headless: true }, {}, {}, async (s) => {
|
|
311
323
|
const result = await s.session.query(`List 3 cons:\n\n${seed.result}`);
|
|
312
324
|
s.save(s.sessionId);
|
|
313
|
-
return result
|
|
325
|
+
return extractAssistantText(result, 0);
|
|
314
326
|
}),
|
|
315
327
|
ctx.stage({ name: "gather-c", headless: true }, {}, {}, async (s) => {
|
|
316
328
|
const result = await s.session.query(`List 3 uses:\n\n${seed.result}`);
|
|
317
329
|
s.save(s.sessionId);
|
|
318
|
-
return result
|
|
330
|
+
return extractAssistantText(result, 0);
|
|
319
331
|
}),
|
|
320
332
|
]);
|
|
321
333
|
|
|
@@ -417,12 +429,14 @@ async function retryWithBackoff<T>(
|
|
|
417
429
|
Combine loops, conditionals, and inter-session data passing. Session callbacks return typed values via `SessionHandle<T>.result`, and `s.transcript(handle)` accepts a prior `SessionHandle` to read another session's saved output:
|
|
418
430
|
|
|
419
431
|
```ts
|
|
432
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
433
|
+
|
|
420
434
|
.run(async (ctx) => {
|
|
421
435
|
// Step 1: Analyse — result is available as a typed handle
|
|
422
436
|
const analysisHandle = await ctx.stage({ name: "analyze" }, {}, {}, async (s) => {
|
|
423
437
|
const result = await s.session.query(`Analyse the task: ${(ctx.inputs.prompt ?? "")}`);
|
|
424
438
|
s.save(s.sessionId);
|
|
425
|
-
return result
|
|
439
|
+
return extractAssistantText(result, 0);
|
|
426
440
|
});
|
|
427
441
|
|
|
428
442
|
const isComplex = analysisHandle.result.includes("complex");
|
|
@@ -439,7 +453,7 @@ Combine loops, conditionals, and inter-session data passing. Session callbacks r
|
|
|
439
453
|
: "Continue improving the implementation.",
|
|
440
454
|
);
|
|
441
455
|
s.save(s.sessionId);
|
|
442
|
-
return result
|
|
456
|
+
return extractAssistantText(result, 0);
|
|
443
457
|
});
|
|
444
458
|
|
|
445
459
|
if (impl.result.includes("all tests pass")) {
|
|
@@ -66,10 +66,11 @@ Every workflow file must use `export default` with a compiled workflow:
|
|
|
66
66
|
```ts
|
|
67
67
|
import { defineWorkflow } from "@bastani/atomic/workflows";
|
|
68
68
|
|
|
69
|
-
export default defineWorkflow
|
|
69
|
+
export default defineWorkflow({
|
|
70
70
|
name: "my-workflow",
|
|
71
71
|
description: "What this workflow does",
|
|
72
72
|
})
|
|
73
|
+
.for<"claude">()
|
|
73
74
|
.run(async (ctx) => {
|
|
74
75
|
await ctx.stage({ name: "step-1" }, {}, {}, async (s) => { /* ... */ });
|
|
75
76
|
await ctx.stage({ name: "step-2" }, {}, {}, async (s) => { /* ... */ });
|
|
@@ -126,7 +127,7 @@ This catches:
|
|
|
126
127
|
- SDK type mismatches (e.g., passing wrong types to `s.save()`)
|
|
127
128
|
- Incorrect provider-specific method calls (e.g., calling `s.session.query()` in a Copilot workflow)
|
|
128
129
|
|
|
129
|
-
**Note on
|
|
130
|
+
**Note on provider type parameter:** Using `.for<"claude">()`, `.for<"copilot">()`, or `.for<"opencode">()` narrows `s.client` and `s.session` to the correct provider types throughout the `.run()` callback and all `ctx.stage()` callbacks. Without the type parameter, `s.client` and `s.session` resolve to a union of all provider types, which requires type guards to use provider-specific methods.
|
|
130
131
|
|
|
131
132
|
## Testing
|
|
132
133
|
|
|
@@ -33,7 +33,7 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
|
|
|
33
33
|
| [F1](#f1-copilot-getlastassistanttext-returns-empty-string) | Copilot: `getLastAssistantText` returns empty string | Copilot | silent |
|
|
34
34
|
| [F2](#f2-copilot-sub-agent-messages-pollute-getmessages-stream) | Copilot: sub-agent messages pollute `getMessages()` stream | Copilot | silent |
|
|
35
35
|
| [F3](#f3-opencode-result-parts-contain-non-text-parts) | OpenCode: `result.data.parts` contains non-text parts | OpenCode | silent |
|
|
36
|
-
| [F4](#f4-
|
|
36
|
+
| [F4](#f4-claude-ssessionquery-returns-sessionmessage-extract-text-with-extractassistanttext) | Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText(result, 0)` | Claude | silent |
|
|
37
37
|
| [F5](#f5-fresh-session-wipes-prior-stage-context) | Fresh session wipes prior stage context | Copilot, OpenCode | silent |
|
|
38
38
|
| [F6](#f6-planner-prompts-that-dont-request-trailing-commentary-produce-empty-handoffs) | Planner prompts that don't request trailing commentary produce empty handoffs | all | silent |
|
|
39
39
|
| [F7](#f7-continued-sessions-accumulate-state-across-loop-iterations) | Continued sessions accumulate state across loop iterations (lost-in-middle) | all | silent |
|
|
@@ -176,49 +176,48 @@ function extractResponseText(
|
|
|
176
176
|
|
|
177
177
|
---
|
|
178
178
|
|
|
179
|
-
## F4. Claude: `s.session.query()`
|
|
179
|
+
## F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
|
|
180
180
|
|
|
181
|
-
**Symptom.**
|
|
182
|
-
|
|
181
|
+
**Symptom.** Workflow code tries to access `.output` or `.text` on the
|
|
182
|
+
result of `s.session.query()` and gets `undefined`, or passes the result
|
|
183
|
+
directly to a string parser that throws.
|
|
183
184
|
|
|
184
|
-
**Root cause.** `s.session.query()`
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
185
|
+
**Root cause.** `s.session.query()` returns `SessionMessage[]` — the native
|
|
186
|
+
Claude Agent SDK type. It does NOT return a `{ output: string }` object or a
|
|
187
|
+
raw TUI scrollback string. The assistant's text lives inside structured content
|
|
188
|
+
blocks within those messages and must be extracted explicitly.
|
|
188
189
|
|
|
189
|
-
**Affected SDKs.** Claude
|
|
190
|
+
**Affected SDKs.** Claude.
|
|
190
191
|
|
|
191
192
|
### ❌ Wrong
|
|
192
193
|
|
|
193
194
|
```ts
|
|
194
|
-
//
|
|
195
|
-
const
|
|
195
|
+
// result is SessionMessage[], not { output: string }
|
|
196
|
+
const result = await s.session.query(prompt);
|
|
197
|
+
const parsed = JSON.parse(result.output); // result.output is undefined, so JSON.parse throws a SyntaxError
|
|
196
198
|
```
|
|
197
199
|
|
|
198
|
-
### ✅ Right —
|
|
200
|
+
### ✅ Right — use `extractAssistantText(result, 0)`
|
|
199
201
|
|
|
200
202
|
```ts
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
let last: string | null = null;
|
|
207
|
-
let match: RegExpExecArray | null;
|
|
208
|
-
while ((match = re.exec(content)) !== null) {
|
|
209
|
-
if (match[1]) last = match[1];
|
|
210
|
-
}
|
|
211
|
-
return last;
|
|
212
|
-
}
|
|
203
|
+
import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
204
|
+
|
|
205
|
+
const result = await s.session.query(prompt);
|
|
206
|
+
const text = extractAssistantText(result, 0);
|
|
207
|
+
// Now `text` is the concatenated assistant prose for this turn
|
|
213
208
|
```
|
|
214
209
|
|
|
210
|
+
`extractAssistantText(msgs, afterIndex)` walks `SessionMessage[]` from
|
|
211
|
+
`afterIndex` forward, pulls `TextBlock.text` from each `assistant` message's
|
|
212
|
+
content array, and joins them with newlines.
|
|
213
|
+
|
|
215
214
|
The ralph helpers in `src/sdk/workflows/builtin/ralph/helpers/prompts.ts`
|
|
216
|
-
(`parseReviewResult`, `extractMarkdownBlock`) use this pattern — always
|
|
217
|
-
|
|
215
|
+
(`parseReviewResult`, `extractMarkdownBlock`) use this pattern — always
|
|
216
|
+
extract text first, then parse.
|
|
218
217
|
|
|
219
|
-
**Detection.**
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
**Detection.** Log `typeof result` after `s.session.query()`. If it's
|
|
219
|
+
`"object"` (confirm with `Array.isArray(result)`, since `typeof` reports `"object"` for arrays too), you need `extractAssistantText`. Accessing `.output`
|
|
220
|
+
on an array returns `undefined`.
|
|
222
221
|
|
|
223
222
|
---
|
|
224
223
|
|
|
@@ -232,9 +231,9 @@ returns a **fresh, empty conversation**. The CLIENT object is just the
|
|
|
232
231
|
transport — each session is independent. The new session sees only what you
|
|
233
232
|
put in its first prompt.
|
|
234
233
|
|
|
235
|
-
**Affected SDKs.** Copilot, OpenCode. (Claude's
|
|
236
|
-
different — context accumulates
|
|
237
|
-
does NOT apply to `s.session.query()`.)
|
|
234
|
+
**Affected SDKs.** Copilot, OpenCode. (Claude's session model is
|
|
235
|
+
different — context accumulates within the same SDK session, so this failure
|
|
236
|
+
mode does NOT apply to `s.session.query()`.)
|
|
238
237
|
|
|
239
238
|
### ❌ Wrong
|
|
240
239
|
|
|
@@ -329,8 +328,8 @@ or "forgetting" a requirement that was clearly stated in the original spec.
|
|
|
329
328
|
session, and context grows past the attention window. The model starts
|
|
330
329
|
dropping middle-of-context information (classic lost-in-middle).
|
|
331
330
|
|
|
332
|
-
**Affected SDKs.** All three. Claude's
|
|
333
|
-
|
|
331
|
+
**Affected SDKs.** All three. Claude's session transcript accumulates every
|
|
332
|
+
intermediate turn, so long loops grow the context window substantially.
|
|
334
333
|
|
|
335
334
|
### ❌ Wrong — unbounded loop on a single session
|
|
336
335
|
|
|
@@ -455,8 +454,8 @@ expects, and the runtime doesn't type-check the argument beyond "anything".
|
|
|
455
454
|
### ❌ Wrong
|
|
456
455
|
|
|
457
456
|
```ts
|
|
458
|
-
// Claude — saves the wrong thing
|
|
459
|
-
s.save(result.output);
|
|
457
|
+
// Claude — saves the wrong thing (result is SessionMessage[], not { output: string })
|
|
458
|
+
s.save(result.output); // result.output is undefined (arrays have no .output); use s.save(s.sessionId)
|
|
460
459
|
|
|
461
460
|
// Copilot — saves an empty array if called before send
|
|
462
461
|
s.save(await s.session.getMessages());
|