@bastani/atomic 0.5.23 → 0.5.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workflow-creator/SKILL.md +137 -326
- package/.agents/skills/workflow-creator/references/agent-sessions.md +211 -152
- package/.agents/skills/workflow-creator/references/computation-and-validation.md +12 -37
- package/.agents/skills/workflow-creator/references/control-flow.md +20 -14
- package/.agents/skills/workflow-creator/references/discovery-and-verification.md +1 -1
- package/.agents/skills/workflow-creator/references/failure-modes.md +87 -62
- package/.agents/skills/workflow-creator/references/getting-started.md +14 -40
- package/.agents/skills/workflow-creator/references/running-workflows.md +235 -0
- package/.agents/skills/workflow-creator/references/session-config.md +24 -9
- package/.agents/skills/workflow-creator/references/state-and-data-flow.md +9 -26
- package/.agents/skills/workflow-creator/references/user-input.md +71 -43
- package/.agents/skills/workflow-creator/references/workflow-inputs.md +25 -42
- package/dist/sdk/providers/claude.d.ts +7 -2
- package/dist/sdk/providers/claude.d.ts.map +1 -1
- package/dist/sdk/providers/opencode.d.ts +18 -2
- package/dist/sdk/providers/opencode.d.ts.map +1 -1
- package/dist/sdk/runtime/executor.d.ts +5 -0
- package/dist/sdk/runtime/executor.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/sdk/providers/claude.ts +57 -12
- package/src/sdk/providers/headless-hil-policy.test.ts +171 -0
- package/src/sdk/providers/opencode.ts +62 -2
- package/src/sdk/runtime/executor.ts +57 -14
|
@@ -22,7 +22,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
22
22
|
// Step 1: Classify the request
|
|
23
23
|
const triage = await ctx.stage({ name: "triage" }, {}, {}, async (s) => {
|
|
24
24
|
const result = await s.session.query(
|
|
25
|
-
`Classify this as "bug", "feature", or "question": ${(
|
|
25
|
+
`Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
|
|
26
26
|
);
|
|
27
27
|
s.save(s.sessionId);
|
|
28
28
|
return extractAssistantText(result, 0).toLowerCase();
|
|
@@ -60,7 +60,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
60
60
|
.run(async (ctx) => {
|
|
61
61
|
await ctx.stage({ name: "triage-and-act" }, {}, {}, async (s) => {
|
|
62
62
|
const triageResult = await s.session.query(
|
|
63
|
-
`Classify this as "bug", "feature", or "question": ${(
|
|
63
|
+
`Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
|
|
64
64
|
);
|
|
65
65
|
|
|
66
66
|
const classification = extractAssistantText(triageResult, 0).toLowerCase();
|
|
@@ -143,7 +143,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
143
143
|
for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
|
|
144
144
|
// Each review is a visible graph node
|
|
145
145
|
const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
|
|
146
|
-
const result = await s.session.query(buildReviewPrompt((
|
|
146
|
+
const result = await s.session.query(buildReviewPrompt((s.inputs.prompt ?? "")));
|
|
147
147
|
s.save(s.sessionId);
|
|
148
148
|
return extractAssistantText(result, 0);
|
|
149
149
|
});
|
|
@@ -162,8 +162,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
162
162
|
consecutiveClean = 0;
|
|
163
163
|
|
|
164
164
|
const fixPrompt = parsed
|
|
165
|
-
? buildFixSpecFromReview(parsed, (
|
|
166
|
-
: buildFixSpecFromRawReview(reviewRaw, (
|
|
165
|
+
? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
|
|
166
|
+
: buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
|
|
167
167
|
|
|
168
168
|
// Each fix is also a visible graph node
|
|
169
169
|
await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
|
|
@@ -176,7 +176,13 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
176
176
|
|
|
177
177
|
### Same pattern with Copilot
|
|
178
178
|
|
|
179
|
+
Copilot lacks a built-in text extractor — define `getAssistantText` as a
|
|
180
|
+
helper in your workflow (canonical definition in `failure-modes.md` §F1)
|
|
181
|
+
and import it from a sibling file:
|
|
182
|
+
|
|
179
183
|
```ts
|
|
184
|
+
import { getAssistantText } from "../helpers/parsers.ts"; // see failure-modes.md §F1
|
|
185
|
+
|
|
180
186
|
.run(async (ctx) => {
|
|
181
187
|
const MAX_CYCLES = 10;
|
|
182
188
|
let consecutiveClean = 0;
|
|
@@ -184,9 +190,9 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
184
190
|
for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
|
|
185
191
|
const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
|
|
186
192
|
await s.session.send({
|
|
187
|
-
prompt: buildReviewPrompt((
|
|
193
|
+
prompt: buildReviewPrompt((s.inputs.prompt ?? "")),
|
|
188
194
|
});
|
|
189
|
-
const reviewRaw = getAssistantText(await s.session.getMessages());
|
|
195
|
+
const reviewRaw = getAssistantText(await s.session.getMessages());
|
|
190
196
|
|
|
191
197
|
s.save(await s.session.getMessages());
|
|
192
198
|
return reviewRaw;
|
|
@@ -203,8 +209,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
203
209
|
consecutiveClean = 0;
|
|
204
210
|
|
|
205
211
|
const fixPrompt = parsed
|
|
206
|
-
? buildFixSpecFromReview(parsed, (
|
|
207
|
-
: buildFixSpecFromRawReview(reviewRaw, (
|
|
212
|
+
? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
|
|
213
|
+
: buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
|
|
208
214
|
|
|
209
215
|
await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
|
|
210
216
|
await s.session.send({
|
|
@@ -252,7 +258,7 @@ Sessions passed to `Promise.all([...])` branch from the same parent and run conc
|
|
|
252
258
|
A stage awaited after a `Promise.all` resolves automatically receives all parallel stages as parents — the graph draws a merge node:
|
|
253
259
|
|
|
254
260
|
```ts
|
|
255
|
-
// ✅ Graph infers:
|
|
261
|
+
// ✅ Graph infers: A → [B, C] → D (fan-in merge)
|
|
256
262
|
.run(async (ctx) => {
|
|
257
263
|
await ctx.stage({ name: "A" }, {}, {}, async (s) => { /* ... */ });
|
|
258
264
|
|
|
@@ -385,11 +391,11 @@ Within a single session callback, each SDK call adds to the conversation context
|
|
|
385
391
|
.run(async (ctx) => {
|
|
386
392
|
await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
|
|
387
393
|
try {
|
|
388
|
-
await s.session.query((
|
|
394
|
+
await s.session.query((s.inputs.prompt ?? ""));
|
|
389
395
|
} catch (error) {
|
|
390
396
|
// Retry with simpler prompt
|
|
391
397
|
await s.session.query(
|
|
392
|
-
`The previous attempt failed. Please try a simpler approach: ${(
|
|
398
|
+
`The previous attempt failed. Please try a simpler approach: ${(s.inputs.prompt ?? "")}`,
|
|
393
399
|
);
|
|
394
400
|
}
|
|
395
401
|
s.save(s.sessionId);
|
|
@@ -418,7 +424,7 @@ async function retryWithBackoff<T>(
|
|
|
418
424
|
|
|
419
425
|
.run(async (ctx) => {
|
|
420
426
|
await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
|
|
421
|
-
await retryWithBackoff(() => s.session.query((
|
|
427
|
+
await retryWithBackoff(() => s.session.query((s.inputs.prompt ?? "")));
|
|
422
428
|
s.save(s.sessionId);
|
|
423
429
|
});
|
|
424
430
|
})
|
|
@@ -434,7 +440,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
|
|
|
434
440
|
.run(async (ctx) => {
|
|
435
441
|
// Step 1: Analyse — result is available as a typed handle
|
|
436
442
|
const analysisHandle = await ctx.stage({ name: "analyze" }, {}, {}, async (s) => {
|
|
437
|
-
const result = await s.session.query(`Analyse the task: ${(
|
|
443
|
+
const result = await s.session.query(`Analyse the task: ${(s.inputs.prompt ?? "")}`);
|
|
438
444
|
s.save(s.sessionId);
|
|
439
445
|
return extractAssistantText(result, 0);
|
|
440
446
|
});
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
```bash
|
|
6
6
|
bun init # Create a new project
|
|
7
7
|
bun add @bastani/atomic # Install the workflow SDK
|
|
8
|
-
bun add @github/copilot-sdk # For Copilot workflows
|
|
9
8
|
bun add @anthropic-ai/claude-agent-sdk # For Claude workflows
|
|
9
|
+
bun add @github/copilot-sdk # For Copilot workflows
|
|
10
10
|
bun add @opencode-ai/sdk # For OpenCode workflows
|
|
11
11
|
```
|
|
12
12
|
|
|
@@ -31,7 +31,7 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
|
|
|
31
31
|
| # | Failure | Affected | Silent? |
|
|
32
32
|
|---|---|---|---|
|
|
33
33
|
| [F1](#f1-copilot-getlastassistanttext-returns-empty-string) | Copilot: `getLastAssistantText` returns empty string | Copilot | silent |
|
|
34
|
-
| [F2](#f2-copilot-
|
|
34
|
+
| [F2](#f2-copilot-subagent-messages-pollute-getmessages-stream) | Copilot: subagent messages pollute `getMessages()` stream | Copilot | silent |
|
|
35
35
|
| [F3](#f3-opencode-resultdataparts-contains-non-text-parts) | OpenCode: `result.data.parts` contains non-text parts | OpenCode | silent |
|
|
36
36
|
| [F4](#f4-claude-ssessionquery-returns-sessionmessage--extract-text-with-extractassistanttext) | Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText(result, 0)` | Claude | silent |
|
|
37
37
|
| [F5](#f5-fresh-session-wipes-prior-stage-context) | Fresh session wipes prior stage context | Copilot, OpenCode | silent |
|
|
@@ -40,17 +40,18 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
|
|
|
40
40
|
| [F8](#f8-fenced-block-parsers-break-when-the-model-adds-prose) | Fenced-block parsers break when the model adds prose before/after | all | silent |
|
|
41
41
|
| [F9](#f9-ssave-receives-the-wrong-shape) | `s.save()` receives the wrong shape for the SDK | all | silent |
|
|
42
42
|
| [F10](#f10-copilot-sendandwait-default-60s-timeout-throws) | Copilot: `sendAndWait` default 60s timeout throws (use `send` by default) | Copilot | loud |
|
|
43
|
-
| [F11](#f11-
|
|
44
|
-
| [F12](#f12-
|
|
45
|
-
| [F13](#f13-
|
|
46
|
-
| [F14](#f14-
|
|
47
|
-
| [F15](#f15-
|
|
48
|
-
| [F16](#f16-
|
|
49
|
-
| [F17](#f17-claude-importing-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
|
|
43
|
+
| [F11](#f11-provider-level-resume-tries-to-swap-agents) | Provider-level resume tries to swap agents | Copilot, OpenCode | loud |
|
|
44
|
+
| [F12](#f12-parallel-siblings-read-each-others-transcripts) | Parallel siblings read each other's transcripts | all | loud |
|
|
45
|
+
| [F13](#f13-forgetting-to-await-ctxstage) | Forgetting to `await` `ctx.stage()` | all | silent |
|
|
46
|
+
| [F14](#f14-using-a-pending-sessionhandle-before-completion) | Using a pending `SessionHandle` before completion | all | silent |
|
|
47
|
+
| [F15](#f15-headless-stage-errors-are-invisible-in-the-graph) | Headless stage errors are invisible in the graph | all | silent |
|
|
48
|
+
| [F16](#f16-claude-importing-the-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
|
|
50
49
|
|
|
51
50
|
---
|
|
52
51
|
|
|
53
|
-
##
|
|
52
|
+
## Silent failures
|
|
53
|
+
|
|
54
|
+
### F1. Copilot: `getLastAssistantText` returns empty string
|
|
54
55
|
|
|
55
56
|
**Symptom.** The orchestrator (or any downstream stage) receives an empty
|
|
56
57
|
`plannerNotes` / `reviewerOutput` despite the prior agent running successfully
|
|
@@ -104,22 +105,22 @@ function getAssistantText(messages: SessionEvent[]): string {
|
|
|
104
105
|
}
|
|
105
106
|
```
|
|
106
107
|
|
|
107
|
-
**Detection.** Log the returned text length after every `
|
|
108
|
-
during development. An empty or surprisingly short string for a stage
|
|
108
|
+
**Detection.** Log the returned text length after every `getAssistantText`
|
|
109
|
+
call during development. An empty or surprisingly short string for a stage
|
|
109
110
|
that clearly ran is the signature.
|
|
110
111
|
|
|
111
112
|
---
|
|
112
113
|
|
|
113
|
-
|
|
114
|
+
### F2. Copilot: subagent messages pollute `getMessages()` stream
|
|
114
115
|
|
|
115
116
|
**Symptom.** Downstream stages receive a snippet of text that doesn't match
|
|
116
|
-
what the top-level agent said — it looks like a
|
|
117
|
+
what the top-level agent said — it looks like a subagent's output.
|
|
117
118
|
|
|
118
119
|
**Root cause.** `assistant.message` events carry a `parentToolCallId?: string`
|
|
119
120
|
field, documented as *"Tool call ID of the parent tool invocation when this
|
|
120
|
-
event originates from a
|
|
121
|
-
`getMessages()` returns **the complete history including
|
|
122
|
-
Filters that don't exclude `parentToolCallId` can pick a
|
|
121
|
+
event originates from a subagent"*. When the top-level agent delegates,
|
|
122
|
+
`getMessages()` returns **the complete history including subagent messages**.
|
|
123
|
+
Filters that don't exclude `parentToolCallId` can pick a subagent's final
|
|
123
124
|
message via `.at(-1)`.
|
|
124
125
|
|
|
125
126
|
**Affected SDKs.** Copilot.
|
|
@@ -143,7 +144,7 @@ scrollback for the top-level agent.
|
|
|
143
144
|
|
|
144
145
|
---
|
|
145
146
|
|
|
146
|
-
|
|
147
|
+
### F3. OpenCode: `result.data.parts` contains non-text parts
|
|
147
148
|
|
|
148
149
|
**Symptom.** Concatenated response text contains `[object Object]`,
|
|
149
150
|
truncated content, or swallows tool-call payloads into the prompt.
|
|
@@ -177,7 +178,7 @@ function extractResponseText(
|
|
|
177
178
|
|
|
178
179
|
---
|
|
179
180
|
|
|
180
|
-
|
|
181
|
+
### F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
|
|
181
182
|
|
|
182
183
|
**Symptom.** Workflow code tries to access `.output` or `.text` on the
|
|
183
184
|
result of `s.session.query()` and gets `undefined`, or passes the result
|
|
@@ -222,7 +223,7 @@ on an array returns `undefined`.
|
|
|
222
223
|
|
|
223
224
|
---
|
|
224
225
|
|
|
225
|
-
|
|
226
|
+
### F5. Fresh session wipes prior stage context
|
|
226
227
|
|
|
227
228
|
**Symptom.** The orchestrator says "I don't see a task list" or "what
|
|
228
229
|
specification are you referring to?" even though the planner clearly ran.
|
|
@@ -239,18 +240,45 @@ mode does NOT apply to `s.session.query()`.)
|
|
|
239
240
|
### ❌ Wrong
|
|
240
241
|
|
|
241
242
|
```ts
|
|
242
|
-
await
|
|
243
|
+
await ctx.stage({ name: "planner" }, {}, { agent: "planner" }, async (s) => {
|
|
244
|
+
await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
|
|
245
|
+
s.save(await s.session.getMessages());
|
|
246
|
+
});
|
|
243
247
|
// orchestrator is a fresh session — it has no idea what the planner produced
|
|
244
|
-
await
|
|
248
|
+
await ctx.stage({ name: "orchestrator" }, {}, { agent: "orchestrator" }, async (s) => {
|
|
249
|
+
await s.session.send({ prompt: buildOrchestratorPrompt() });
|
|
250
|
+
s.save(await s.session.getMessages());
|
|
251
|
+
});
|
|
245
252
|
```
|
|
246
253
|
|
|
247
254
|
### ✅ Right — explicit handoff
|
|
248
255
|
|
|
249
256
|
```ts
|
|
250
|
-
const
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
257
|
+
const plannerHandle = await ctx.stage(
|
|
258
|
+
{ name: "planner" },
|
|
259
|
+
{},
|
|
260
|
+
{ agent: "planner" },
|
|
261
|
+
async (s) => {
|
|
262
|
+
await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
|
|
263
|
+
const messages = await s.session.getMessages();
|
|
264
|
+
s.save(messages);
|
|
265
|
+
return getAssistantText(messages); // see F1 for getAssistantText
|
|
266
|
+
},
|
|
267
|
+
);
|
|
268
|
+
|
|
269
|
+
await ctx.stage(
|
|
270
|
+
{ name: "orchestrator" },
|
|
271
|
+
{},
|
|
272
|
+
{ agent: "orchestrator" },
|
|
273
|
+
async (s) => {
|
|
274
|
+
await s.session.send({
|
|
275
|
+
prompt: buildOrchestratorPrompt(
|
|
276
|
+
(s.inputs.prompt ?? ""),
|
|
277
|
+
{ plannerNotes: plannerHandle.result },
|
|
278
|
+
),
|
|
279
|
+
});
|
|
280
|
+
s.save(await s.session.getMessages());
|
|
281
|
+
},
|
|
254
282
|
);
|
|
255
283
|
```
|
|
256
284
|
|
|
@@ -264,7 +292,7 @@ controls what context is available".
|
|
|
264
292
|
|
|
265
293
|
---
|
|
266
294
|
|
|
267
|
-
|
|
295
|
+
### F6. Planner prompts that don't request trailing commentary produce empty handoffs
|
|
268
296
|
|
|
269
297
|
**Symptom.** F1 / F5 are fixed, extraction is correct — and the orchestrator
|
|
270
298
|
still receives empty `plannerNotes` because the planner's last turn legitimately
|
|
@@ -319,7 +347,7 @@ string + a correctly-fixed extraction helper = F6.
|
|
|
319
347
|
|
|
320
348
|
---
|
|
321
349
|
|
|
322
|
-
|
|
350
|
+
### F7. Continued sessions accumulate state across loop iterations (lost-in-middle)
|
|
323
351
|
|
|
324
352
|
**Symptom.** A review/fix loop works on iterations 1-3 then starts
|
|
325
353
|
producing worse output — misidentifying files, hallucinating line numbers,
|
|
@@ -378,7 +406,7 @@ iteration N, N is your safe-turn budget before compaction.
|
|
|
378
406
|
|
|
379
407
|
---
|
|
380
408
|
|
|
381
|
-
|
|
409
|
+
### F8. Fenced-block parsers break when the model adds prose
|
|
382
410
|
|
|
383
411
|
**Symptom.** `JSON.parse(content)` throws, or a "matches the first fenced
|
|
384
412
|
block" regex picks up a code example inside prose instead of the actual
|
|
@@ -434,7 +462,7 @@ over several runs. If 1 in 20 runs fails to parse, you have F8.
|
|
|
434
462
|
|
|
435
463
|
---
|
|
436
464
|
|
|
437
|
-
|
|
465
|
+
### F9. `s.save()` receives the wrong shape
|
|
438
466
|
|
|
439
467
|
**Symptom.** `s.transcript("stage-name")` returns an empty or malformed
|
|
440
468
|
`content` string in the next stage.
|
|
@@ -458,9 +486,11 @@ expects, and the runtime doesn't type-check the argument beyond "anything".
|
|
|
458
486
|
// Claude — saves the wrong thing (result is SessionMessage[], not { output: string })
|
|
459
487
|
s.save(result.output); // no throw: `.output` on an array is just undefined, so nothing useful is saved; use s.save(s.sessionId)
|
|
460
488
|
|
|
461
|
-
// Copilot —
|
|
462
|
-
|
|
463
|
-
|
|
489
|
+
// Copilot — calling getMessages() BEFORE send() returns an empty array
|
|
490
|
+
const earlyMessages = await s.session.getMessages(); // [] — no turns yet
|
|
491
|
+
s.save(earlyMessages);
|
|
492
|
+
|
|
493
|
+
// Copilot — saving a single message instead of the full array
|
|
464
494
|
s.save((await s.session.getMessages()).at(-1));
|
|
465
495
|
|
|
466
496
|
// OpenCode — missing the data unwrap
|
|
@@ -479,7 +509,7 @@ log the length. A 0-length or JSON-that-isn't-prose signature = F9.
|
|
|
479
509
|
|
|
480
510
|
## Loud failures (throw, but still worth knowing)
|
|
481
511
|
|
|
482
|
-
|
|
512
|
+
### F10. Copilot: `sendAndWait` default 60s timeout throws
|
|
483
513
|
|
|
484
514
|
**Symptom.** `Timeout after 60000ms waiting for session.idle`. Every
|
|
485
515
|
subsequent `ctx.stage()` call never executes — the throw propagates out of
|
|
@@ -508,13 +538,7 @@ to "be safe", you want `send`.
|
|
|
508
538
|
|
|
509
539
|
---
|
|
510
540
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
No longer a failure mode. The runtime now auto-initializes `s.client` and `s.session` before the callback runs — just use `s.session.query()` directly.
|
|
514
|
-
|
|
515
|
-
---
|
|
516
|
-
|
|
517
|
-
## F12. Provider-level resume tries to swap agents
|
|
541
|
+
### F11. Provider-level resume tries to swap agents
|
|
518
542
|
|
|
519
543
|
**Symptom.** Resumed Copilot / OpenCode session behaves as the original
|
|
520
544
|
agent instead of the requested new one — or the SDK throws "agent mismatch"
|
|
@@ -530,7 +554,7 @@ over trying to reopen a prior stage.
|
|
|
530
554
|
|
|
531
555
|
---
|
|
532
556
|
|
|
533
|
-
|
|
557
|
+
### F12. Parallel siblings read each other's transcripts
|
|
534
558
|
|
|
535
559
|
**Symptom.** `s.transcript("sibling-name")` inside a parallel session
|
|
536
560
|
throws or returns empty.
|
|
@@ -546,27 +570,28 @@ shared state (files, DB) if siblings genuinely need to coordinate.
|
|
|
546
570
|
|
|
547
571
|
```ts
|
|
548
572
|
// Fan-out → merge
|
|
549
|
-
|
|
573
|
+
// Uses handles (s.transcript(handle)) — preferred over string names whenever a handle is in scope.
|
|
574
|
+
const describe = await ctx.stage({ name: "describe" }, {}, {}, async (s) => { /* ... */ });
|
|
550
575
|
|
|
551
|
-
await Promise.all([
|
|
576
|
+
const [summarizeA, summarizeB] = await Promise.all([
|
|
552
577
|
ctx.stage({ name: "summarize-a" }, {}, {}, async (s) => {
|
|
553
|
-
const d = await s.transcript(
|
|
578
|
+
const d = await s.transcript(describe); // OK — prior completed session (handle-based, preferred)
|
|
554
579
|
// s.transcript("summarize-b") would fail here — sibling not yet complete
|
|
555
580
|
}),
|
|
556
581
|
ctx.stage({ name: "summarize-b" }, {}, {}, async (s) => {
|
|
557
|
-
const d = await s.transcript(
|
|
582
|
+
const d = await s.transcript(describe); // OK — prior completed session
|
|
558
583
|
}),
|
|
559
584
|
]);
|
|
560
585
|
|
|
561
586
|
await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
|
|
562
|
-
const a = await s.transcript(
|
|
563
|
-
const b = await s.transcript(
|
|
587
|
+
const a = await s.transcript(summarizeA); // OK — handle-based, preferred over "summarize-a"
|
|
588
|
+
const b = await s.transcript(summarizeB);
|
|
564
589
|
});
|
|
565
590
|
```
|
|
566
591
|
|
|
567
592
|
---
|
|
568
593
|
|
|
569
|
-
|
|
594
|
+
### F13. Forgetting to `await` `ctx.stage()`
|
|
570
595
|
|
|
571
596
|
**Symptom.** A session runs (its tmux window opens, the agent does work)
|
|
572
597
|
but the orchestrator doesn't wait for it. Subsequent sessions that depend
|
|
@@ -617,7 +642,7 @@ this at compile time.
|
|
|
617
642
|
|
|
618
643
|
---
|
|
619
644
|
|
|
620
|
-
|
|
645
|
+
### F14. Using a pending `SessionHandle` before completion
|
|
621
646
|
|
|
622
647
|
**Symptom.** `handle.result` is `undefined` or stale, or
|
|
623
648
|
`s.transcript(handle)` throws / returns empty even though the session
|
|
@@ -670,7 +695,7 @@ accessing `.result` without awaiting, the type will be `Promise`, not `T`.
|
|
|
670
695
|
|
|
671
696
|
---
|
|
672
697
|
|
|
673
|
-
|
|
698
|
+
### F15. Headless stage errors are invisible in the graph
|
|
674
699
|
|
|
675
700
|
**Symptom.** A workflow fails but the graph shows all visible stages as
|
|
676
701
|
completed. The error message references a session name that doesn't appear
|
|
@@ -721,7 +746,7 @@ full error for each failed headless stage.
|
|
|
721
746
|
|
|
722
747
|
---
|
|
723
748
|
|
|
724
|
-
|
|
749
|
+
### F16. Claude: importing the SDK `query()` inside a non-headless stage
|
|
725
750
|
|
|
726
751
|
**Symptom.** A reviewer / extractor / structured-output stage shows up in
|
|
727
752
|
the workflow graph as a tmux pane, but the pane sits idle on the Claude
|
|
@@ -760,7 +785,7 @@ The runtime exposes exactly two routes for an SDK feature:
|
|
|
760
785
|
| You want to use… | Stage shape | Code in callback |
|
|
761
786
|
|---|---|---|
|
|
762
787
|
| `outputFormat`, custom `agents`, `maxBudgetUsd`, etc. **without** a visible pane | `{ headless: true }` | `s.session.query(prompt, sdkOptions)` — wraps `HeadlessClaudeSessionWrapper.query()` which forwards `options` to the SDK |
|
|
763
|
-
| The visible TUI with a
|
|
788
|
+
| The visible TUI with a subagent | omit `headless` and pass `chatFlags: ["--agent", "<name>", ...]` | `s.session.query(prompt)` — sends through tmux send-keys |
|
|
764
789
|
|
|
765
790
|
The one option that does **not** exist is "visible pane + in-process SDK call".
|
|
766
791
|
That combination is always wrong — pick one route or the other.
|
|
@@ -787,9 +812,9 @@ await ctx.stage({ name: "review" }, {}, {}, async (s) => {
|
|
|
787
812
|
});
|
|
788
813
|
```
|
|
789
814
|
|
|
790
|
-
### ✅ Right (a) — visible TUI with
|
|
815
|
+
### ✅ Right (a) — visible TUI with subagent + chatFlags
|
|
791
816
|
|
|
792
|
-
When you want the user to watch the review happen, run the
|
|
817
|
+
When you want the user to watch the review happen, run the subagent in
|
|
793
818
|
the pane via `--agent` and parse JSON out of the assistant text. The
|
|
794
819
|
prompt should enumerate the schema fields so the model emits matching
|
|
795
820
|
JSON; a tolerant parser (last-fenced-block + last-balanced-object
|
|
@@ -853,8 +878,8 @@ await ctx.stage(
|
|
|
853
878
|
`s.client` and `s.session`.
|
|
854
879
|
2. Watch the workflow run. If a visible pane shows the Claude welcome
|
|
855
880
|
screen for the entire duration of a stage and never receives a prompt,
|
|
856
|
-
you have
|
|
857
|
-
3. Cost monitoring.
|
|
881
|
+
you have F16.
|
|
882
|
+
3. Cost monitoring. F16 roughly doubles the Claude process count — if
|
|
858
883
|
stage spend looks 2× a single run, audit imports.
|
|
859
884
|
|
|
860
885
|
---
|
|
@@ -870,9 +895,9 @@ Before shipping a multi-session workflow, walk the list:
|
|
|
870
895
|
- [ ] Structured-output parsers extract the LAST fenced block, not the first (F8)
|
|
871
896
|
- [ ] `s.save()` receives the per-SDK correct shape — Copilot uses `s.session.getMessages()` (F9)
|
|
872
897
|
- [ ] Loops over 10 iterations have a compaction / reset strategy (F7)
|
|
873
|
-
- [ ] Parallel groups only read from prior completed sessions, never siblings (
|
|
874
|
-
- [ ] Every `ctx.stage()` call is `await`ed (
|
|
875
|
-
- [ ] `SessionHandle` values are only used after the promise resolves (
|
|
876
|
-
- [ ] If provider-level resume/fork is used at all, it stays within the same agent role (
|
|
877
|
-
- [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (
|
|
878
|
-
- [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (
|
|
898
|
+
- [ ] Parallel groups only read from prior completed sessions, never siblings (F12)
|
|
899
|
+
- [ ] Every `ctx.stage()` call is `await`ed (F13)
|
|
900
|
+
- [ ] `SessionHandle` values are only used after the promise resolves (F14)
|
|
901
|
+
- [ ] If provider-level resume/fork is used at all, it stays within the same agent role (F11)
|
|
902
|
+
- [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (F15)
|
|
903
|
+
- [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (F16)
|
|
@@ -171,47 +171,27 @@ if (needsReview) {
|
|
|
171
171
|
|
|
172
172
|
## Headless (background) stages
|
|
173
173
|
|
|
174
|
-
|
|
174
|
+
Set `headless: true` in the stage options to run the provider SDK
|
|
175
|
+
in-process instead of spawning a tmux window — invisible in the graph,
|
|
176
|
+
identical callback API.
|
|
175
177
|
|
|
176
178
|
```ts
|
|
177
|
-
// Headless stage — identical callback API, no tmux window
|
|
178
179
|
const result = await ctx.stage(
|
|
179
180
|
{ name: "background-task", headless: true },
|
|
180
181
|
{}, {},
|
|
181
182
|
async (s) => {
|
|
182
|
-
// s.client, s.session, s.save(), s.transcript() all work identically
|
|
183
183
|
const result = await s.session.query("Analyze the codebase.");
|
|
184
184
|
s.save(s.sessionId);
|
|
185
185
|
return extractAssistantText(result, 0);
|
|
186
186
|
},
|
|
187
187
|
);
|
|
188
|
-
// result.result contains the returned value
|
|
189
188
|
```
|
|
190
189
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
-
|
|
194
|
-
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
**Common pattern — visible seed, parallel headless gather, visible merge:**
|
|
198
|
-
|
|
199
|
-
```ts
|
|
200
|
-
const seed = await ctx.stage({ name: "seed" }, {}, {}, async (s) => { /* ... */ });
|
|
201
|
-
|
|
202
|
-
const [a, b, c] = await Promise.all([
|
|
203
|
-
ctx.stage({ name: "gather-a", headless: true }, {}, {}, async (s) => { /* ... */ }),
|
|
204
|
-
ctx.stage({ name: "gather-b", headless: true }, {}, {}, async (s) => { /* ... */ }),
|
|
205
|
-
ctx.stage({ name: "gather-c", headless: true }, {}, {}, async (s) => { /* ... */ }),
|
|
206
|
-
]);
|
|
207
|
-
|
|
208
|
-
await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
|
|
209
|
-
await s.session.query(`Merge:\n${a.result}\n${b.result}\n${c.result}`);
|
|
210
|
-
s.save(s.sessionId);
|
|
211
|
-
});
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
Headless stages are transparent to graph topology — `seed → [3 headless] → merge` renders as `seed → merge` in the graph.
|
|
190
|
+
For per-provider mechanics, the canonical fan-out pattern (visible seed →
|
|
191
|
+
parallel headless → visible merge), and topology semantics, see
|
|
192
|
+
`control-flow.md` §"Headless stages: transparent to graph topology" and the
|
|
193
|
+
per-SDK "Headless mode" sections in `agent-sessions.md`. Failure visibility
|
|
194
|
+
caveats live in `failure-modes.md` §F15.
|
|
215
195
|
|
|
216
196
|
## SDK exports
|
|
217
197
|
|
|
@@ -275,16 +255,9 @@ The Atomic runtime provides `s.client` and `s.session` with types resolved from
|
|
|
275
255
|
|
|
276
256
|
## Reference files
|
|
277
257
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
| `agent-sessions.md` | Creating agent sessions with SDK calls per provider |
|
|
282
|
-
| `computation-and-validation.md` | Deterministic computation, parsing, validation inside `run()` |
|
|
283
|
-
| `user-input.md` | Collecting user input **mid-workflow** (for invocation-time inputs, see `workflow-inputs.md`) |
|
|
284
|
-
| `control-flow.md` | Loops, conditionals, early termination in plain TypeScript |
|
|
285
|
-
| `state-and-data-flow.md` | Data flow between sessions, transcripts, persistence |
|
|
286
|
-
| `session-config.md` | Per-SDK session configuration: model, tools, permissions, hooks |
|
|
287
|
-
| `discovery-and-verification.md` | Workflow file discovery, validation, TypeScript config |
|
|
258
|
+
The full table of references with load triggers lives in SKILL.md
|
|
259
|
+
§"Reference Files". Pull `failure-modes.md` before shipping any
|
|
260
|
+
multi-session workflow, and `agent-sessions.md` whenever writing SDK calls.
|
|
288
261
|
|
|
289
262
|
## Builtin reference implementations
|
|
290
263
|
|
|
@@ -292,10 +265,11 @@ The SDK ships two builtin workflows that demonstrate production patterns for all
|
|
|
292
265
|
|
|
293
266
|
- **`ralph`** (`src/sdk/workflows/builtin/ralph/`) — iterative plan → orchestrate → review → debug loop with consecutive clean-pass detection, shared helpers for prompts/parsing/git, and cross-SDK adaptation
|
|
294
267
|
- **`deep-research-codebase`** (`src/sdk/workflows/builtin/deep-research-codebase/`) — deterministic codebase scout → LOC-based heuristic explorer partitioning → parallel explorers → aggregator with file-based handoffs and context-aware prompt engineering
|
|
295
|
-
- **`headless-test`** (`.atomic/workflows/headless-test/`) — demonstrates the visible → [parallel headless] → visible merge pattern (all 3 SDKs)
|
|
296
268
|
|
|
297
269
|
Both include `helpers/` directories with SDK-agnostic logic (prompt builders, parsers, heuristics) and per-agent `index.ts` files showing how the same workflow topology adapts to Claude, Copilot, and OpenCode.
|
|
298
270
|
|
|
271
|
+
For a minimal headless example, see `.atomic/workflows/headless-test/` (not a builtin — it lives as a local workflow in this repo), which demonstrates the visible → [parallel headless] → visible merge pattern for all three SDKs.
|
|
272
|
+
|
|
299
273
|
## Type safety
|
|
300
274
|
|
|
301
|
-
The SDK
|
|
275
|
+
The SDK avoids `any` and uses `unknown` only at well-defined boundaries (e.g., `SessionRef = string | SessionHandle<unknown>` for handle-erased lookups). `SessionContext` fields are precisely typed, and native provider types may appear inside Atomic generic aliases and runtime values — if you need to name those types in your own code, import them from the provider SDK directly. Use `import type` for type-only imports. Use `.for<"agent">()` to narrow `s.client` and `s.session` to the correct provider types. Declare `inputs` inline so TypeScript enforces typed access on `ctx.inputs`.
|