@bastani/atomic 0.5.23-0 → 0.5.24-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
22
22
  // Step 1: Classify the request
23
23
  const triage = await ctx.stage({ name: "triage" }, {}, {}, async (s) => {
24
24
  const result = await s.session.query(
25
- `Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
25
+ `Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
26
26
  );
27
27
  s.save(s.sessionId);
28
28
  return extractAssistantText(result, 0).toLowerCase();
@@ -60,7 +60,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
60
60
  .run(async (ctx) => {
61
61
  await ctx.stage({ name: "triage-and-act" }, {}, {}, async (s) => {
62
62
  const triageResult = await s.session.query(
63
- `Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
63
+ `Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
64
64
  );
65
65
 
66
66
  const classification = extractAssistantText(triageResult, 0).toLowerCase();
@@ -143,7 +143,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
143
143
  for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
144
144
  // Each review is a visible graph node
145
145
  const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
146
- const result = await s.session.query(buildReviewPrompt((ctx.inputs.prompt ?? "")));
146
+ const result = await s.session.query(buildReviewPrompt((s.inputs.prompt ?? "")));
147
147
  s.save(s.sessionId);
148
148
  return extractAssistantText(result, 0);
149
149
  });
@@ -162,8 +162,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
162
162
  consecutiveClean = 0;
163
163
 
164
164
  const fixPrompt = parsed
165
- ? buildFixSpecFromReview(parsed, (ctx.inputs.prompt ?? ""))
166
- : buildFixSpecFromRawReview(reviewRaw, (ctx.inputs.prompt ?? ""));
165
+ ? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
166
+ : buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
167
167
 
168
168
  // Each fix is also a visible graph node
169
169
  await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
@@ -176,7 +176,13 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
176
176
 
177
177
  ### Same pattern with Copilot
178
178
 
179
+ Copilot lacks a built-in text extractor — define `getAssistantText` as a
180
+ helper in your workflow (canonical definition in `failure-modes.md` §F1)
181
+ and import it from a shared helpers module:
182
+
179
183
  ```ts
184
+ import { getAssistantText } from "../helpers/parsers.ts"; // see failure-modes.md §F1
185
+
180
186
  .run(async (ctx) => {
181
187
  const MAX_CYCLES = 10;
182
188
  let consecutiveClean = 0;
@@ -184,9 +190,9 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
184
190
  for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
185
191
  const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
186
192
  await s.session.send({
187
- prompt: buildReviewPrompt((ctx.inputs.prompt ?? "")),
193
+ prompt: buildReviewPrompt((s.inputs.prompt ?? "")),
188
194
  });
189
- const reviewRaw = getAssistantText(await s.session.getMessages()); // see failure-modes.md §F1
195
+ const reviewRaw = getAssistantText(await s.session.getMessages());
190
196
 
191
197
  s.save(await s.session.getMessages());
192
198
  return reviewRaw;
@@ -203,8 +209,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
203
209
  consecutiveClean = 0;
204
210
 
205
211
  const fixPrompt = parsed
206
- ? buildFixSpecFromReview(parsed, (ctx.inputs.prompt ?? ""))
207
- : buildFixSpecFromRawReview(reviewRaw, (ctx.inputs.prompt ?? ""));
212
+ ? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
213
+ : buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
208
214
 
209
215
  await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
210
216
  await s.session.send({
@@ -252,7 +258,7 @@ Sessions passed to `Promise.all([...])` branch from the same parent and run conc
252
258
  A stage awaited after a `Promise.all` resolves automatically receives all parallel stages as parents — the graph draws a merge node:
253
259
 
254
260
  ```ts
255
- // ✅ Graph infers: orchestrator → A → [B, C] → D (fan-in merge)
261
+ // ✅ Graph infers: A → [B, C] → D (fan-in merge)
256
262
  .run(async (ctx) => {
257
263
  await ctx.stage({ name: "A" }, {}, {}, async (s) => { /* ... */ });
258
264
 
@@ -385,11 +391,11 @@ Within a single session callback, each SDK call adds to the conversation context
385
391
  .run(async (ctx) => {
386
392
  await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
387
393
  try {
388
- await s.session.query((ctx.inputs.prompt ?? ""));
394
+ await s.session.query((s.inputs.prompt ?? ""));
389
395
  } catch (error) {
390
396
  // Retry with simpler prompt
391
397
  await s.session.query(
392
- `The previous attempt failed. Please try a simpler approach: ${(ctx.inputs.prompt ?? "")}`,
398
+ `The previous attempt failed. Please try a simpler approach: ${(s.inputs.prompt ?? "")}`,
393
399
  );
394
400
  }
395
401
  s.save(s.sessionId);
@@ -418,7 +424,7 @@ async function retryWithBackoff<T>(
418
424
 
419
425
  .run(async (ctx) => {
420
426
  await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
421
- await retryWithBackoff(() => s.session.query((ctx.inputs.prompt ?? "")));
427
+ await retryWithBackoff(() => s.session.query((s.inputs.prompt ?? "")));
422
428
  s.save(s.sessionId);
423
429
  });
424
430
  })
@@ -434,7 +440,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
434
440
  .run(async (ctx) => {
435
441
  // Step 1: Analyse — result is available as a typed handle
436
442
  const analysisHandle = await ctx.stage({ name: "analyze" }, {}, {}, async (s) => {
437
- const result = await s.session.query(`Analyse the task: ${(ctx.inputs.prompt ?? "")}`);
443
+ const result = await s.session.query(`Analyse the task: ${(s.inputs.prompt ?? "")}`);
438
444
  s.save(s.sessionId);
439
445
  return extractAssistantText(result, 0);
440
446
  });
@@ -5,8 +5,8 @@
5
5
  ```bash
6
6
  bun init # Create a new project
7
7
  bun add @bastani/atomic # Install the workflow SDK
8
- bun add @github/copilot-sdk # For Copilot workflows
9
8
  bun add @anthropic-ai/claude-agent-sdk # For Claude workflows
9
+ bun add @github/copilot-sdk # For Copilot workflows
10
10
  bun add @opencode-ai/sdk # For OpenCode workflows
11
11
  ```
12
12
 
@@ -31,7 +31,7 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
31
31
  | # | Failure | Affected | Silent? |
32
32
  |---|---|---|---|
33
33
  | [F1](#f1-copilot-getlastassistanttext-returns-empty-string) | Copilot: `getLastAssistantText` returns empty string | Copilot | silent |
34
- | [F2](#f2-copilot-sub-agent-messages-pollute-getmessages-stream) | Copilot: sub-agent messages pollute `getMessages()` stream | Copilot | silent |
34
+ | [F2](#f2-copilot-subagent-messages-pollute-getmessages-stream) | Copilot: subagent messages pollute `getMessages()` stream | Copilot | silent |
35
35
  | [F3](#f3-opencode-resultdataparts-contains-non-text-parts) | OpenCode: `result.data.parts` contains non-text parts | OpenCode | silent |
36
36
  | [F4](#f4-claude-ssessionquery-returns-sessionmessage--extract-text-with-extractassistanttext) | Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText(result, 0)` | Claude | silent |
37
37
  | [F5](#f5-fresh-session-wipes-prior-stage-context) | Fresh session wipes prior stage context | Copilot, OpenCode | silent |
@@ -40,17 +40,18 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
40
40
  | [F8](#f8-fenced-block-parsers-break-when-the-model-adds-prose) | Fenced-block parsers break when the model adds prose before/after | all | silent |
41
41
  | [F9](#f9-ssave-receives-the-wrong-shape) | `s.save()` receives the wrong shape for the SDK | all | silent |
42
42
  | [F10](#f10-copilot-sendandwait-default-60s-timeout-throws) | Copilot: `sendAndWait` default 60s timeout throws (use `send` by default) | Copilot | loud |
43
- | [F11](#f11-manual-claude-session-initialization-resolved-by-runtime) | ~~Manual Claude session initialization~~ (resolved by runtime) | Claude | N/A |
44
- | [F12](#f12-resume-session-tries-to-swap-agents) | Resume session tries to swap agents | Copilot, OpenCode | loud |
45
- | [F13](#f13-parallel-siblings-read-each-others-transcripts) | Parallel siblings read each other's transcripts | all | loud |
46
- | [F14](#f14-forgetting-to-await-ctxstage) | Forgetting to `await` `ctx.stage()` | all | silent |
47
- | [F15](#f15-using-a-pending-sessionhandle-before-completion) | Using a pending `SessionHandle` before completion | all | silent |
48
- | [F16](#f16-headless-stage-errors-are-invisible-in-the-graph) | Headless stage errors are invisible in the graph | all | silent |
49
- | [F17](#f17-claude-importing-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
43
+ | [F11](#f11-provider-level-resume-tries-to-swap-agents) | Provider-level resume tries to swap agents | Copilot, OpenCode | loud |
44
+ | [F12](#f12-parallel-siblings-read-each-others-transcripts) | Parallel siblings read each other's transcripts | all | loud |
45
+ | [F13](#f13-forgetting-to-await-ctxstage) | Forgetting to `await` `ctx.stage()` | all | silent |
46
+ | [F14](#f14-using-a-pending-sessionhandle-before-completion) | Using a pending `SessionHandle` before completion | all | silent |
47
+ | [F15](#f15-headless-stage-errors-are-invisible-in-the-graph) | Headless stage errors are invisible in the graph | all | silent |
48
+ | [F16](#f16-claude-importing-the-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
50
49
 
51
50
  ---
52
51
 
53
- ## F1. Copilot: `getLastAssistantText` returns empty string
52
+ ## Silent failures
53
+
54
+ ### F1. Copilot: `getLastAssistantText` returns empty string
54
55
 
55
56
  **Symptom.** The orchestrator (or any downstream stage) receives an empty
56
57
  `plannerNotes` / `reviewerOutput` despite the prior agent running successfully
@@ -104,22 +105,22 @@ function getAssistantText(messages: SessionEvent[]): string {
104
105
  }
105
106
  ```
106
107
 
107
- **Detection.** Log the returned text length after every `runAgent` call
108
- during development. An empty or surprisingly short string for a stage
108
+ **Detection.** Log the returned text length after every `getAssistantText`
109
+ call during development. An empty or surprisingly short string for a stage
109
110
  that clearly ran is the signature.
110
111
 
111
112
  ---
112
113
 
113
- ## F2. Copilot: sub-agent messages pollute `getMessages()` stream
114
+ ### F2. Copilot: subagent messages pollute `getMessages()` stream
114
115
 
115
116
  **Symptom.** Downstream stages receive a snippet of text that doesn't match
116
- what the top-level agent said — it looks like a sub-agent's output.
117
+ what the top-level agent said — it looks like a subagent's output.
117
118
 
118
119
  **Root cause.** `assistant.message` events carry a `parentToolCallId?: string`
119
120
  field, documented as *"Tool call ID of the parent tool invocation when this
120
- event originates from a sub-agent"*. When the top-level agent delegates,
121
- `getMessages()` returns **the complete history including sub-agent messages**.
122
- Filters that don't exclude `parentToolCallId` can pick a sub-agent's final
121
+ event originates from a subagent"*. When the top-level agent delegates,
122
+ `getMessages()` returns **the complete history including subagent messages**.
123
+ Filters that don't exclude `parentToolCallId` can pick a subagent's final
123
124
  message via `.at(-1)`.
124
125
 
125
126
  **Affected SDKs.** Copilot.
@@ -143,7 +144,7 @@ scrollback for the top-level agent.
143
144
 
144
145
  ---
145
146
 
146
- ## F3. OpenCode: `result.data.parts` contains non-text parts
147
+ ### F3. OpenCode: `result.data.parts` contains non-text parts
147
148
 
148
149
  **Symptom.** Concatenated response text contains `[object Object]`,
149
150
  truncated content, or swallows tool-call payloads into the prompt.
@@ -177,7 +178,7 @@ function extractResponseText(
177
178
 
178
179
  ---
179
180
 
180
- ## F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
181
+ ### F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
181
182
 
182
183
  **Symptom.** Workflow code tries to access `.output` or `.text` on the
183
184
  result of `s.session.query()` and gets `undefined`, or passes the result
@@ -222,7 +223,7 @@ on an array returns `undefined`.
222
223
 
223
224
  ---
224
225
 
225
- ## F5. Fresh session wipes prior stage context
226
+ ### F5. Fresh session wipes prior stage context
226
227
 
227
228
  **Symptom.** The orchestrator says "I don't see a task list" or "what
228
229
  specification are you referring to?" even though the planner clearly ran.
@@ -239,18 +240,45 @@ mode does NOT apply to `s.session.query()`.)
239
240
  ### ❌ Wrong
240
241
 
241
242
  ```ts
242
- await runAgent("planner", buildPlannerPrompt((ctx.inputs.prompt ?? "")));
243
+ await ctx.stage({ name: "planner" }, {}, { agent: "planner" }, async (s) => {
244
+ await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
245
+ s.save(await s.session.getMessages());
246
+ });
243
247
  // orchestrator is a fresh session — it has no idea what the planner produced
244
- await runAgent("orchestrator", buildOrchestratorPrompt());
248
+ await ctx.stage({ name: "orchestrator" }, {}, { agent: "orchestrator" }, async (s) => {
249
+ await s.session.send({ prompt: buildOrchestratorPrompt() });
250
+ s.save(await s.session.getMessages());
251
+ });
245
252
  ```
246
253
 
247
254
  ### ✅ Right — explicit handoff
248
255
 
249
256
  ```ts
250
- const plannerNotes = await runAgent("planner", buildPlannerPrompt((ctx.inputs.prompt ?? "")));
251
- await runAgent(
252
- "orchestrator",
253
- buildOrchestratorPrompt((ctx.inputs.prompt ?? ""), { plannerNotes }),
257
+ const plannerHandle = await ctx.stage(
258
+ { name: "planner" },
259
+ {},
260
+ { agent: "planner" },
261
+ async (s) => {
262
+ await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
263
+ const messages = await s.session.getMessages();
264
+ s.save(messages);
265
+ return getAssistantText(messages); // see F1 for getAssistantText
266
+ },
267
+ );
268
+
269
+ await ctx.stage(
270
+ { name: "orchestrator" },
271
+ {},
272
+ { agent: "orchestrator" },
273
+ async (s) => {
274
+ await s.session.send({
275
+ prompt: buildOrchestratorPrompt(
276
+ (s.inputs.prompt ?? ""),
277
+ { plannerNotes: plannerHandle.result },
278
+ ),
279
+ });
280
+ s.save(await s.session.getMessages());
281
+ },
254
282
  );
255
283
  ```
256
284
 
@@ -264,7 +292,7 @@ controls what context is available".
264
292
 
265
293
  ---
266
294
 
267
- ## F6. Planner prompts that don't request trailing commentary produce empty handoffs
295
+ ### F6. Planner prompts that don't request trailing commentary produce empty handoffs
268
296
 
269
297
  **Symptom.** F1 / F5 are fixed, extraction is correct — and the orchestrator
270
298
  still receives empty `plannerNotes` because the planner's last turn legitimately
@@ -319,7 +347,7 @@ string + a correctly-fixed extraction helper = F6.
319
347
 
320
348
  ---
321
349
 
322
- ## F7. Continued sessions accumulate state across loop iterations (lost-in-middle)
350
+ ### F7. Continued sessions accumulate state across loop iterations (lost-in-middle)
323
351
 
324
352
  **Symptom.** A review/fix loop works on iterations 1-3 then starts
325
353
  producing worse output — misidentifying files, hallucinating line numbers,
@@ -378,7 +406,7 @@ iteration N, N is your safe-turn budget before compaction.
378
406
 
379
407
  ---
380
408
 
381
- ## F8. Fenced-block parsers break when the model adds prose
409
+ ### F8. Fenced-block parsers break when the model adds prose
382
410
 
383
411
  **Symptom.** `JSON.parse(content)` throws, or a "matches the first fenced
384
412
  block" regex picks up a code example inside prose instead of the actual
@@ -434,7 +462,7 @@ over several runs. If 1 in 20 runs fails to parse, you have F8.
434
462
 
435
463
  ---
436
464
 
437
- ## F9. `s.save()` receives the wrong shape
465
+ ### F9. `s.save()` receives the wrong shape
438
466
 
439
467
  **Symptom.** `s.transcript("stage-name")` returns an empty or malformed
440
468
  `content` string in the next stage.
@@ -458,9 +486,11 @@ expects, and the runtime doesn't type-check the argument beyond "anything".
458
486
  // Claude — saves the wrong thing (result is SessionMessage[], not { output: string })
459
487
  s.save(result.output); // TypeError: result.output is undefined; use s.save(s.sessionId)
460
488
 
461
- // Copilot — saves an empty array if called before send
462
- s.save(await s.session.getMessages());
463
- // Or saves one message object instead of the array
489
+ // Copilot — calling getMessages() BEFORE send() returns an empty array
490
+ const earlyMessages = await s.session.getMessages(); // [] — no turns yet
491
+ s.save(earlyMessages);
492
+
493
+ // Copilot — saving a single message instead of the full array
464
494
  s.save((await s.session.getMessages()).at(-1));
465
495
 
466
496
  // OpenCode — missing the data unwrap
@@ -479,7 +509,7 @@ log the length. A 0-length or JSON-that-isn't-prose signature = F9.
479
509
 
480
510
  ## Loud failures (throw, but still worth knowing)
481
511
 
482
- ## F10. Copilot: `sendAndWait` default 60s timeout throws
512
+ ### F10. Copilot: `sendAndWait` default 60s timeout throws
483
513
 
484
514
  **Symptom.** `Timeout after 60000ms waiting for session.idle`. Every
485
515
  subsequent `ctx.stage()` call never executes — the throw propagates out of
@@ -508,13 +538,7 @@ to "be safe", you want `send`.
508
538
 
509
539
  ---
510
540
 
511
- ## F11. ~~Manual Claude session initialization~~ (resolved by runtime)
512
-
513
- No longer a failure mode. The runtime now auto-initializes `s.client` and `s.session` before the callback runs — just use `s.session.query()` directly.
514
-
515
- ---
516
-
517
- ## F12. Provider-level resume tries to swap agents
541
+ ### F11. Provider-level resume tries to swap agents
518
542
 
519
543
  **Symptom.** Resumed Copilot / OpenCode session behaves as the original
520
544
  agent instead of the requested new one — or the SDK throws "agent mismatch"
@@ -530,7 +554,7 @@ over trying to reopen a prior stage.
530
554
 
531
555
  ---
532
556
 
533
- ## F13. Parallel siblings read each other's transcripts
557
+ ### F12. Parallel siblings read each other's transcripts
534
558
 
535
559
  **Symptom.** `s.transcript("sibling-name")` inside a parallel session
536
560
  throws or returns empty.
@@ -546,27 +570,28 @@ shared state (files, DB) if siblings genuinely need to coordinate.
546
570
 
547
571
  ```ts
548
572
  // Fan-out → merge
549
- await ctx.stage({ name: "describe" }, {}, {}, async (s) => { /* ... */ });
573
+ // Handles are used below (s.transcript(handle)); session-name strings also work, but prefer handles when one is in scope.
574
+ const describe = await ctx.stage({ name: "describe" }, {}, {}, async (s) => { /* ... */ });
550
575
 
551
- await Promise.all([
576
+ const [summarizeA, summarizeB] = await Promise.all([
552
577
  ctx.stage({ name: "summarize-a" }, {}, {}, async (s) => {
553
- const d = await s.transcript("describe"); // OK — prior completed session
578
+ const d = await s.transcript(describe); // OK — prior completed session (handle-based, preferred)
554
579
  // s.transcript("summarize-b") would fail here — sibling not yet complete
555
580
  }),
556
581
  ctx.stage({ name: "summarize-b" }, {}, {}, async (s) => {
557
- const d = await s.transcript("describe"); // OK — prior completed session
582
+ const d = await s.transcript(describe); // OK — prior completed session
558
583
  }),
559
584
  ]);
560
585
 
561
586
  await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
562
- const a = await s.transcript("summarize-a"); // OK — prior completed session
563
- const b = await s.transcript("summarize-b"); // OK — prior completed session
587
+ const a = await s.transcript(summarizeA); // OK — handle-based, preferred over "summarize-a"
588
+ const b = await s.transcript(summarizeB);
564
589
  });
565
590
  ```
566
591
 
567
592
  ---
568
593
 
569
- ## F14. Forgetting to `await` `ctx.stage()`
594
+ ### F13. Forgetting to `await` `ctx.stage()`
570
595
 
571
596
  **Symptom.** A session runs (its tmux window opens, the agent does work)
572
597
  but the orchestrator doesn't wait for it. Subsequent sessions that depend
@@ -617,7 +642,7 @@ this at compile time.
617
642
 
618
643
  ---
619
644
 
620
- ## F15. Using a pending `SessionHandle` before completion
645
+ ### F14. Using a pending `SessionHandle` before completion
621
646
 
622
647
  **Symptom.** `handle.result` is `undefined` or stale, or
623
648
  `s.transcript(handle)` throws / returns empty even though the session
@@ -670,7 +695,7 @@ accessing `.result` without awaiting, the type will be `Promise`, not `T`.
670
695
 
671
696
  ---
672
697
 
673
- ## F16. Headless stage errors are invisible in the graph
698
+ ### F15. Headless stage errors are invisible in the graph
674
699
 
675
700
  **Symptom.** A workflow fails but the graph shows all visible stages as
676
701
  completed. The error message references a session name that doesn't appear
@@ -721,7 +746,7 @@ full error for each failed headless stage.
721
746
 
722
747
  ---
723
748
 
724
- ## F17. Claude: importing the SDK `query()` inside a non-headless stage
749
+ ### F16. Claude: importing the SDK `query()` inside a non-headless stage
725
750
 
726
751
  **Symptom.** A reviewer / extractor / structured-output stage shows up in
727
752
  the workflow graph as a tmux pane, but the pane sits idle on the Claude
@@ -760,7 +785,7 @@ The runtime exposes exactly two routes for an SDK feature:
760
785
  | You want to use… | Stage shape | Code in callback |
761
786
  |---|---|---|
762
787
  | `outputFormat`, custom `agents`, `maxBudgetUsd`, etc. **without** a visible pane | `{ headless: true }` | `s.session.query(prompt, sdkOptions)` — wraps `HeadlessClaudeSessionWrapper.query()` which forwards `options` to the SDK |
763
- | The visible TUI with a sub-agent | omit `headless` and pass `chatFlags: ["--agent", "<name>", ...]` | `s.session.query(prompt)` — sends through tmux send-keys |
788
+ | The visible TUI with a subagent | omit `headless` and pass `chatFlags: ["--agent", "<name>", ...]` | `s.session.query(prompt)` — sends through tmux send-keys |
764
789
 
765
790
  The one option that does **not** exist is "visible pane + in-process SDK call".
766
791
  That combination is always wrong — pick one route or the other.
@@ -787,9 +812,9 @@ await ctx.stage({ name: "review" }, {}, {}, async (s) => {
787
812
  });
788
813
  ```
789
814
 
790
- ### ✅ Right (a) — visible TUI with sub-agent + chatFlags
815
+ ### ✅ Right (a) — visible TUI with subagent + chatFlags
791
816
 
792
- When you want the user to watch the review happen, run the sub-agent in
817
+ When you want the user to watch the review happen, run the subagent in
793
818
  the pane via `--agent` and parse JSON out of the assistant text. The
794
819
  prompt should enumerate the schema fields so the model emits matching
795
820
  JSON; a tolerant parser (last-fenced-block + last-balanced-object
@@ -853,8 +878,8 @@ await ctx.stage(
853
878
  `s.client` and `s.session`.
854
879
  2. Watch the workflow run. If a visible pane shows the Claude welcome
855
880
  screen for the entire duration of a stage and never receives a prompt,
856
- you have F17.
857
- 3. Cost monitoring. F17 roughly doubles the Claude process count — if
881
+ you have F16.
882
+ 3. Cost monitoring. F16 roughly doubles the Claude process count — if
858
883
  stage spend looks 2× a single run, audit imports.
859
884
 
860
885
  ---
@@ -870,9 +895,9 @@ Before shipping a multi-session workflow, walk the list:
870
895
  - [ ] Structured-output parsers extract the LAST fenced block, not the first (F8)
871
896
  - [ ] `s.save()` receives the per-SDK correct shape — Copilot uses `s.session.getMessages()` (F9)
872
897
  - [ ] Loops over 10 iterations have a compaction / reset strategy (F7)
873
- - [ ] Parallel groups only read from prior completed sessions, never siblings (F13)
874
- - [ ] Every `ctx.stage()` call is `await`ed (F14)
875
- - [ ] `SessionHandle` values are only used after the promise resolves (F15)
876
- - [ ] If provider-level resume/fork is used at all, it stays within the same agent role (F12)
877
- - [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (F16)
878
- - [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (F17)
898
+ - [ ] Parallel groups only read from prior completed sessions, never siblings (F12)
899
+ - [ ] Every `ctx.stage()` call is `await`ed (F13)
900
+ - [ ] `SessionHandle` values are only used after the promise resolves (F14)
901
+ - [ ] If provider-level resume/fork is used at all, it stays within the same agent role (F11)
902
+ - [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (F15)
903
+ - [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (F16)
@@ -171,47 +171,27 @@ if (needsReview) {
171
171
 
172
172
  ## Headless (background) stages
173
173
 
174
- Stages can run in headless mode by setting `headless: true` in the first argument to `ctx.stage()`. Headless stages execute the provider SDK in-process instead of spawning a tmux window — they are invisible in the workflow graph but tracked via a background task counter in the statusline.
174
+ Set `headless: true` in the stage options to run the provider SDK
175
+ in-process instead of spawning a tmux window — invisible in the graph,
176
+ identical callback API.
175
177
 
176
178
  ```ts
177
- // Headless stage — identical callback API, no tmux window
178
179
  const result = await ctx.stage(
179
180
  { name: "background-task", headless: true },
180
181
  {}, {},
181
182
  async (s) => {
182
- // s.client, s.session, s.save(), s.transcript() all work identically
183
183
  const result = await s.session.query("Analyze the codebase.");
184
184
  s.save(s.sessionId);
185
185
  return extractAssistantText(result, 0);
186
186
  },
187
187
  );
188
- // result.result contains the returned value
189
188
  ```
190
189
 
191
- The callback interface is identical to interactive stages. The only differences:
192
- - No tmux window is created
193
- - The stage does not appear as a node in the workflow graph
194
- - The `paneId` is a virtual identifier: `headless-<name>-<sessionId>`
195
- - Background stages are tracked by a counter in the orchestrator statusline
196
-
197
- **Common pattern — visible seed, parallel headless gather, visible merge:**
198
-
199
- ```ts
200
- const seed = await ctx.stage({ name: "seed" }, {}, {}, async (s) => { /* ... */ });
201
-
202
- const [a, b, c] = await Promise.all([
203
- ctx.stage({ name: "gather-a", headless: true }, {}, {}, async (s) => { /* ... */ }),
204
- ctx.stage({ name: "gather-b", headless: true }, {}, {}, async (s) => { /* ... */ }),
205
- ctx.stage({ name: "gather-c", headless: true }, {}, {}, async (s) => { /* ... */ }),
206
- ]);
207
-
208
- await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
209
- await s.session.query(`Merge:\n${a.result}\n${b.result}\n${c.result}`);
210
- s.save(s.sessionId);
211
- });
212
- ```
213
-
214
- Headless stages are transparent to graph topology — `seed → [3 headless] → merge` renders as `seed → merge` in the graph.
190
+ For per-provider mechanics, the canonical fan-out pattern (visible seed →
191
+ parallel headless → visible merge), and topology semantics, see
192
+ `control-flow.md` §"Headless stages: transparent to graph topology" and the
193
+ per-SDK "Headless mode" sections in `agent-sessions.md`. Failure visibility
194
+ caveats live in `failure-modes.md` §F15.
215
195
 
216
196
  ## SDK exports
217
197
 
@@ -275,16 +255,9 @@ The Atomic runtime provides `s.client` and `s.session` with types resolved from
275
255
 
276
256
  ## Reference files
277
257
 
278
- | File | Topic |
279
- |---|---|
280
- | `workflow-inputs.md` | Declaring the `inputs: WorkflowInput[]` schema, the free-form vs structured decision, CLI flag + picker invocation surfaces, builtin protection |
281
- | `agent-sessions.md` | Creating agent sessions with SDK calls per provider |
282
- | `computation-and-validation.md` | Deterministic computation, parsing, validation inside `run()` |
283
- | `user-input.md` | Collecting user input **mid-workflow** (for invocation-time inputs, see `workflow-inputs.md`) |
284
- | `control-flow.md` | Loops, conditionals, early termination in plain TypeScript |
285
- | `state-and-data-flow.md` | Data flow between sessions, transcripts, persistence |
286
- | `session-config.md` | Per-SDK session configuration: model, tools, permissions, hooks |
287
- | `discovery-and-verification.md` | Workflow file discovery, validation, TypeScript config |
258
+ The full table of references with load triggers lives in SKILL.md
259
+ §"Reference Files". Pull `failure-modes.md` before shipping any
260
+ multi-session workflow, and `agent-sessions.md` whenever writing SDK calls.
288
261
 
289
262
  ## Builtin reference implementations
290
263
 
@@ -292,10 +265,11 @@ The SDK ships two builtin workflows that demonstrate production patterns for all
292
265
 
293
266
  - **`ralph`** (`src/sdk/workflows/builtin/ralph/`) — iterative plan → orchestrate → review → debug loop with consecutive clean-pass detection, shared helpers for prompts/parsing/git, and cross-SDK adaptation
294
267
  - **`deep-research-codebase`** (`src/sdk/workflows/builtin/deep-research-codebase/`) — deterministic codebase scout → LOC-based heuristic explorer partitioning → parallel explorers → aggregator with file-based handoffs and context-aware prompt engineering
295
- - **`headless-test`** (`.atomic/workflows/headless-test/`) — demonstrates the visible → [parallel headless] → visible merge pattern (all 3 SDKs)
296
268
 
297
269
  Both include `helpers/` directories with SDK-agnostic logic (prompt builders, parsers, heuristics) and per-agent `index.ts` files showing how the same workflow topology adapts to Claude, Copilot, and OpenCode.
298
270
 
271
+ For a minimal headless example (not a builtin — it lives as a local workflow in this repo), see `.atomic/workflows/headless-test/`, which demonstrates the visible → [parallel headless] → visible merge pattern for all three SDKs.
272
+
299
273
  ## Type safety
300
274
 
301
- The SDK is typed with **no `unknown` or `any`**. `SessionContext` fields are precisely typed, and native provider types may appear inside Atomic generic aliases and runtime values — if you need to name those types in your own code, import them from the provider SDK directly. Use `import type` for type-only imports. Use `.for<"agent">()` to narrow `s.client` and `s.session` to the correct provider types. Declare `inputs` inline so TypeScript enforces typed access on `ctx.inputs`.
275
+ The SDK avoids `any` and uses `unknown` only at well-defined boundaries (e.g., `SessionRef = string | SessionHandle<unknown>` for handle-erased lookups). `SessionContext` fields are precisely typed, and native provider types may appear inside Atomic generic aliases and runtime values — if you need to name those types in your own code, import them from the provider SDK directly. Use `import type` for type-only imports. Use `.for<"agent">()` to narrow `s.client` and `s.session` to the correct provider types. Declare `inputs` inline so TypeScript enforces typed access on `ctx.inputs`.