npm - @bastani/atomic - Versions diffs - 0.5.23-0 → 0.5.24-0 - Mend

@bastani/atomic 0.5.23-0 → 0.5.24-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/.agents/skills/workflow-creator/references/control-flow.md CHANGED

@@ -22,7 +22,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
   // Step 1: Classify the request
   const triage = await ctx.stage({ name: "triage" }, {}, {}, async (s) => {
     const result = await s.session.query(
-      `Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
+      `Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
     );
     s.save(s.sessionId);
     return extractAssistantText(result, 0).toLowerCase();
@@ -60,7 +60,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
 .run(async (ctx) => {
   await ctx.stage({ name: "triage-and-act" }, {}, {}, async (s) => {
     const triageResult = await s.session.query(
-      `Classify this as "bug", "feature", or "question": ${(ctx.inputs.prompt ?? "")}`,
+      `Classify this as "bug", "feature", or "question": ${(s.inputs.prompt ?? "")}`,
     );
     const classification = extractAssistantText(triageResult, 0).toLowerCase();
@@ -143,7 +143,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
   for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
     // Each review is a visible graph node
     const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
-      const result = await s.session.query(buildReviewPrompt((ctx.inputs.prompt ?? "")));
+      const result = await s.session.query(buildReviewPrompt((s.inputs.prompt ?? "")));
       s.save(s.sessionId);
       return extractAssistantText(result, 0);
     });
@@ -162,8 +162,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
     consecutiveClean = 0;
     const fixPrompt = parsed
-      ? buildFixSpecFromReview(parsed, (ctx.inputs.prompt ?? ""))
-      : buildFixSpecFromRawReview(reviewRaw, (ctx.inputs.prompt ?? ""));
+      ? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
+      : buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
     // Each fix is also a visible graph node
     await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
@@ -176,7 +176,13 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
 ### Same pattern with Copilot
+Copilot lacks a built-in text extractor — define `getAssistantText` as a
+helper in your workflow (canonical definition in `failure-modes.md` §F1)
+and import it from a sibling file:
 ```ts
+import { getAssistantText } from "../helpers/parsers.ts"; // see failure-modes.md §F1
 .run(async (ctx) => {
   const MAX_CYCLES = 10;
   let consecutiveClean = 0;
@@ -184,9 +190,9 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
   for (let cycle = 1; cycle <= MAX_CYCLES; cycle++) {
     const review = await ctx.stage({ name: `review-${cycle}` }, {}, {}, async (s) => {
       await s.session.send({
-        prompt: buildReviewPrompt((ctx.inputs.prompt ?? "")),
+        prompt: buildReviewPrompt((s.inputs.prompt ?? "")),
       });
-      const reviewRaw = getAssistantText(await s.session.getMessages()); // see failure-modes.md §F1
+      const reviewRaw = getAssistantText(await s.session.getMessages());
       s.save(await s.session.getMessages());
       return reviewRaw;
@@ -203,8 +209,8 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
     consecutiveClean = 0;
     const fixPrompt = parsed
-      ? buildFixSpecFromReview(parsed, (ctx.inputs.prompt ?? ""))
-      : buildFixSpecFromRawReview(reviewRaw, (ctx.inputs.prompt ?? ""));
+      ? buildFixSpecFromReview(parsed, (s.inputs.prompt ?? ""))
+      : buildFixSpecFromRawReview(reviewRaw, (s.inputs.prompt ?? ""));
     await ctx.stage({ name: `fix-${cycle}` }, {}, {}, async (s) => {
       await s.session.send({
@@ -252,7 +258,7 @@ Sessions passed to `Promise.all([...])` branch from the same parent and run conc
 A stage awaited after a `Promise.all` resolves automatically receives all parallel stages as parents — the graph draws a merge node:
 ```ts
-// ✅ Graph infers: orchestrator → A → [B, C] → D (fan-in merge)
+// ✅ Graph infers: A → [B, C] → D (fan-in merge)
 .run(async (ctx) => {
   await ctx.stage({ name: "A" }, {}, {}, async (s) => { /* ... */ });
@@ -385,11 +391,11 @@ Within a single session callback, each SDK call adds to the conversation context
 .run(async (ctx) => {
   await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
     try {
-      await s.session.query((ctx.inputs.prompt ?? ""));
+      await s.session.query((s.inputs.prompt ?? ""));
     } catch (error) {
       // Retry with simpler prompt
       await s.session.query(
-        `The previous attempt failed. Please try a simpler approach: ${(ctx.inputs.prompt ?? "")}`,
+        `The previous attempt failed. Please try a simpler approach: ${(s.inputs.prompt ?? "")}`,
       );
     }
     s.save(s.sessionId);
@@ -418,7 +424,7 @@ async function retryWithBackoff<T>(
 .run(async (ctx) => {
   await ctx.stage({ name: "implement" }, {}, {}, async (s) => {
-    await retryWithBackoff(() => s.session.query((ctx.inputs.prompt ?? "")));
+    await retryWithBackoff(() => s.session.query((s.inputs.prompt ?? "")));
     s.save(s.sessionId);
   });
 })
@@ -434,7 +440,7 @@ import { extractAssistantText } from "@bastani/atomic/workflows";
 .run(async (ctx) => {
   // Step 1: Analyse — result is available as a typed handle
   const analysisHandle = await ctx.stage({ name: "analyze" }, {}, {}, async (s) => {
-    const result = await s.session.query(`Analyse the task: ${(ctx.inputs.prompt ?? "")}`);
+    const result = await s.session.query(`Analyse the task: ${(s.inputs.prompt ?? "")}`);
     s.save(s.sessionId);
     return extractAssistantText(result, 0);
   });

package/.agents/skills/workflow-creator/references/discovery-and-verification.md CHANGED

@@ -5,8 +5,8 @@
 ```bash
 bun init                                   # Create a new project
 bun add @bastani/atomic                    # Install the workflow SDK
-bun add @github/copilot-sdk               # For Copilot workflows
 bun add @anthropic-ai/claude-agent-sdk    # For Claude workflows
+bun add @github/copilot-sdk               # For Copilot workflows
 bun add @opencode-ai/sdk                  # For OpenCode workflows
 ```

package/.agents/skills/workflow-creator/references/failure-modes.md CHANGED

@@ -31,7 +31,7 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
 | # | Failure | Affected | Silent? |
 |---|---|---|---|
 | [F1](#f1-copilot-getlastassistanttext-returns-empty-string) | Copilot: `getLastAssistantText` returns empty string | Copilot | silent |
-| [F2](#f2-copilot-sub-agent-messages-pollute-getmessages-stream) | Copilot: sub-agent messages pollute `getMessages()` stream | Copilot | silent |
+| [F2](#f2-copilot-subagent-messages-pollute-getmessages-stream) | Copilot: subagent messages pollute `getMessages()` stream | Copilot | silent |
 | [F3](#f3-opencode-result-parts-contain-non-text-parts) | OpenCode: `result.data.parts` contains non-text parts | OpenCode | silent |
 | [F4](#f4-claude-ssessionquery-returns-sessionmessage-extract-text-with-extractassistanttext) | Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText(result, 0)` | Claude | silent |
 | [F5](#f5-fresh-session-wipes-prior-stage-context) | Fresh session wipes prior stage context | Copilot, OpenCode | silent |
@@ -40,17 +40,18 @@ Silent failures are catalogued first below. Loud failures are grouped at the end
 | [F8](#f8-fenced-block-parsers-break-when-the-model-adds-prose) | Fenced-block parsers break when the model adds prose before/after | all | silent |
 | [F9](#f9-ssave-receives-the-wrong-shape) | `s.save()` receives the wrong shape for the SDK | all | silent |
 | [F10](#f10-copilot-sendandwait-default-60s-timeout-throws) | Copilot: `sendAndWait` default 60s timeout throws (use `send` by default) | Copilot | loud |
-| [F11](#f11-manual-claude-session-initialization-resolved-by-runtime) | ~~Manual Claude session initialization~~ (resolved by runtime) | Claude | N/A |
-| [F12](#f12-resume-session-tries-to-swap-agents) | Resume session tries to swap agents | Copilot, OpenCode | loud |
-| [F13](#f13-parallel-siblings-read-each-others-transcripts) | Parallel siblings read each other's transcripts | all | loud |
-| [F14](#f14-forgetting-to-await-ctxstage) | Forgetting to `await` `ctx.stage()` | all | silent |
-| [F15](#f15-using-a-pending-sessionhandle-before-completion) | Using a pending `SessionHandle` before completion | all | silent |
-| [F16](#f16-headless-stage-errors-are-invisible-in-the-graph) | Headless stage errors are invisible in the graph | all | silent |
-| [F17](#f17-claude-importing-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
+| [F11](#f11-provider-level-resume-tries-to-swap-agents) | Provider-level resume tries to swap agents | Copilot, OpenCode | loud |
+| [F12](#f12-parallel-siblings-read-each-others-transcripts) | Parallel siblings read each other's transcripts | all | loud |
+| [F13](#f13-forgetting-to-await-ctxstage) | Forgetting to `await` `ctx.stage()` | all | silent |
+| [F14](#f14-using-a-pending-sessionhandle-before-completion) | Using a pending `SessionHandle` before completion | all | silent |
+| [F15](#f15-headless-stage-errors-are-invisible-in-the-graph) | Headless stage errors are invisible in the graph | all | silent |
+| [F16](#f16-claude-importing-sdk-query-inside-a-non-headless-stage) | Claude: importing the SDK `query()` inside a non-headless stage (anti-pattern) | Claude | silent |
 ---
-## F1. Copilot: `getLastAssistantText` returns empty string
+## Silent failures
+### F1. Copilot: `getLastAssistantText` returns empty string
 **Symptom.** The orchestrator (or any downstream stage) receives an empty
 `plannerNotes` / `reviewerOutput` despite the prior agent running successfully
@@ -104,22 +105,22 @@ function getAssistantText(messages: SessionEvent[]): string {
 }
 ```
-**Detection.** Log the returned text length after every `runAgent` call
-during development. An empty or surprisingly short string for a stage
+**Detection.** Log the returned text length after every `getAssistantText`
+call during development. An empty or surprisingly short string for a stage
 that clearly ran is the signature.
 ---
-## F2. Copilot: sub-agent messages pollute `getMessages()` stream
+### F2. Copilot: subagent messages pollute `getMessages()` stream
 **Symptom.** Downstream stages receive a snippet of text that doesn't match
-what the top-level agent said — it looks like a sub-agent's output.
+what the top-level agent said — it looks like a subagent's output.
 **Root cause.** `assistant.message` events carry a `parentToolCallId?: string`
 field, documented as *"Tool call ID of the parent tool invocation when this
-event originates from a sub-agent"*. When the top-level agent delegates,
-`getMessages()` returns **the complete history including sub-agent messages**.
-Filters that don't exclude `parentToolCallId` can pick a sub-agent's final
+event originates from a subagent"*. When the top-level agent delegates,
+`getMessages()` returns **the complete history including subagent messages**.
+Filters that don't exclude `parentToolCallId` can pick a subagent's final
 message via `.at(-1)`.
 **Affected SDKs.** Copilot.
@@ -143,7 +144,7 @@ scrollback for the top-level agent.
 ---
-## F3. OpenCode: `result.data.parts` contains non-text parts
+### F3. OpenCode: `result.data.parts` contains non-text parts
 **Symptom.** Concatenated response text contains `[object Object]`,
 truncated content, or swallows tool-call payloads into the prompt.
@@ -177,7 +178,7 @@ function extractResponseText(
 ---
-## F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
+### F4. Claude: `s.session.query()` returns `SessionMessage[]` — extract text with `extractAssistantText`
 **Symptom.** Workflow code tries to access `.output` or `.text` on the
 result of `s.session.query()` and gets `undefined`, or passes the result
@@ -222,7 +223,7 @@ on an array returns `undefined`.
 ---
-## F5. Fresh session wipes prior stage context
+### F5. Fresh session wipes prior stage context
 **Symptom.** The orchestrator says "I don't see a task list" or "what
 specification are you referring to?" even though the planner clearly ran.
@@ -239,18 +240,45 @@ mode does NOT apply to `s.session.query()`.)
 ### ❌ Wrong
 ```ts
-await runAgent("planner", buildPlannerPrompt((ctx.inputs.prompt ?? "")));
+await ctx.stage({ name: "planner" }, {}, { agent: "planner" }, async (s) => {
+  await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
+  s.save(await s.session.getMessages());
+});
 // orchestrator is a fresh session — it has no idea what the planner produced
-await runAgent("orchestrator", buildOrchestratorPrompt());
+await ctx.stage({ name: "orchestrator" }, {}, { agent: "orchestrator" }, async (s) => {
+  await s.session.send({ prompt: buildOrchestratorPrompt() });
+  s.save(await s.session.getMessages());
+});
 ```
 ### ✅ Right — explicit handoff
 ```ts
-const plannerNotes = await runAgent("planner", buildPlannerPrompt((ctx.inputs.prompt ?? "")));
-await runAgent(
-  "orchestrator",
-  buildOrchestratorPrompt((ctx.inputs.prompt ?? ""), { plannerNotes }),
+const plannerHandle = await ctx.stage(
+  { name: "planner" },
+  {},
+  { agent: "planner" },
+  async (s) => {
+    await s.session.send({ prompt: buildPlannerPrompt((s.inputs.prompt ?? "")) });
+    const messages = await s.session.getMessages();
+    s.save(messages);
+    return getAssistantText(messages); // see F1 for getAssistantText
+  },
+);
+await ctx.stage(
+  { name: "orchestrator" },
+  {},
+  { agent: "orchestrator" },
+  async (s) => {
+    await s.session.send({
+      prompt: buildOrchestratorPrompt(
+        (s.inputs.prompt ?? ""),
+        { plannerNotes: plannerHandle.result },
+      ),
+    });
+    s.save(await s.session.getMessages());
+  },
 );
 ```
@@ -264,7 +292,7 @@ controls what context is available".
 ---
-## F6. Planner prompts that don't request trailing commentary produce empty handoffs
+### F6. Planner prompts that don't request trailing commentary produce empty handoffs
 **Symptom.** F1 / F5 are fixed, extraction is correct — and the orchestrator
 still receives empty `plannerNotes` because the planner's last turn legitimately
@@ -319,7 +347,7 @@ string + a correctly-fixed extraction helper = F6.
 ---
-## F7. Continued sessions accumulate state across loop iterations (lost-in-middle)
+### F7. Continued sessions accumulate state across loop iterations (lost-in-middle)
 **Symptom.** A review/fix loop works on iterations 1-3 then starts
 producing worse output — misidentifying files, hallucinating line numbers,
@@ -378,7 +406,7 @@ iteration N, N is your safe-turn budget before compaction.
 ---
-## F8. Fenced-block parsers break when the model adds prose
+### F8. Fenced-block parsers break when the model adds prose
 **Symptom.** `JSON.parse(content)` throws, or a "matches the first fenced
 block" regex picks up a code example inside prose instead of the actual
@@ -434,7 +462,7 @@ over several runs. If 1 in 20 runs fails to parse, you have F8.
 ---
-## F9. `s.save()` receives the wrong shape
+### F9. `s.save()` receives the wrong shape
 **Symptom.** `s.transcript("stage-name")` returns an empty or malformed
 `content` string in the next stage.
@@ -458,9 +486,11 @@ expects, and the runtime doesn't type-check the argument beyond "anything".
 // Claude — saves the wrong thing (result is SessionMessage[], not { output: string })
 s.save(result.output);  // TypeError: result.output is undefined; use s.save(s.sessionId)
-// Copilot — saves an empty array if called before send
-s.save(await s.session.getMessages());
-// Or saves one message object instead of the array
+// Copilot — calling getMessages() BEFORE send() returns an empty array
+const earlyMessages = await s.session.getMessages(); // [] — no turns yet
+s.save(earlyMessages);
+// Copilot — saving a single message instead of the full array
 s.save((await s.session.getMessages()).at(-1));
 // OpenCode — missing the data unwrap
@@ -479,7 +509,7 @@ log the length. A 0-length or JSON-that-isn't-prose signature = F9.
 ## Loud failures (throw, but still worth knowing)
-## F10. Copilot: `sendAndWait` default 60s timeout throws
+### F10. Copilot: `sendAndWait` default 60s timeout throws
 **Symptom.** `Timeout after 60000ms waiting for session.idle`. Every
 subsequent `ctx.stage()` call never executes — the throw propagates out of
@@ -508,13 +538,7 @@ to "be safe", you want `send`.
 ---
-## F11. ~~Manual Claude session initialization~~ (resolved by runtime)
-No longer a failure mode. The runtime now auto-initializes `s.client` and `s.session` before the callback runs — just use `s.session.query()` directly.
----
-## F12. Provider-level resume tries to swap agents
+### F11. Provider-level resume tries to swap agents
 **Symptom.** Resumed Copilot / OpenCode session behaves as the original
 agent instead of the requested new one — or the SDK throws "agent mismatch"
@@ -530,7 +554,7 @@ over trying to reopen a prior stage.
 ---
-## F13. Parallel siblings read each other's transcripts
+### F12. Parallel siblings read each other's transcripts
 **Symptom.** `s.transcript("sibling-name")` inside a parallel session
 throws or returns empty.
@@ -546,27 +570,28 @@ shared state (files, DB) if siblings genuinely need to coordinate.
 ```ts
 // Fan-out → merge
-await ctx.stage({ name: "describe" }, {}, {}, async (s) => { /* ... */ });
+// Strings used here for brevity; prefer handles (s.transcript(handle)) when one is in scope.
+const describe = await ctx.stage({ name: "describe" }, {}, {}, async (s) => { /* ... */ });
-await Promise.all([
+const [summarizeA, summarizeB] = await Promise.all([
   ctx.stage({ name: "summarize-a" }, {}, {}, async (s) => {
-    const d = await s.transcript("describe"); // OK — prior completed session
+    const d = await s.transcript(describe); // OK — prior completed session (handle-based, preferred)
     // s.transcript("summarize-b") would fail here — sibling not yet complete
   }),
   ctx.stage({ name: "summarize-b" }, {}, {}, async (s) => {
-    const d = await s.transcript("describe"); // OK — prior completed session
+    const d = await s.transcript(describe); // OK — prior completed session
   }),
 ]);
 await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
-  const a = await s.transcript("summarize-a"); // OK — prior completed session
-  const b = await s.transcript("summarize-b"); // OK — prior completed session
+  const a = await s.transcript(summarizeA); // OK — handle-based, preferred over "summarize-a"
+  const b = await s.transcript(summarizeB);
 });
 ```
 ---
-## F14. Forgetting to `await` `ctx.stage()`
+### F13. Forgetting to `await` `ctx.stage()`
 **Symptom.** A session runs (its tmux window opens, the agent does work)
 but the orchestrator doesn't wait for it. Subsequent sessions that depend
@@ -617,7 +642,7 @@ this at compile time.
 ---
-## F15. Using a pending `SessionHandle` before completion
+### F14. Using a pending `SessionHandle` before completion
 **Symptom.** `handle.result` is `undefined` or stale, or
 `s.transcript(handle)` throws / returns empty even though the session
@@ -670,7 +695,7 @@ accessing `.result` without awaiting, the type will be `Promise`, not `T`.
 ---
-## F16. Headless stage errors are invisible in the graph
+### F15. Headless stage errors are invisible in the graph
 **Symptom.** A workflow fails but the graph shows all visible stages as
 completed. The error message references a session name that doesn't appear
@@ -721,7 +746,7 @@ full error for each failed headless stage.
 ---
-## F17. Claude: importing the SDK `query()` inside a non-headless stage
+### F16. Claude: importing the SDK `query()` inside a non-headless stage
 **Symptom.** A reviewer / extractor / structured-output stage shows up in
 the workflow graph as a tmux pane, but the pane sits idle on the Claude
@@ -760,7 +785,7 @@ The runtime exposes exactly two routes for an SDK feature:
 | You want to use… | Stage shape | Code in callback |
 |---|---|---|
 | `outputFormat`, custom `agents`, `maxBudgetUsd`, etc. **without** a visible pane | `{ headless: true }` | `s.session.query(prompt, sdkOptions)` — wraps `HeadlessClaudeSessionWrapper.query()` which forwards `options` to the SDK |
-| The visible TUI with a sub-agent | omit `headless` and pass `chatFlags: ["--agent", "<name>", ...]` | `s.session.query(prompt)` — sends through tmux send-keys |
+| The visible TUI with a subagent | omit `headless` and pass `chatFlags: ["--agent", "<name>", ...]` | `s.session.query(prompt)` — sends through tmux send-keys |
 The one option that does **not** exist is "visible pane + in-process SDK call".
 That combination is always wrong — pick one route or the other.
@@ -787,9 +812,9 @@ await ctx.stage({ name: "review" }, {}, {}, async (s) => {
 });
 ```
-### ✅ Right (a) — visible TUI with sub-agent + chatFlags
+### ✅ Right (a) — visible TUI with subagent + chatFlags
-When you want the user to watch the review happen, run the sub-agent in
+When you want the user to watch the review happen, run the subagent in
 the pane via `--agent` and parse JSON out of the assistant text. The
 prompt should enumerate the schema fields so the model emits matching
 JSON; a tolerant parser (last-fenced-block + last-balanced-object
@@ -853,8 +878,8 @@ await ctx.stage(
    `s.client` and `s.session`.
 2. Watch the workflow run. If a visible pane shows the Claude welcome
    screen for the entire duration of a stage and never receives a prompt,
-   you have F17.
-3. Cost monitoring. F17 roughly doubles the Claude process count — if
+   you have F16.
+3. Cost monitoring. F16 roughly doubles the Claude process count — if
    stage spend looks 2× a single run, audit imports.
 ---
@@ -870,9 +895,9 @@ Before shipping a multi-session workflow, walk the list:
 - [ ] Structured-output parsers extract the LAST fenced block, not the first (F8)
 - [ ] `s.save()` receives the per-SDK correct shape — Copilot uses `s.session.getMessages()` (F9)
 - [ ] Loops over 10 iterations have a compaction / reset strategy (F7)
-- [ ] Parallel groups only read from prior completed sessions, never siblings (F13)
-- [ ] Every `ctx.stage()` call is `await`ed (F14)
-- [ ] `SessionHandle` values are only used after the promise resolves (F15)
-- [ ] If provider-level resume/fork is used at all, it stays within the same agent role (F12)
-- [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (F16)
-- [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (F17)
+- [ ] Parallel groups only read from prior completed sessions, never siblings (F12)
+- [ ] Every `ctx.stage()` call is `await`ed (F13)
+- [ ] `SessionHandle` values are only used after the promise resolves (F14)
+- [ ] If provider-level resume/fork is used at all, it stays within the same agent role (F11)
+- [ ] Headless stage callbacks include descriptive error context so failures can be diagnosed without a graph node (F15)
+- [ ] Claude stages never import `query` (or other entry points) from `@anthropic-ai/claude-agent-sdk` directly — go through `s.session.query()` so the runtime routes to the TUI (interactive) or the SDK (headless) consistently (F16)

package/.agents/skills/workflow-creator/references/getting-started.md CHANGED

@@ -171,47 +171,27 @@ if (needsReview) {
 ## Headless (background) stages
-Stages can run in headless mode by setting `headless: true` in the first argument to `ctx.stage()`. Headless stages execute the provider SDK in-process instead of spawning a tmux window — they are invisible in the workflow graph but tracked via a background task counter in the statusline.
+Set `headless: true` in the stage options to run the provider SDK
+in-process instead of spawning a tmux window — invisible in the graph,
+identical callback API.
 ```ts
-// Headless stage — identical callback API, no tmux window
 const result = await ctx.stage(
   { name: "background-task", headless: true },
   {}, {},
   async (s) => {
-    // s.client, s.session, s.save(), s.transcript() all work identically
     const result = await s.session.query("Analyze the codebase.");
     s.save(s.sessionId);
     return extractAssistantText(result, 0);
   },
 );
-// result.result contains the returned value
 ```
-The callback interface is identical to interactive stages. The only differences:
-- No tmux window is created
-- The stage does not appear as a node in the workflow graph
-- The `paneId` is a virtual identifier: `headless-<name>-<sessionId>`
-- Background stages are tracked by a counter in the orchestrator statusline
-**Common pattern — visible seed, parallel headless gather, visible merge:**
-```ts
-const seed = await ctx.stage({ name: "seed" }, {}, {}, async (s) => { /* ... */ });
-const [a, b, c] = await Promise.all([
-  ctx.stage({ name: "gather-a", headless: true }, {}, {}, async (s) => { /* ... */ }),
-  ctx.stage({ name: "gather-b", headless: true }, {}, {}, async (s) => { /* ... */ }),
-  ctx.stage({ name: "gather-c", headless: true }, {}, {}, async (s) => { /* ... */ }),
-]);
-await ctx.stage({ name: "merge" }, {}, {}, async (s) => {
-  await s.session.query(`Merge:\n${a.result}\n${b.result}\n${c.result}`);
-  s.save(s.sessionId);
-});
-```
-Headless stages are transparent to graph topology — `seed → [3 headless] → merge` renders as `seed → merge` in the graph.
+For per-provider mechanics, the canonical fan-out pattern (visible seed →
+parallel headless → visible merge), and topology semantics, see
+`control-flow.md` §"Headless stages: transparent to graph topology" and the
+per-SDK "Headless mode" sections in `agent-sessions.md`. Failure visibility
+caveats live in `failure-modes.md` §F15.
 ## SDK exports
@@ -275,16 +255,9 @@ The Atomic runtime provides `s.client` and `s.session` with types resolved from
 ## Reference files
-| File | Topic |
-|---|---|
-| `workflow-inputs.md` | Declaring the `inputs: WorkflowInput[]` schema, the free-form vs structured decision, CLI flag + picker invocation surfaces, builtin protection |
-| `agent-sessions.md` | Creating agent sessions with SDK calls per provider |
-| `computation-and-validation.md` | Deterministic computation, parsing, validation inside `run()` |
-| `user-input.md` | Collecting user input **mid-workflow** (for invocation-time inputs, see `workflow-inputs.md`) |
-| `control-flow.md` | Loops, conditionals, early termination in plain TypeScript |
-| `state-and-data-flow.md` | Data flow between sessions, transcripts, persistence |
-| `session-config.md` | Per-SDK session configuration: model, tools, permissions, hooks |
-| `discovery-and-verification.md` | Workflow file discovery, validation, TypeScript config |
+The full table of references with load triggers lives in SKILL.md
+§"Reference Files". Pull `failure-modes.md` before shipping any
+multi-session workflow, and `agent-sessions.md` whenever writing SDK calls.
 ## Builtin reference implementations
@@ -292,10 +265,11 @@ The SDK ships two builtin workflows that demonstrate production patterns for all
 - **`ralph`** (`src/sdk/workflows/builtin/ralph/`) — iterative plan → orchestrate → review → debug loop with consecutive clean-pass detection, shared helpers for prompts/parsing/git, and cross-SDK adaptation
 - **`deep-research-codebase`** (`src/sdk/workflows/builtin/deep-research-codebase/`) — deterministic codebase scout → LOC-based heuristic explorer partitioning → parallel explorers → aggregator with file-based handoffs and context-aware prompt engineering
-- **`headless-test`** (`.atomic/workflows/headless-test/`) — demonstrates the visible → [parallel headless] → visible merge pattern (all 3 SDKs)
 Both include `helpers/` directories with SDK-agnostic logic (prompt builders, parsers, heuristics) and per-agent `index.ts` files showing how the same workflow topology adapts to Claude, Copilot, and OpenCode.
+For a minimal headless example (not a builtin — it lives as a local workflow in this repo), see `.atomic/workflows/headless-test/` — demonstrates the visible → [parallel headless] → visible merge pattern for all three SDKs.
 ## Type safety
-The SDK is typed with **no `unknown` or `any`**. `SessionContext` fields are precisely typed, and native provider types may appear inside Atomic generic aliases and runtime values — if you need to name those types in your own code, import them from the provider SDK directly. Use `import type` for type-only imports. Use `.for<"agent">()` to narrow `s.client` and `s.session` to the correct provider types. Declare `inputs` inline so TypeScript enforces typed access on `ctx.inputs`.
+The SDK avoids `any` and uses `unknown` only at well-defined boundaries (e.g., `SessionRef = string | SessionHandle<unknown>` for handle-erased lookups). `SessionContext` fields are precisely typed, and native provider types may appear inside Atomic generic aliases and runtime values — if you need to name those types in your own code, import them from the provider SDK directly. Use `import type` for type-only imports. Use `.for<"agent">()` to narrow `s.client` and `s.session` to the correct provider types. Declare `inputs` inline so TypeScript enforces typed access on `ctx.inputs`.