@bastani/atomic 0.8.28 → 0.8.29-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145):
  1. package/CHANGELOG.md +37 -0
  2. package/dist/builtin/cursor/CHANGELOG.md +27 -0
  3. package/dist/builtin/cursor/LICENSE +26 -0
  4. package/dist/builtin/cursor/README.md +22 -0
  5. package/dist/builtin/cursor/index.ts +9 -0
  6. package/dist/builtin/cursor/package.json +46 -0
  7. package/dist/builtin/cursor/src/auth.ts +352 -0
  8. package/dist/builtin/cursor/src/catalog-cache.ts +155 -0
  9. package/dist/builtin/cursor/src/config.ts +123 -0
  10. package/dist/builtin/cursor/src/conversation-state.ts +135 -0
  11. package/dist/builtin/cursor/src/cursor-models-raw.json +583 -0
  12. package/dist/builtin/cursor/src/model-mapper.ts +270 -0
  13. package/dist/builtin/cursor/src/models.ts +54 -0
  14. package/dist/builtin/cursor/src/native-loader.ts +71 -0
  15. package/dist/builtin/cursor/src/proto/README.md +34 -0
  16. package/dist/builtin/cursor/src/proto/agent_pb.ts +15294 -0
  17. package/dist/builtin/cursor/src/proto/protobuf-codec.ts +717 -0
  18. package/dist/builtin/cursor/src/provider.ts +301 -0
  19. package/dist/builtin/cursor/src/stream.ts +564 -0
  20. package/dist/builtin/cursor/src/transport.ts +791 -0
  21. package/dist/builtin/intercom/CHANGELOG.md +4 -0
  22. package/dist/builtin/intercom/package.json +2 -2
  23. package/dist/builtin/intercom/skills/intercom/SKILL.md +5 -5
  24. package/dist/builtin/mcp/CHANGELOG.md +4 -0
  25. package/dist/builtin/mcp/package.json +3 -3
  26. package/dist/builtin/subagents/CHANGELOG.md +13 -0
  27. package/dist/builtin/subagents/README.md +7 -3
  28. package/dist/builtin/subagents/agents/codebase-online-researcher.md +9 -24
  29. package/dist/builtin/subagents/agents/debugger.md +3 -5
  30. package/dist/builtin/subagents/package.json +4 -4
  31. package/dist/builtin/subagents/src/runs/background/subagent-runner.ts +2 -1
  32. package/dist/builtin/subagents/src/runs/foreground/execution.ts +2 -1
  33. package/dist/builtin/subagents/src/runs/shared/parallel-utils.ts +1 -0
  34. package/dist/builtin/subagents/src/runs/shared/pi-args.ts +19 -2
  35. package/dist/builtin/subagents/src/runs/shared/structured-output.ts +271 -10
  36. package/dist/builtin/subagents/src/runs/shared/subagent-prompt-runtime.ts +12 -39
  37. package/dist/builtin/subagents/src/shared/types.ts +5 -3
  38. package/dist/builtin/subagents/src/shared/utils.ts +50 -10
  39. package/dist/builtin/subagents/src/slash/saved-chain-mapping.ts +77 -0
  40. package/dist/builtin/subagents/src/slash/slash-commands.ts +1 -55
  41. package/dist/builtin/web-access/CHANGELOG.md +5 -1
  42. package/dist/builtin/web-access/README.md +1 -1
  43. package/dist/builtin/web-access/github-extract.ts +1 -1
  44. package/dist/builtin/web-access/package.json +3 -3
  45. package/dist/builtin/workflows/CHANGELOG.md +26 -0
  46. package/dist/builtin/workflows/README.md +28 -8
  47. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +9 -49
  48. package/dist/builtin/workflows/builtin/goal.ts +63 -106
  49. package/dist/builtin/workflows/builtin/index.d.ts +2 -0
  50. package/dist/builtin/workflows/builtin/open-claude-design.ts +31 -76
  51. package/dist/builtin/workflows/builtin/ralph.d.ts +2 -0
  52. package/dist/builtin/workflows/builtin/ralph.ts +227 -518
  53. package/dist/builtin/workflows/builtin/shared-prompts.ts +7 -0
  54. package/dist/builtin/workflows/package.json +2 -2
  55. package/dist/builtin/workflows/skills/research-codebase/SKILL.md +17 -3
  56. package/dist/builtin/workflows/src/extension/wiring.ts +72 -9
  57. package/dist/builtin/workflows/src/extension/workflow-schema.ts +34 -0
  58. package/dist/builtin/workflows/src/runs/foreground/executor.ts +13 -2
  59. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +86 -14
  60. package/dist/builtin/workflows/src/shared/authoring-contract.d.ts +11 -3
  61. package/dist/builtin/workflows/src/shared/types.ts +8 -4
  62. package/dist/builtin/workflows/src/tui/overlay-adapter.ts +64 -2
  63. package/dist/builtin/workflows/src/tui/workflow-attach-pane.ts +8 -8
  64. package/dist/builtin/workflows/src/tui/workflow-status.ts +2 -0
  65. package/dist/core/atomic-guide-command.d.ts.map +1 -1
  66. package/dist/core/atomic-guide-command.js +7 -7
  67. package/dist/core/atomic-guide-command.js.map +1 -1
  68. package/dist/core/builtin-packages.d.ts.map +1 -1
  69. package/dist/core/builtin-packages.js +6 -0
  70. package/dist/core/builtin-packages.js.map +1 -1
  71. package/dist/core/extensions/index.d.ts +1 -1
  72. package/dist/core/extensions/index.d.ts.map +1 -1
  73. package/dist/core/extensions/index.js.map +1 -1
  74. package/dist/core/extensions/types.d.ts +20 -0
  75. package/dist/core/extensions/types.d.ts.map +1 -1
  76. package/dist/core/extensions/types.js.map +1 -1
  77. package/dist/core/model-resolver.d.ts +1 -0
  78. package/dist/core/model-resolver.d.ts.map +1 -1
  79. package/dist/core/model-resolver.js +17 -8
  80. package/dist/core/model-resolver.js.map +1 -1
  81. package/dist/core/package-manager.d.ts +11 -9
  82. package/dist/core/package-manager.d.ts.map +1 -1
  83. package/dist/core/package-manager.js +55 -10
  84. package/dist/core/package-manager.js.map +1 -1
  85. package/dist/core/project-trust.d.ts +1 -0
  86. package/dist/core/project-trust.d.ts.map +1 -1
  87. package/dist/core/project-trust.js +3 -3
  88. package/dist/core/project-trust.js.map +1 -1
  89. package/dist/core/resource-loader.d.ts +11 -2
  90. package/dist/core/resource-loader.d.ts.map +1 -1
  91. package/dist/core/resource-loader.js +72 -9
  92. package/dist/core/resource-loader.js.map +1 -1
  93. package/dist/core/sdk.d.ts +3 -3
  94. package/dist/core/sdk.d.ts.map +1 -1
  95. package/dist/core/sdk.js +5 -5
  96. package/dist/core/sdk.js.map +1 -1
  97. package/dist/core/tools/index.d.ts +1 -0
  98. package/dist/core/tools/index.d.ts.map +1 -1
  99. package/dist/core/tools/index.js +1 -0
  100. package/dist/core/tools/index.js.map +1 -1
  101. package/dist/core/tools/structured-output.d.ts +39 -0
  102. package/dist/core/tools/structured-output.d.ts.map +1 -0
  103. package/dist/core/tools/structured-output.js +141 -0
  104. package/dist/core/tools/structured-output.js.map +1 -0
  105. package/dist/index.d.ts +1 -1
  106. package/dist/index.d.ts.map +1 -1
  107. package/dist/index.js +1 -1
  108. package/dist/index.js.map +1 -1
  109. package/dist/main.d.ts.map +1 -1
  110. package/dist/main.js +36 -14
  111. package/dist/main.js.map +1 -1
  112. package/dist/modes/interactive/components/login-dialog.d.ts +3 -0
  113. package/dist/modes/interactive/components/login-dialog.d.ts.map +1 -1
  114. package/dist/modes/interactive/components/login-dialog.js +16 -0
  115. package/dist/modes/interactive/components/login-dialog.js.map +1 -1
  116. package/dist/modes/interactive/interactive-mode.d.ts +11 -0
  117. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  118. package/dist/modes/interactive/interactive-mode.js +158 -11
  119. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  120. package/dist/modes/print-mode.d.ts.map +1 -1
  121. package/dist/modes/print-mode.js +39 -0
  122. package/dist/modes/print-mode.js.map +1 -1
  123. package/docs/custom-provider.md +1 -0
  124. package/docs/extensions.md +2 -2
  125. package/docs/models.md +2 -0
  126. package/docs/packages.md +3 -1
  127. package/docs/providers.md +15 -0
  128. package/docs/quickstart.md +3 -3
  129. package/docs/sdk.md +61 -0
  130. package/docs/security.md +1 -1
  131. package/docs/subagents.md +21 -0
  132. package/docs/usage.md +2 -0
  133. package/docs/workflows.md +28 -21
  134. package/examples/extensions/README.md +1 -1
  135. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  136. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  137. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  138. package/examples/extensions/gondolin/package-lock.json +2 -2
  139. package/examples/extensions/gondolin/package.json +1 -1
  140. package/examples/extensions/sandbox/package-lock.json +2 -2
  141. package/examples/extensions/sandbox/package.json +1 -1
  142. package/examples/extensions/structured-output.ts +22 -53
  143. package/examples/extensions/with-deps/package-lock.json +2 -2
  144. package/examples/extensions/with-deps/package.json +1 -1
  145. package/package.json +12 -9
@@ -9,3 +9,10 @@ export const WORKER_PREFLIGHT_CONTRACT = [
9
9
  "If setup requirements cannot be determined confidently, delegate a focused discovery task before implementation instead of guessing.",
10
10
  "If setup remains blocked after evidence-based discovery and setup attempts, report the blocker with commands tried and the exact evidence needed to continue.",
11
11
  ].join("\n");
12
+
13
+ export const E2E_VERIFICATION_GUIDANCE = [
14
+ "Verify correctness end-to-end whenever practical for user-visible behavior; do not rely only on code inspection, unit tests, or stage summaries when an executable user scenario can prove the outcome.",
15
+ "For web or frontend flows — including frontend changes whose correctness depends on backend/API behavior — use the browser skill, or delegate to a subagent with `skill: \"browser\"`, to drive the application like a user and capture screenshot, DOM, or network evidence when that proves the objective.",
16
+ "For TUI or terminal-app flows, use the tmux skill, or delegate to a subagent with `skill: \"tmux\"`, to launch the app in an isolated tmux session, send keys, capture pane output, and simulate the scenario end to end.",
17
+ "If end-to-end verification is not practical in this checkout, record what was attempted, the smallest missing prerequisite, and the narrower validation that was run instead; do not claim end-to-end proof when it was not performed.",
18
+ ].join("\n");
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/workflows",
3
- "version": "0.8.28",
3
+ "version": "0.8.29-alpha.3",
4
4
  "private": true,
5
5
  "description": "Atomic extension for multi-stage workflow authoring and execution.",
6
6
  "contributors": [
@@ -83,7 +83,7 @@
83
83
  },
84
84
  "peerDependencies": {
85
85
  "@bastani/atomic": "*",
86
- "@earendil-works/pi-tui": "^0.78.1"
86
+ "@earendil-works/pi-tui": "^0.79.3"
87
87
  },
88
88
  "peerDependenciesMeta": {
89
89
  "@bastani/atomic": {
@@ -65,10 +65,24 @@ The user's research question/request is: **$ARGUMENTS**
65
65
  - The agent fetches live web content using the **browser** skill's `browse` CLI (or `npx browse` / `curl`). Instruct it to apply the token-efficient fetch order: (1) try `curl https://<site>/llms.txt` for an AI-friendly index (see [llmstxt.org](https://llmstxt.org/llms.txt)), (2) try `curl <url> -H "Accept: text/markdown"` to get pre-converted Markdown (supported on Cloudflare-hosted docs via [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)), (3) fall back to HTML parsing via `browse`
66
66
  - Instruct the agent to return LINKS with their findings and INCLUDE those links in the research document
67
67
  - The agent should persist reusable source documents under `research/web/<YYYY-MM-DD>-<kebab-case-topic>.md` (with frontmatter noting `source_url`, `fetched_at`, and `fetch_method`) so future research can reuse them without re-fetching
68
- - Output directory for the synthesized research artifact: `research/docs/`
68
+ - Output directory for the synthesized web research artifacts: `research/web/`:
69
+
70
+ When you fetch a document that is worth keeping for future sessions (reference docs, API schemas, SDK guides, release notes, troubleshooting writeups, architecture articles), `write` it to `research/web/<YYYY-MM-DD>-<kebab-case-topic>.md` with frontmatter capturing:
71
+
72
+ ```markdown
73
+ ---
74
+ source_url: <original URL>
75
+ fetched_at: <YYYY-MM-DD>
76
+ fetch_method: read | llms.txt | markdown-accept-header | browser | browse
77
+ topic: <short description>
78
+ ---
79
+ ```
80
+
81
+ - Followed by the extracted content (trimmed of nav chrome, ads, and irrelevant boilerplate). This lets future work reuse the lookup without re-fetching. Before fetching anything, quickly `find research/web/` for an existing, recent copy.
82
+
69
83
  - Examples:
70
- - If researching `Redis` locks usage, the agent might find relevant usage and create a document `research/docs/2024-01-15-redis-locks-usage.md` with internal links to Redis docs and code references (and cache the fetched Redis docs under `research/web/`)
71
- - If researching `OAuth` flows, the agent might find relevant external articles and create a document `research/docs/2024-01-16-oauth-flows.md` with links to those articles
84
+ - If researching `Redis` locks usage, the agent might find relevant usage and create a document `research/web/2024-01-15-redis-locks-usage.md` with internal links to Redis docs and code references (and cache the fetched Redis docs under `research/web/`)
85
+ - If researching `OAuth` flows, the agent might find relevant external articles and create a document `research/web/2024-01-16-oauth-flows.md` with links to those articles
72
86
 
73
87
  The key is to use these agents intelligently:
74
88
  - Start with locator agents to find what exists
@@ -22,7 +22,7 @@
22
22
  */
23
23
 
24
24
  import { basename } from "node:path";
25
- import type { ChatMessageRenderOptions, CreateAgentSessionOptions } from "@bastani/atomic";
25
+ import type { ChatMessageRenderOptions, CreateAgentSessionOptions, PackageSource } from "@bastani/atomic";
26
26
  import type { StageAdapters, StageSessionCreateResult, StageSessionRuntime } from "../runs/foreground/stage-runner.js";
27
27
  import type { StageExecutionMeta, StageOptions } from "../shared/types.js";
28
28
  import { stageUiBroker, type StageUiBroker } from "../shared/stage-ui-broker.js";
@@ -109,7 +109,7 @@ export interface PiCodingAgentSdk {
109
109
  cwd: string;
110
110
  agentDir: string;
111
111
  settingsManager?: PiSdkSettingsManager;
112
- builtinPackagePaths?: string[];
112
+ builtinPackagePaths?: PackageSource[];
113
113
  }) => PiSdkResourceLoader;
114
114
  createAgentSession(options?: AtomicCreateAgentSessionOptions): Promise<{ session: StageSessionRuntime }>;
115
115
  }
@@ -156,7 +156,7 @@ export async function prepareAtomicStageSessionOptions(
156
156
  settingsManager,
157
157
  builtinPackagePaths: stageBuiltinPackagePaths(sdk.getBuiltinPackagePaths?.() ?? []),
158
158
  });
159
- await resourceLoader.reload();
159
+ await reloadWorkflowStageResources(resourceLoader);
160
160
 
161
161
  return {
162
162
  ...atomicOptions,
@@ -167,13 +167,60 @@ export async function prepareAtomicStageSessionOptions(
167
167
  };
168
168
  }
169
169
 
170
- function stageBuiltinPackagePaths(paths: readonly string[]): string[] {
170
+ function stageBuiltinPackagePaths(paths: readonly string[]): PackageSource[] {
171
171
  // Workflow stages are child AgentSessions owned by the workflow extension.
172
172
  // Loading the workflows extension again inside that child session replays its
173
173
  // `session_start` lifecycle and clears/kills the parent workflow store. Keep
174
- // the other builtin packages (subagents, mcp, web-access, intercom), but do
175
- // not recursively install workflows into workflow stage sessions.
176
- return paths.filter((path) => basename(path) !== "workflows");
174
+ // the workflows package itself so its bundled skills/prompts/resources remain
175
+ // available, but disable only its extension entry for stage sessions.
176
+ return paths.map((path) =>
177
+ basename(path) === "workflows" ? { source: path, extensions: [] } : path,
178
+ );
179
+ }
180
+
181
+ const SUBAGENT_CHILD_EXTENSION_ENV_KEYS = [
182
+ "ATOMIC_SUBAGENT_CHILD",
183
+ "ATOMIC_SUBAGENT_FANOUT_CHILD",
184
+ "PI_SUBAGENT_CHILD",
185
+ "PI_SUBAGENT_FANOUT_CHILD",
186
+ ] as const;
187
+
188
+ let workflowStageResourceReloadQueue: Promise<void> = Promise.resolve();
189
+
190
+ async function reloadWorkflowStageResources(resourceLoader: PiSdkResourceLoader): Promise<void> {
191
+ const queuedReload = workflowStageResourceReloadQueue.then(() =>
192
+ reloadWorkflowStageResourcesWithEnvIsolation(resourceLoader),
193
+ );
194
+ workflowStageResourceReloadQueue = queuedReload.catch(() => undefined);
195
+ return queuedReload;
196
+ }
197
+
198
+ async function reloadWorkflowStageResourcesWithEnvIsolation(resourceLoader: PiSdkResourceLoader): Promise<void> {
199
+ // Workflow stage sessions are already governed by an orchestration context
200
+ // that disables recursive workflow tools and caps nested subagent depth. When
201
+ // a workflow itself runs inside a subagent child process, inherited subagent
202
+ // child env flags would otherwise make the bundled subagents extension skip
203
+ // registering its `subagent` tool before the stage session exists. Isolate
204
+ // extension discovery from those parent-process flags so an explicit
205
+ // `tools: ["subagent"]` allowlist works the same in workflow stages everywhere.
206
+ // The isolation mutates process-global env, so serialize the full
207
+ // save/delete/reload/restore section. Without this queue, overlapping workflow
208
+ // stage session creation can snapshot an already-cleared env and restore that
209
+ // stale snapshot after another reload restores the real parent values.
210
+ const previousValues = new Map<string, string | undefined>();
211
+ for (const key of SUBAGENT_CHILD_EXTENSION_ENV_KEYS) {
212
+ previousValues.set(key, process.env[key]);
213
+ delete process.env[key];
214
+ }
215
+ try {
216
+ await resourceLoader.reload();
217
+ } finally {
218
+ for (const key of SUBAGENT_CHILD_EXTENSION_ENV_KEYS) {
219
+ const previousValue = previousValues.get(key);
220
+ if (previousValue === undefined) delete process.env[key];
221
+ else process.env[key] = previousValue;
222
+ }
223
+ }
177
224
  }
178
225
 
179
226
  async function createPiSdkAgentSession(
@@ -253,7 +300,7 @@ async function createTestAgentSession(_options?: CreateAgentSessionOptions): Pro
253
300
  function stripWorkflowOnlyOptions(options: (StageOptions | CreateAgentSessionOptions) | undefined): CreateAgentSessionOptions | undefined {
254
301
  if (!options) return options;
255
302
  const maybeWorkflowOptions = options as StageOptions;
256
- const { mcp: _mcp, fallbackModels: _fallbackModels, ...sessionOptions } = maybeWorkflowOptions;
303
+ const { schema: _schema, mcp: _mcp, fallbackModels: _fallbackModels, ...sessionOptions } = maybeWorkflowOptions;
257
304
  return sessionOptions as CreateAgentSessionOptions;
258
305
  }
259
306
 
@@ -265,7 +312,7 @@ function makeWorkflowStageOrchestrationContext(meta: StageExecutionMeta): NonNul
265
312
  workflowStageName: meta.stageName,
266
313
  constraints: {
267
314
  disableWorkflowTool: true,
268
- maxSubagentDepth: 1,
315
+ maxSubagentDepth: 2,
269
316
  },
270
317
  };
271
318
  }
@@ -499,6 +546,14 @@ export interface PiOverlayHandle {
499
546
  * (`overlay-adapter.ts`); inline pickers leave it unset and dismiss
500
547
  * via the factory `done()` callback.
501
548
  */
549
+ export interface PiHostCustomUiState {
550
+ blockingInlineCustomUiDepth: number;
551
+ blockingInlineCustomUiActive: boolean;
552
+ blockingInlineCustomUiFocusDeferred?: boolean;
553
+ }
554
+
555
+ export type PiHostCustomUiStateListener = (state: PiHostCustomUiState) => void;
556
+
502
557
  export interface PiCustomOverlayOptions {
503
558
  /**
504
559
  * `true` mounts a floating popup; `false` mounts a focused
@@ -506,6 +561,8 @@ export interface PiCustomOverlayOptions {
506
561
  * place of the editor until the factory's `done()` callback fires.
507
562
  */
508
563
  overlay: boolean;
564
+ /** Keep host inline custom UI pending in the background while this overlay is visible. */
565
+ deferInlineCustomUiFocus?: boolean;
509
566
  /**
510
567
  * Geometry / anchoring intended for pi-tui's `resolveOverlayLayout`.
511
568
  * NOT forwarded by current pi interactive `custom()` — see
@@ -636,6 +693,12 @@ export interface PiUISurface {
636
693
  setTitle?: (title: string) => void;
637
694
  /** Show a custom component or overlay. */
638
695
  custom?: PiCustomOverlayFunction;
696
+ /** Get host-owned inline custom UI focus state, if exposed by the host. */
697
+ getHostCustomUiState?: () => PiHostCustomUiState;
698
+ /** Observe host-owned inline custom UI focus state changes, if exposed by the host. */
699
+ onHostCustomUiStateChange?: (listener: PiHostCustomUiStateListener) => () => void;
700
+ /** Move focus to a mounted host-owned inline custom UI, if one is pending. */
701
+ focusHostInlineCustomUi?: () => boolean;
639
702
  pasteToEditor?: (text: string) => void;
640
703
  setEditorText?: (text: string) => void;
641
704
  getEditorText?: () => string;
@@ -37,6 +37,39 @@ const McpOptionsSchema = Type.Object({
37
37
  deny: Type.Optional(Type.Array(Type.String())),
38
38
  });
39
39
 
40
+ const JsonSchemaObjectTypeValue = {
41
+ anyOf: [
42
+ { const: "object" },
43
+ { type: "array", minItems: 1, maxItems: 1, items: { const: "object" } },
44
+ ],
45
+ };
46
+
47
+ const JsonSchemaExplicitObjectDescriptor = {
48
+ type: "object",
49
+ required: ["type"],
50
+ properties: { type: JsonSchemaObjectTypeValue },
51
+ additionalProperties: true,
52
+ };
53
+
54
+ const JsonSchemaObject = Type.Unsafe<Record<string, unknown>>({
55
+ description: "Top-level object JSON Schema used as structured_output tool arguments for this workflow item.",
56
+ anyOf: [
57
+ JsonSchemaExplicitObjectDescriptor,
58
+ {
59
+ type: "object",
60
+ required: ["allOf"],
61
+ properties: {
62
+ allOf: {
63
+ type: "array",
64
+ minItems: 1,
65
+ items: JsonSchemaExplicitObjectDescriptor,
66
+ },
67
+ },
68
+ additionalProperties: true,
69
+ },
70
+ ],
71
+ });
72
+
40
73
  const BashCommandRuleSchema = Type.Union([
41
74
  Type.String(),
42
75
  Type.Object({ prefix: Type.String() }, { additionalProperties: false }),
@@ -55,6 +88,7 @@ const BashCommandPolicySchema = Type.Object({
55
88
  }, { additionalProperties: false });
56
89
 
57
90
  const StageSessionOptionProperties = {
91
+ schema: Type.Optional(JsonSchemaObject),
58
92
  cwd: Type.Optional(Type.String()),
59
93
  agentDir: Type.Optional(Type.String()),
60
94
  authStorage: Type.Optional(SdkSessionOptionSchema("authStorage")),
@@ -1267,6 +1267,15 @@ function truncateByBytes(text: string, maxBytes: number): { text: string; trunca
1267
1267
  return { text: text.slice(0, low), truncated: true };
1268
1268
  }
1269
1269
 
1270
/**
 * Convert a task's raw prompt output into text.
 *
 * Strings are returned unchanged. Any other value is rendered as
 * pretty-printed JSON with a two-space indent.
 *
 * @param value - Raw output: either plain text or a structured value.
 * @returns The text form of `value`.
 * @throws Error when `value` cannot be represented as JSON. This covers both
 *   values that make `JSON.stringify` throw (cycles, BigInt) and values for
 *   which it silently yields `undefined` (top-level `undefined`, functions,
 *   symbols, or a `toJSON()` that returns `undefined`).
 */
function structuredTaskOutputText(value: unknown): string {
  if (typeof value === "string") return value;

  let serialized: string | undefined;
  try {
    // Typed as `string`, but at runtime JSON.stringify returns `undefined`
    // for top-level values that have no JSON representation.
    serialized = JSON.stringify(value, null, 2) as string | undefined;
  } catch (error) {
    throw new Error(`atomic-workflows: structured task output is not JSON-serializable: ${error instanceof Error ? error.message : String(error)}`);
  }

  if (serialized === undefined) {
    // Fail here with a descriptive message instead of handing a non-string
    // to callers that rely on the declared `string` return type.
    throw new Error(`atomic-workflows: structured task output is not JSON-serializable: a value of type ${typeof value} has no JSON representation`);
  }
  return serialized;
}
1278
+
1270
1279
  function truncateTaskOutput(text: string, maxOutput: WorkflowMaxOutput | undefined): string {
1271
1280
  const limits = normalizeMaxOutput(maxOutput);
1272
1281
  const byLines = truncateByLines(text, limits.lines);
@@ -4827,11 +4836,12 @@ export async function run<TInputs extends WorkflowInputValues>(
4827
4836
  taskStageOptions(resolvedTaskOptions),
4828
4837
  stageFailFastScope,
4829
4838
  );
4830
- const rawText = await stage.prompt(
4839
+ const rawOutput = await stage.prompt(
4831
4840
  applyTaskContext(`${taskReadInstruction(resolvedTaskOptions)}${taskPrompt(resolvedTaskOptions)}`, taskPrevious(resolvedTaskOptions)),
4832
4841
  taskPromptOptions(resolvedTaskOptions),
4833
4842
  );
4834
- const text = truncateTaskOutput(rawText, resolvedTaskOptions.maxOutput);
4843
+ const structured = typeof rawOutput === "string" ? undefined : rawOutput;
4844
+ const text = truncateTaskOutput(structuredTaskOutputText(rawOutput), resolvedTaskOptions.maxOutput);
4835
4845
  const sessionId = (() => {
4836
4846
  try {
4837
4847
  return stage.sessionId;
@@ -4844,6 +4854,7 @@ export async function run<TInputs extends WorkflowInputValues>(
4844
4854
  name,
4845
4855
  stageName: name,
4846
4856
  text,
4857
+ ...(structured !== undefined ? { structured: structured as WorkflowSerializableValue } : {}),
4847
4858
  ...(sessionId !== undefined ? { sessionId } : {}),
4848
4859
  ...(stage.sessionFile !== undefined ? { sessionFile: stage.sessionFile } : {}),
4849
4860
  ...(stageMeta.model !== undefined ? { model: stageMeta.model } : {}),
@@ -10,11 +10,14 @@
10
10
  import { mkdir, writeFile } from "node:fs/promises";
11
11
  import { dirname, isAbsolute, resolve } from "node:path";
12
12
  import {
13
+ createStructuredOutputCapture,
14
+ createStructuredOutputTool,
13
15
  shouldApplyCodexFastModeForScope,
14
16
  SessionManager,
15
17
  type AgentSession,
16
18
  type CreateAgentSessionOptions,
17
19
  type PromptOptions,
20
+ type StructuredOutputCapture,
18
21
  } from "@bastani/atomic";
19
22
  import type {
20
23
  CompleteStageOpts,
@@ -28,6 +31,7 @@ import type {
28
31
  WorkflowExecutionMode,
29
32
  WorkflowModelCatalogPort,
30
33
  } from "../../shared/types.js";
34
+ import type { Static, TSchema } from "typebox";
31
35
  import {
32
36
  buildModelCandidatesFromCatalog,
33
37
  errorMessage,
@@ -167,6 +171,7 @@ export interface InternalStageContext extends StageContext {
167
171
  function stripWorkflowOnlyOptions(options: StageOptions | undefined): CreateAgentSessionOptions {
168
172
  if (!options) return {};
169
173
  const {
174
+ schema: _schema,
170
175
  mcp: _mcp,
171
176
  fallbackModels: _fallbackModels,
172
177
  fallbackThinkingLevels: _fallbackThinkingLevels,
@@ -530,6 +535,43 @@ function splitPromptOptions(options: StagePromptOptions | undefined): {
530
535
  };
531
536
  }
532
537
 
538
+ const STRUCTURED_OUTPUT_TOOL_NAME = "structured_output";
539
+
540
+ function structuredOutputPrompt(text: string): string {
541
+ return `${text}\n\nFinal output contract:\n- Your final action MUST be a structured_output tool call.\n- Pass the schema fields directly as tool arguments; do not wrap them in { value: ... } unless the schema explicitly defines a top-level value field.\n- Do not emit a prose final answer instead of structured_output.\n- If you need to inspect files or run commands first, do so, then call structured_output exactly once.`;
542
+ }
543
+
544
/**
 * Serialize a value to pretty-printed JSON with a two-space indent.
 *
 * @param value - The value to serialize.
 * @returns The JSON text for `value`.
 * @throws Error when `value` has no JSON representation. This covers both
 *   values that make `JSON.stringify` throw (cycles, BigInt) and values for
 *   which it silently yields `undefined` (top-level `undefined`, functions,
 *   symbols, or a `toJSON()` that returns `undefined`).
 */
function stringifyStructuredOutputValue(value: unknown): string {
  let serialized: string | undefined;
  try {
    // Typed as `string`, but at runtime JSON.stringify returns `undefined`
    // for top-level values that have no JSON representation.
    serialized = JSON.stringify(value, null, 2) as string | undefined;
  } catch (error) {
    throw new Error(`atomic-workflows: structured_output returned a non-serializable value: ${error instanceof Error ? error.message : String(error)}`);
  }

  if (serialized === undefined) {
    // Keep the declared `string` return type honest: report the problem
    // instead of returning `undefined` to the caller.
    throw new Error(`atomic-workflows: structured_output returned a non-serializable value: a value of type ${typeof value} has no JSON representation`);
  }
  return serialized;
}
551
+
552
+ function stageOptionsWithStructuredOutput(
553
+ options: StageOptions | undefined,
554
+ capture: StructuredOutputCapture<unknown> | undefined,
555
+ ): StageOptions | undefined {
556
+ if (!options?.schema || !capture) return options;
557
+ const tools = options.tools === undefined
558
+ ? undefined
559
+ : Array.from(new Set([...options.tools, STRUCTURED_OUTPUT_TOOL_NAME]));
560
+ const excludedTools = options.excludedTools?.filter((toolName) => toolName !== STRUCTURED_OUTPUT_TOOL_NAME);
561
+ return {
562
+ ...options,
563
+ ...(tools !== undefined ? { tools } : {}),
564
+ ...(excludedTools !== undefined ? { excludedTools } : {}),
565
+ customTools: [
566
+ ...(options.customTools ?? []),
567
+ createStructuredOutputTool({
568
+ schema: options.schema as TSchema,
569
+ capture: capture as StructuredOutputCapture<Static<TSchema>>,
570
+ }),
571
+ ],
572
+ };
573
+ }
574
+
533
575
  function validatePromptOutputOptions(outputOptions: StageOutputOptions): void {
534
576
  if (outputOptions.outputMode === "file-only" && (typeof outputOptions.output !== "string" || outputOptions.output.length === 0)) {
535
577
  throw new Error(
@@ -564,7 +606,9 @@ async function finalizePromptOutput(
564
606
 
565
607
  export function createStageContext(opts: StageRunnerOpts): InternalStageContext {
566
608
  const { stageId, stageName, adapters, runId, signal, stageOptions, executionMode } = opts;
567
- const meta: StageExecutionMeta = { runId, stageId, stageName, signal, stageOptions, executionMode };
609
+ const structuredOutputCapture = stageOptions?.schema ? createStructuredOutputCapture<unknown>() : undefined;
610
+ const effectiveStageOptions = stageOptionsWithStructuredOutput(stageOptions, structuredOutputCapture);
611
+ const meta: StageExecutionMeta = { runId, stageId, stageName, signal, stageOptions: effectiveStageOptions, executionMode };
568
612
  let session: StageSessionRuntime | undefined;
569
613
  let sessionPromise: Promise<StageSessionRuntime> | undefined;
570
614
  let lastAssistantText: string | undefined;
@@ -633,7 +677,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
633
677
  }
634
678
 
635
679
  const hasExplicitModelFallbackConfig =
636
- stageOptions?.model !== undefined || (stageOptions?.fallbackModels?.length ?? 0) > 0;
680
+ effectiveStageOptions?.model !== undefined || (effectiveStageOptions?.fallbackModels?.length ?? 0) > 0;
637
681
  let candidatesPromise: Promise<WorkflowResolvedModelCandidate[]> | undefined;
638
682
  let activeCandidateIndex: number | undefined;
639
683
  let selectedModel: string | undefined;
@@ -653,9 +697,9 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
653
697
  function modelCandidates(): Promise<WorkflowResolvedModelCandidate[]> {
654
698
  if (!candidatesPromise) {
655
699
  candidatesPromise = buildModelCandidatesFromCatalog({
656
- primaryModel: stageOptions?.model,
657
- fallbackModels: stageOptions?.fallbackModels,
658
- fallbackThinkingLevels: stageOptions?.fallbackThinkingLevels,
700
+ primaryModel: effectiveStageOptions?.model,
701
+ fallbackModels: effectiveStageOptions?.fallbackModels,
702
+ fallbackThinkingLevels: effectiveStageOptions?.fallbackThinkingLevels,
659
703
  catalog: modelCatalog,
660
704
  });
661
705
  }
@@ -663,9 +707,9 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
663
707
  }
664
708
 
665
709
  function stageOptionsForCandidate(candidate: WorkflowResolvedModelCandidate | undefined): StageOptions | undefined {
666
- if (candidate === undefined) return stageOptions;
710
+ if (candidate === undefined) return effectiveStageOptions;
667
711
  return {
668
- ...(stageOptions ?? {}),
712
+ ...(effectiveStageOptions ?? {}),
669
713
  model: candidate.value,
670
714
  ...(candidate.reasoningLevel !== undefined ? { thinkingLevel: candidate.reasoningLevel } : {}),
671
715
  fallbackModels: undefined,
@@ -677,7 +721,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
677
721
 
678
722
  function isWorkflowFastModeEnabled(): boolean | undefined {
679
723
  const model = session?.model;
680
- const settingsManager = sessionSettingsManager ?? stageOptions?.settingsManager;
724
+ const settingsManager = sessionSettingsManager ?? effectiveStageOptions?.settingsManager;
681
725
  if (model === undefined || settingsManager === undefined) return undefined;
682
726
  return shouldApplyCodexFastModeForScope(model, settingsManager.getCodexFastModeSettings(), "workflow");
683
727
  }
@@ -705,7 +749,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
705
749
  }
706
750
 
707
751
  function effectiveCandidateReasoning(candidate: WorkflowResolvedModelCandidate): StageOptions["thinkingLevel"] | undefined {
708
- return candidate.reasoningLevel ?? stageOptions?.thinkingLevel;
752
+ return candidate.reasoningLevel ?? effectiveStageOptions?.thinkingLevel;
709
753
  }
710
754
 
711
755
  function modelAttemptReasoning(candidate: WorkflowResolvedModelCandidate): Pick<WorkflowModelAttempt, "reasoningLevel"> {
@@ -715,7 +759,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
715
759
 
716
760
  function applyCandidateThinking(candidate: WorkflowResolvedModelCandidate | undefined): void {
717
761
  pendingThinkingLevel = candidate === undefined
718
- ? stageOptions?.thinkingLevel
762
+ ? effectiveStageOptions?.thinkingLevel
719
763
  : effectiveCandidateReasoning(candidate);
720
764
  }
721
765
 
@@ -843,6 +887,13 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
843
887
  }
844
888
 
845
889
  let index = activeCandidateIndex ?? 0;
890
+ const capturedStructuredOutputForAttempt = (): boolean =>
891
+ structuredOutputCapture?.called === true && signal?.aborted !== true;
892
+ const recordSuccessfulAttempt = (candidate: WorkflowResolvedModelCandidate): void => {
893
+ modelAttempts.push({ model: candidate.id, success: true, ...modelAttemptReasoning(candidate) });
894
+ pendingFallbackWarnings.length = 0;
895
+ };
896
+
846
897
  while (index < candidates.length) {
847
898
  const candidate = candidates[index]!;
848
899
  const activeSession = session && activeCandidateIndex === index
@@ -855,13 +906,20 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
855
906
  const { terminalScanStartIndex } = await promptWithPauseResume(activeSession, text, sdkOptions);
856
907
  const terminalFailure = latestTerminalAssistantFailureSince(activeSession.messages, terminalScanStartIndex);
857
908
  if (terminalFailure !== undefined) {
909
+ if (capturedStructuredOutputForAttempt()) {
910
+ recordSuccessfulAttempt(candidate);
911
+ return;
912
+ }
858
913
  throw new WorkflowPromptModelFailure(terminalFailure);
859
914
  }
860
- modelAttempts.push({ model: candidate.id, success: true, ...modelAttemptReasoning(candidate) });
861
- pendingFallbackWarnings.length = 0;
915
+ recordSuccessfulAttempt(candidate);
862
916
  return;
863
917
  } catch (err) {
864
918
  const message = errorMessage(err);
919
+ if (capturedStructuredOutputForAttempt() && isRetryableModelFailure(err)) {
920
+ recordSuccessfulAttempt(candidate);
921
+ return;
922
+ }
865
923
  modelAttempts.push({ model: candidate.id, success: false, ...modelAttemptReasoning(candidate), error: message });
866
924
  if (signal?.aborted || !isRetryableModelFailure(err) || index === candidates.length - 1) {
867
925
  modelWarnings.push(...pendingFallbackWarnings);
@@ -887,15 +945,29 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
887
945
 
888
946
  async prompt(text, options) {
889
947
  const { sdkOptions, outputOptions } = splitPromptOptions(options);
890
- const runtimeCwd = typeof stageOptions?.cwd === "string" ? stageOptions.cwd : process.cwd();
948
+ const runtimeCwd = typeof effectiveStageOptions?.cwd === "string" ? effectiveStageOptions.cwd : process.cwd();
891
949
  validatePromptOutputOptions(outputOptions);
950
+ if (structuredOutputCapture?.called) {
951
+ throw new Error("atomic-workflows: stage schema supports one prompt() call per stage context because structured_output may be called exactly once. Create a new ctx.stage(...) for each additional schema-backed prompt.");
952
+ }
892
953
  if (adapters.prompt) {
954
+ if (structuredOutputCapture) {
955
+ throw new Error("atomic-workflows: stage schema requires an AgentSessionAdapter so the structured_output tool can be registered.");
956
+ }
893
957
  const rawText = await adapters.prompt.prompt(text, meta);
894
958
  lastAssistantText = await finalizePromptOutput(rawText, outputOptions, runtimeCwd);
895
959
  adapterMessages = assistantMessage(lastAssistantText);
896
960
  return lastAssistantText;
897
961
  }
898
- await promptWithFallback(text, sdkOptions);
962
+ await promptWithFallback(structuredOutputCapture ? structuredOutputPrompt(text) : text, sdkOptions);
963
+ if (structuredOutputCapture) {
964
+ if (!structuredOutputCapture.called) {
965
+ throw new Error("atomic-workflows: stage configured with schema must finish by calling structured_output.");
966
+ }
967
+ const rawStructuredText = stringifyStructuredOutputValue(structuredOutputCapture.value);
968
+ lastAssistantText = await finalizePromptOutput(rawStructuredText, outputOptions, runtimeCwd);
969
+ return structuredOutputCapture.value as never;
970
+ }
899
971
  const rawText = lastAssistantTextFromSession(session, lastAssistantText, terminatingToolCallIds) ?? "";
900
972
  lastAssistantText = await finalizePromptOutput(rawText, outputOptions, runtimeCwd);
901
973
  return lastAssistantText;
@@ -48,6 +48,7 @@ export interface WorkflowModelFallbackFields {
48
48
  readonly fallbackThinkingLevels?: readonly string[];
49
49
  }
50
50
  export type WorkflowModelValue = string | object;
51
+ export type WorkflowStageResult<TSchemaDef extends TSchema | undefined = undefined> = [TSchemaDef] extends [TSchema] ? Static<TSchemaDef> : string;
51
52
  export interface WorkflowModelUsage extends WorkflowSerializableObject {
52
53
  readonly input?: number;
53
54
  readonly output?: number;
@@ -111,7 +112,9 @@ export interface WorkflowFastModeSettings extends WorkflowSerializableObject {
111
112
  export interface WorkflowFastModeSettingsManager {
112
113
  getCodexFastModeSettings(): WorkflowFastModeSettings;
113
114
  }
114
- export interface StageOptions extends WorkflowModelFallbackFields {
115
+ export interface StageOptions<TSchemaDef extends TSchema | undefined = TSchema | undefined> extends WorkflowModelFallbackFields {
116
+ /** Optional structured final-answer schema. When set, the stage receives a schema-specific `structured_output` tool and must finish by calling it. */
117
+ readonly schema?: TSchemaDef;
115
118
  readonly model?: WorkflowModelValue;
116
119
  readonly mcp?: StageMcpOptions;
117
120
  readonly tools?: readonly string[];
@@ -231,9 +234,9 @@ export interface StageAdapters {
231
234
  readonly prompt?: PromptAdapter;
232
235
  readonly complete?: CompleteAdapter;
233
236
  }
234
- export interface StageContext {
237
+ export interface StageContext<TSchemaDef extends TSchema | undefined = undefined> {
235
238
  readonly name: string;
236
- prompt(text: string, options?: StagePromptOptions): Promise<string>;
239
+ prompt(text: string, options?: StagePromptOptions): Promise<WorkflowStageResult<TSchemaDef>>;
237
240
  complete(text: string, options?: CompleteStageOpts): Promise<string>;
238
241
  steer(text: string): Promise<void>;
239
242
  followUp(text: string): Promise<void>;
@@ -279,6 +282,8 @@ export interface WorkflowTaskContext extends WorkflowSerializableObject {
279
282
  export type WorkflowTaskContextInput = string | WorkflowTaskContext | WorkflowTaskResult;
280
283
  export interface WorkflowTaskResult extends WorkflowTaskContext {
281
284
  readonly stageName: string;
285
+ /** Parsed structured value when the task/stage was configured with `schema`. */
286
+ readonly structured?: WorkflowSerializableValue;
282
287
  readonly sessionId?: string;
283
288
  readonly sessionFile?: string;
284
289
  readonly artifacts?: readonly WorkflowArtifact[];
@@ -400,6 +405,9 @@ export interface WorkflowRunContext<TInputs extends WorkflowInputValues = Workfl
400
405
  readonly inputs: Readonly<TInputs>;
401
406
  readonly cwd?: string;
402
407
  exit(options?: WorkflowExitOptions<TOutputs>): never;
408
+ stage<TSchemaDef extends TSchema>(name: string, options: StageOptions<TSchemaDef> & {
409
+ readonly schema: TSchemaDef;
410
+ }): StageContext<TSchemaDef>;
403
411
  stage(name: string, options?: StageOptions): StageContext;
404
412
  task(name: string, options: WorkflowTaskOptions): Promise<WorkflowTaskResult>;
405
413
  chain(steps: readonly WorkflowTaskStep[], options?: WorkflowChainOptions): Promise<WorkflowTaskResult[]>;
@@ -153,9 +153,11 @@ export interface StageMcpOptions extends AuthoringContract.StageMcpOptions {
153
153
  * All pi SDK createAgentSession options are forwarded to the stage session;
154
154
  * workflow-owned options such as `mcp` and `gitWorktreeDir` are stripped before SDK session creation.
155
155
  */
156
- export interface StageOptions
156
+ export interface StageOptions<TSchemaDef extends TSchema | undefined = TSchema | undefined>
157
157
  extends Omit<CreateAgentSessionOptions, "model" | keyof AuthoringContract.StageOptions>,
158
- Omit<Mutable<AuthoringContract.StageOptions>, "sessionManager" | "settingsManager"> {
158
+ Omit<Mutable<AuthoringContract.StageOptions<TSchemaDef>>, "sessionManager" | "settingsManager"> {
159
+ /** Optional structured final-answer schema. When set, the stage receives a schema-specific `structured_output` tool and must finish by calling it. */
160
+ schema?: TSchemaDef;
159
161
  /** Model id or pi SDK model object used as the primary stage model. */
160
162
  model?: WorkflowModelValue;
161
163
  /** Per-stage MCP server gating. No-op when no WorkflowMcpPort is configured. */
@@ -231,6 +233,7 @@ export interface WorkflowPersistencePort {
231
233
  export type WorkflowTaskContext = AuthoringContract.WorkflowTaskContext;
232
234
  export type WorkflowTaskContextInput = AuthoringContract.WorkflowTaskContextInput;
233
235
  export type WorkflowTaskResult = AuthoringContract.WorkflowTaskResult;
236
+ export type WorkflowStageResult<TSchemaDef extends TSchema | undefined = undefined> = AuthoringContract.WorkflowStageResult<TSchemaDef>;
234
237
 
235
238
  /**
236
239
  * Higher-level task API: create a tracked stage, optionally inject prior task
@@ -276,12 +279,12 @@ export interface WorkflowDirectOptions extends StageOptions, Omit<Mutable<Author
276
279
  * This exposes the supported subset of pi's SDK AgentSession. The workflow
277
280
  * executor owns disposal and wraps prompt() with stage lifecycle tracking.
278
281
  */
279
- export interface StageContext {
282
+ export interface StageContext<TSchemaDef extends TSchema | undefined = undefined> {
280
283
  /** Human-readable name for this stage (used in TUI + persistence). */
281
284
  readonly name: string;
282
285
 
283
286
  /** Send a prompt and wait for completion. */
284
- prompt(text: string, options?: StagePromptOptions): Promise<string>;
287
+ prompt(text: string, options?: StagePromptOptions): Promise<WorkflowStageResult<TSchemaDef>>;
285
288
  complete(text: string, options?: CompleteStageOpts): Promise<string>;
286
289
 
287
290
  /** Queue messages during streaming. */
@@ -344,6 +347,7 @@ export interface WorkflowRunContext<
344
347
  * @param name Human-readable stage name (used in TUI + persistence).
345
348
  * @param options Optional per-stage configuration (mcp allow/deny, etc.).
346
349
  */
350
+ stage<TSchemaDef extends TSchema>(name: string, options: StageOptions<TSchemaDef> & { schema: TSchemaDef }): StageContext<TSchemaDef>;
347
351
  stage(name: string, options?: StageOptions): StageContext;
348
352
  /**
349
353
  * Safe high-level task primitive. Equivalent to creating a named stage and