@bastani/atomic 0.8.29-alpha.2 → 0.8.29-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +14 -6
  2. package/dist/builtin/cursor/package.json +2 -2
  3. package/dist/builtin/intercom/CHANGELOG.md +1 -1
  4. package/dist/builtin/intercom/package.json +1 -1
  5. package/dist/builtin/mcp/CHANGELOG.md +1 -1
  6. package/dist/builtin/mcp/package.json +1 -1
  7. package/dist/builtin/subagents/CHANGELOG.md +4 -4
  8. package/dist/builtin/subagents/README.md +4 -4
  9. package/dist/builtin/subagents/package.json +1 -1
  10. package/dist/builtin/subagents/src/extension/index.ts +14 -0
  11. package/dist/builtin/subagents/src/extension/schemas.ts +1 -1
  12. package/dist/builtin/subagents/src/runs/background/subagent-runner.ts +1 -6
  13. package/dist/builtin/subagents/src/runs/foreground/execution.ts +1 -6
  14. package/dist/builtin/subagents/src/runs/shared/parallel-utils.ts +0 -1
  15. package/dist/builtin/subagents/src/runs/shared/pi-args.ts +0 -1
  16. package/dist/builtin/subagents/src/runs/shared/structured-output.ts +16 -285
  17. package/dist/builtin/subagents/src/runs/shared/subagent-prompt-runtime.ts +1 -9
  18. package/dist/builtin/subagents/src/shared/types.ts +4 -4
  19. package/dist/builtin/subagents/src/slash/saved-chain-mapping.ts +3 -18
  20. package/dist/builtin/web-access/CHANGELOG.md +1 -1
  21. package/dist/builtin/web-access/package.json +1 -1
  22. package/dist/builtin/workflows/CHANGELOG.md +12 -5
  23. package/dist/builtin/workflows/README.md +10 -8
  24. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +9 -49
  25. package/dist/builtin/workflows/builtin/goal.ts +68 -155
  26. package/dist/builtin/workflows/builtin/index.d.ts +2 -0
  27. package/dist/builtin/workflows/builtin/open-claude-design.ts +42 -110
  28. package/dist/builtin/workflows/builtin/ralph.d.ts +2 -0
  29. package/dist/builtin/workflows/builtin/ralph.ts +235 -565
  30. package/dist/builtin/workflows/builtin/shared-prompts.ts +7 -0
  31. package/dist/builtin/workflows/package.json +1 -1
  32. package/dist/builtin/workflows/src/extension/index.ts +17 -0
  33. package/dist/builtin/workflows/src/extension/wiring.ts +55 -8
  34. package/dist/builtin/workflows/src/extension/workflow-schema.ts +2 -29
  35. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +1 -5
  36. package/dist/builtin/workflows/src/shared/authoring-contract.d.ts +1 -1
  37. package/dist/builtin/workflows/src/shared/types.ts +1 -1
  38. package/dist/core/atomic-guide-command.d.ts.map +1 -1
  39. package/dist/core/atomic-guide-command.js +7 -7
  40. package/dist/core/atomic-guide-command.js.map +1 -1
  41. package/dist/core/resource-loader.d.ts +2 -2
  42. package/dist/core/resource-loader.d.ts.map +1 -1
  43. package/dist/core/resource-loader.js.map +1 -1
  44. package/dist/core/sdk.d.ts +3 -3
  45. package/dist/core/sdk.d.ts.map +1 -1
  46. package/dist/core/sdk.js +2 -2
  47. package/dist/core/sdk.js.map +1 -1
  48. package/dist/core/system-prompt.d.ts.map +1 -1
  49. package/dist/core/system-prompt.js +0 -36
  50. package/dist/core/system-prompt.js.map +1 -1
  51. package/dist/core/tools/index.d.ts +1 -1
  52. package/dist/core/tools/index.d.ts.map +1 -1
  53. package/dist/core/tools/index.js +1 -1
  54. package/dist/core/tools/index.js.map +1 -1
  55. package/dist/core/tools/structured-output.d.ts +7 -18
  56. package/dist/core/tools/structured-output.d.ts.map +1 -1
  57. package/dist/core/tools/structured-output.js +9 -89
  58. package/dist/core/tools/structured-output.js.map +1 -1
  59. package/dist/core/tools/todos.d.ts +1 -0
  60. package/dist/core/tools/todos.d.ts.map +1 -1
  61. package/dist/core/tools/todos.js +4 -0
  62. package/dist/core/tools/todos.js.map +1 -1
  63. package/dist/index.d.ts +1 -1
  64. package/dist/index.d.ts.map +1 -1
  65. package/dist/index.js +1 -1
  66. package/dist/index.js.map +1 -1
  67. package/docs/extensions.md +1 -1
  68. package/docs/quickstart.md +3 -3
  69. package/docs/sdk.md +1 -1
  70. package/docs/subagents.md +4 -6
  71. package/docs/usage.md +1 -1
  72. package/docs/workflows.md +23 -19
  73. package/package.json +2 -2
@@ -80,7 +80,7 @@ Atomic ships with four workflows you can run immediately. Use `/workflow list` t
80
80
  |---|---|---|
81
81
  | `deep-research-codebase` | Broad, cross-cutting research before you decide what to change. Scout → research-history → parallel specialist waves → aggregator. | `/workflow deep-research-codebase prompt="How do payment retries work end to end?"` |
82
82
  | `goal` | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, and name the validation that proves it is done — for example tests, lint/typecheck, docs builds, or observable behavior. Keeps the run bounded with a goal ledger, reviewer gates, and final status `complete`, `blocked`, or `needs_human`. | `/workflow goal objective="Implement specs/2026-03-rate-limit.md, run the focused tests, and finish when burst traffic returns 429"` |
83
- | `ralph` | Larger migrations, broad refactors, multi-package changes, and spec-to-reviewed-change work where you want Atomic to plan the approach, delegate implementation through sub-agents, simplify, review, iterate, and optionally let only the final stage attempt PR creation with `create_pr=true`. | `/workflow ralph prompt="Plan the database migration, implement it, and review it" create_pr=true` |
83
+ | `ralph` | Larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase first, delegate implementation through sub-agents, review, iterate, and optionally let only the final stage attempt PR creation with `create_pr=true`. | `/workflow ralph prompt="Migrate the database layer to Drizzle" create_pr=true` |
84
84
  | `open-claude-design` | UI and design-system work with generation, critique, and refinement loops; renders a live `preview.html` you can iterate against. | `/workflow open-claude-design prompt="Refresh the settings page hierarchy" output_type=page` |
85
85
 
86
86
  <p align="center"><img src="images/workflow-list.png" alt="Workflow List" width="600" /></p>
@@ -101,7 +101,7 @@ Atomic picks the workflow, fills in inputs from the request, and confirms before
101
101
 
102
102
  Use `goal` for small-to-medium scope changes when you can identify the work surface, state the exact outcome you want, and name the validation that proves it is done — for example specific tests, lint/typecheck commands, docs builds, or observable behavior. It keeps the run bounded, captures receipts in a goal ledger, gates completion through reviewers, and stops as `complete`, `blocked`, or `needs_human`.
103
103
 
104
- Keep using `ralph` for larger migrations, broad refactors, multi-package changes, and spec-to-reviewed-change work where you want Atomic to plan the approach, delegate implementation through sub-agents, simplify, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
104
+ Keep using `ralph` for larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase first, delegate implementation through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
105
105
 
106
106
  ### Monitor and steer a run
107
107
 
@@ -132,7 +132,7 @@ Skills are reusable expert instructions. Trigger one with `/skill:<name>` follow
132
132
  | `tdd` | Test-first feature or bug work. | `/skill:tdd` |
133
133
  | `impeccable` | Critique or refine frontend and product UI. | `/skill:impeccable` |
134
134
 
135
- Use `/skill:research-codebase` for a focused area and `/workflow deep-research-codebase` when the answer spans the whole repo. A typical focused flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow goal` with an objective that identifies the work surface, states the exact outcome, and names the validation that proves it is done. Keep using `/workflow ralph` for larger migrations, broad refactors, multi-package changes, and spec-to-reviewed-change work where you want Atomic to plan, delegate through sub-agents, simplify, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
135
+ Use `/skill:research-codebase` for a focused area and `/workflow deep-research-codebase` when the answer spans the whole repo. A typical focused flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow goal` with an objective that identifies the work surface, states the exact outcome, and names the validation that proves it is done. Keep using `/workflow ralph` for larger migrations, broad refactors, and multi-package changes where you want Atomic to research first, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
136
136
 
137
137
  ### Create your own workflow in natural language
138
138
 
package/docs/sdk.md CHANGED
@@ -655,7 +655,7 @@ const { session } = await createAgentSession({
655
655
  });
656
656
  ```
657
657
 
658
- The tool parameters are exactly `DecisionSchema`: the model calls `structured_output({ approved, findings })`, not `structured_output({ value: { approved, findings } })`. A successful call stores the flat params in `capture.value`, returns them as tool `details`, and sets `terminate: true` so there is no extra follow-up assistant turn. When an `output` file sink is configured, the factory writes the same flat schema-valid params to `output.json` and writes call metadata (`toolName`, `toolCallId`, `success`, `terminate`) to a private `output.meta.json` sidecar for finality-checked parent readback. Structured-output schemas must be top-level object tool-argument schemas; wrap array or primitive final values in object fields such as `{ items: [...] }` or `{ value: ... }`. Structured-output tool definitions opt out of oversized-result persistence, so large final JSON remains inline as the machine-readable result instead of being replaced by a `<persisted-output>` pointer; text print mode also emits the terminating JSON for factory-created tools, including custom names.
658
+ The tool parameters are exactly the supplied schema: with `DecisionSchema`, the model calls `structured_output({ approved, findings })`. Array and primitive schemas are also accepted by the factory when the target provider/tool runtime supports them; the captured value is whatever JSON value matches the schema. A successful call stores the params in `capture.value`, returns them as pretty-printed JSON tool-result text (which text print mode prints), keeps the same unwrapped value in tool `details`, writes the same JSON to `output.outputPath` when an `output` file sink is configured, and sets `terminate: true` so there is no extra follow-up assistant turn. Atomic relies on the tool schema instead of extra structured-output parsing or sidecar validation. Structured-output tool definitions opt out of oversized-result persistence, so large final JSON stays inline instead of being replaced by a `<persisted-output>` pointer.
659
659
 
660
660
  Custom tool names are supported, and the prompt metadata follows the configured name. If you use a custom name such as `final_decision`, include that name in any explicit `tools` allowlist. If the standard `structured_output` name is required, register the factory with its default name:
661
661
 
package/docs/subagents.md CHANGED
@@ -148,7 +148,7 @@ If an agent or chain step uses an explicit empty `tools: []` allowlist together
148
148
 
149
149
  ## Structured output schemas
150
150
 
151
- Chain and parallel steps can declare an `outputSchema` when the parent needs reliable machine-readable handoff data. Atomic passes that schema to the child as a schema-specific `structured_output` tool backed by the shared Atomic factory. The child must finish by calling `structured_output` exactly once with arguments that match the schema directly:
151
+ Chain and parallel steps can declare an `outputSchema` when the parent needs reliable machine-readable handoff data. Atomic passes that schema directly to a `structured_output` tool backed by the shared Atomic factory. The child should call `structured_output` when it is done:
152
152
 
153
153
  ```ts
154
154
  structured_output({
@@ -157,13 +157,11 @@ structured_output({
157
157
  })
158
158
  ```
159
159
 
160
- `outputSchema` itself must be a top-level object schema because the schema is used directly as the tool's argument contract. Wrap array or primitive handoff values in an explicit object field, such as `{ items: [...] }` or `{ value: ... }`.
160
+ `outputSchema` is a plain JSON Schema descriptor object. It may describe object, array, or primitive final values, and the child should pass a JSON value that matches that schema directly. Atomic no longer adds object-root restrictions, sidecar metadata, transcript-finality checks, duplicate-call guards, or extra parent-side schema parsing. The child runtime writes the tool arguments to `output.json`; the parent reads that JSON back and exposes it as `result.structuredOutput` and, in named-chain references, as `outputs.name.structured`.
161
161
 
162
- Do not wrap the payload as `structured_output({ value: ... })` unless your own schema explicitly defines a top-level `value` field. The child runtime writes the flat schema-valid params to `output.json` and call metadata (`toolName`, `toolCallId`, `success`, `terminate`) to the `output.meta.json` sidecar; the parent validates both files before checking transcript finality. Prose-only completion, missing tool calls, missing sidecar metadata, invalid schema data, duplicate structured-output calls, stale output-file captures, sibling tool calls in the same assistant batch, and any assistant/custom/tool-result messages after the successful structured-output result fail the step. The parent accepts cross-process captures only when the transcript proves the same `structured_output` call was the final successful terminating action, then returns the validated flat value as `result.structuredOutput` and in named-chain references under `outputs.name.structured`.
162
+ Children without `outputSchema` do not receive `structured_output` from Atomic's default tool registry. They can still use a custom extension-provided terminating tool if you explicitly add one.
163
163
 
164
- Children without `outputSchema` do not receive `structured_output` from Atomic's default tool registry. They can still use a custom extension-provided terminating tool if you explicitly add one, but validated `result.structuredOutput` is reserved for schema-backed `outputSchema` captures.
165
-
166
- Dynamic fanout `collect.outputSchema` is different: it validates the collected result array after child runs finish, not a child tool-call argument object. Collection schemas remain general JSON Schemas and may use array roots such as `{ "type": "array", "minItems": 1 }`.
164
+ Dynamic fanout `collect.outputSchema` validates the collected result array after child runs finish.
167
165
 
168
166
  ## Fallback models
169
167
 
package/docs/usage.md CHANGED
@@ -160,7 +160,7 @@ In print mode, Atomic also reads piped stdin and merges it into the initial prom
160
160
  cat README.md | atomic -p "Summarize this text"
161
161
  ```
162
162
 
163
- When a print-mode turn correctly finishes by calling an opt-in terminating structured-output tool created with `createStructuredOutputTool` (for example from an extension, SDK caller, or workflow item with a schema), stdout contains the tool result's JSON text even though there is no follow-up assistant prose. This also works for custom factory names such as `final_decision`. Large final JSON from structured-output tools is preserved inline rather than redirected to a `<persisted-output>` pointer. Non-terminating or unrelated tool results are not printed as the final response.
163
+ When a print-mode turn correctly finishes by calling an opt-in terminating structured-output tool created with `createStructuredOutputTool` (for example from an extension, SDK caller, or workflow item with a schema), Atomic ends after that tool result without an extra follow-up assistant turn. Print-mode stdout contains the terminating structured JSON payload, so `atomic -p` remains script-friendly while the same value is also available through the SDK `capture` sink, tool `details`, a configured file sink, workflow `result.structured`, or subagent `result.structuredOutput`. This also works for custom factory names such as `final_decision`. Non-terminating or unrelated tool results are not printed as the final response.
164
164
 
165
165
  ### Model Options
166
166
 
package/docs/workflows.md CHANGED
@@ -153,7 +153,7 @@ For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralp
153
153
  |---|---|---|
154
154
  | `deep-research-codebase` | Scout + research-history chain → parallel specialist waves → aggregator. Indexes the whole repo and synthesizes findings. | Broad or cross-cutting research before you decide what to change. Prefer `/skill:research-codebase` for one subsystem. |
155
155
  | `goal` | Persisted goal ledger → bounded worker turns → receipts → three-reviewer gate → deterministic reducer → final report. | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, and name the validation that proves it is done — for example tests, lint/typecheck, docs builds, or observable behavior. |
156
- | `ralph` | RFC planning → sub-agent orchestration → simplification → parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, multi-package changes, and spec-to-reviewed-change work where you want Atomic to plan the approach, delegate implementation through sub-agents, simplify, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
156
+ | `ralph` | Prompt engineering → codebase/online research → sub-agent orchestration → parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to transform the prompt into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
157
157
  | `open-claude-design` | Design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Renders a live `preview.html` you can iterate against (opens through `browser` when available). | UI, page, component, theme, or design-token work that benefits from generation + critique loops. |
158
158
 
159
159
  ### `deep-research-codebase`
@@ -224,7 +224,7 @@ Run examples:
224
224
 
225
225
  Write the `objective` like a compact acceptance spec. Say what should exist when the run is done, how you want testing handled, which command(s) or manual checks matter, and what outcome proves completion. The workflow is intentionally lean: it does not first generate an RFC or migration plan, so the developer-supplied objective is where scope, validation, and completion criteria belong.
226
226
 
227
- The worker may claim readiness, but it cannot finalize completion. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
227
+ The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
228
228
 
229
229
  Result fields:
230
230
 
@@ -248,8 +248,8 @@ Inputs:
248
248
 
249
249
  | Input | Type | Required | Default | Description |
250
250
  |---|---|---|---|---|
251
- | `prompt` | text | yes | — | Task, feature request, issue summary, or spec path to plan, execute, refine, and review. |
252
- | `max_loops` | number | no | `10` | Maximum plan/orchestrate/review iterations before the workflow completes or, when enabled, proceeds to final handoff without reviewer approval. |
251
+ | `prompt` | text | yes | — | Task, feature request, issue summary, or spec path to research, execute, refine, and review. |
252
+ | `max_loops` | number | no | `10` | Maximum research/orchestrate/review iterations before the workflow completes or, when enabled, proceeds to final handoff without reviewer approval. |
253
253
  | `base_branch` | string | no | `origin/main` | Branch reviewers and the optional final stage compare the current code delta against; also used to create a missing worktree. |
254
254
  | `git_worktree_dir` | string | no | `""` | Optional reusable Git worktree root. Empty runs in the invoking checkout; non-empty values run Ralph stages in the created/reused worktree. |
255
255
  | `create_pr` | boolean | no | `false` | Safe-by-default PR creation flag. Omitted or `false` skips the final `pull-request` stage and omits `pr_report`; prompt text alone does not opt in, and only strict `true` authorizes the final `pull-request` stage to attempt provider-appropriate PR/MR/review creation. |
@@ -257,12 +257,12 @@ Inputs:
257
257
  Run examples:
258
258
 
259
259
  ```text
260
- /workflow ralph prompt="Plan and migrate the database layer to Drizzle" max_loops=3 base_branch=develop
260
+ /workflow ralph prompt="Migrate the database layer to Drizzle" max_loops=3 base_branch=develop
261
261
  /workflow ralph prompt="Refactor authentication across the API, CLI, and web UI" create_pr=true
262
262
  /workflow ralph prompt="Safely implement the API refactor" git_worktree_dir=../atomic-ralph-api-wt base_branch=main
263
263
  ```
264
264
 
265
- Each `ralph` iteration writes an RFC-style technical design document under `specs/`, initializes an OS-temp implementation notes file, delegates implementation through sub-agents, runs a behavior-preserving code simplifier, and asks two reviewers to inspect the patch directly against `base_branch`. Reviewers discover any needed repository infrastructure themselves while inspecting the actual diff; Ralph no longer runs separate `infra-*` discovery stages. The loop stops when every reviewer approves or `max_loops` is reached. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
265
+ Each `ralph` iteration starts by prompt-engineering the user prompt with `/skill:prompt-engineer Transform the following user prompt to a codebase and online research question which can be thoroughly explored: ...`, then researches that transformed question with `/skill:research-codebase ...` and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file, delegates implementation through sub-agents, and asks two reviewers to inspect the patch directly against `base_branch`. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. If reviewers find issues, the next prompt-engineering and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops when every reviewer approves or `max_loops` is reached. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
266
266
 
267
267
  Set `git_worktree_dir` when you want Ralph's worker stages isolated in a reusable Git worktree. Relative paths resolve from the invoking repository root, existing same-repository worktree roots are reused, and missing paths are created from `base_branch`. Ralph preserves the invoking repo-relative cwd inside the worktree, so launching from `repo/packages/api` with `git_worktree_dir=../repo-wt` runs stages from `../repo-wt/packages/api`.
268
268
 
@@ -271,16 +271,18 @@ Result fields:
271
271
  | Field | Meaning |
272
272
  |---|---|
273
273
  | `result` | Final implementation report from the orchestrator stage. |
274
- | `plan` | Latest RFC-style plan text. |
275
- | `plan_path` | Path to the latest generated spec under `specs/`. |
274
+ | `plan` | Latest transformed research question, retained for compatibility. |
275
+ | `plan_path` | Backward-compatible alias for `research_path`. |
276
+ | `research` | Latest research report text or artifact reference. |
277
+ | `research_path` | Path to the latest generated research artifact under `research/`. |
276
278
  | `implementation_notes_path` | OS-temp notes file containing decisions, deviations, blockers, and validation notes. |
277
279
  | `pr_report` | Pull-request report emitted only when `create_pr=true` and the final `pull-request` stage runs. |
278
280
  | `approved` | Whether the reviewer loop approved before completion or optional final handoff. |
279
- | `iterations_completed` | Number of plan/orchestrate/review loops completed. |
281
+ | `iterations_completed` | Number of research/orchestrate/review loops completed. |
280
282
  | `review_report` | Compact reference to the latest reviewer payload artifact. |
281
283
  | `review_report_path` | JSON artifact path for the latest Ralph review round. |
282
284
 
283
- A typical end-to-end flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow goal objective="Implement the researched rate-limit behavior, run the focused tests, and finish when the documented burst behavior is validated"` when you can identify the work surface, state the exact outcome, and name the validation that proves it is done. Keep using `/workflow ralph` for larger migrations, broad refactors, multi-package changes, and spec-to-reviewed-change work where you want Atomic to plan, delegate through sub-agents, simplify, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
285
+ A typical end-to-end flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow goal objective="Implement the researched rate-limit behavior, run the focused tests, and finish when the documented burst behavior is validated"` when you can identify the work surface, state the exact outcome, and name the validation that proves it is done. Keep using `/workflow ralph` for larger migrations, broad refactors, and multi-package changes where you want Atomic to research first, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`.
284
286
 
285
287
  ### `open-claude-design`
286
288
 
@@ -335,7 +337,7 @@ Use the goal workflow to implement specs/2026-03-rate-limit.md, run the focused
335
337
  ```
336
338
 
337
339
  ```text
338
- Use the ralph workflow to plan a database-layer migration, implement it, review it, and set `create_pr=true` for final-stage PR handoff.
340
+ Use the ralph workflow to research a database-layer migration, implement it, review it, and set `create_pr=true` for final-stage PR handoff.
339
341
  ```
340
342
 
341
343
  ```text
@@ -381,7 +383,7 @@ If the task is only deterministic TypeScript with no LLM/session stage, use a sc
381
383
  |-----------|-----|
382
384
  | Run, inspect, attach to, pause, interrupt, resume, or check status for an existing workflow | `/workflow ...` or `workflow({ action: ... })` |
383
385
  | Implement a small-to-medium scope change with an identifiable work surface, exact outcome, and named validation | `/workflow goal objective="..."` so Atomic keeps the run bounded, captures receipts in a goal ledger, gates completion through reviewers, and stops as `complete`, `blocked`, or `needs_human` |
384
- | Plan and execute a larger migration, broad refactor, multi-package change, or spec-to-reviewed-change effort | `/workflow ralph prompt="..."` so Atomic can plan the approach, delegate implementation through sub-agents, simplify, review, and iterate; prompt text alone does not opt in to PR creation, so add `create_pr=true` only when you want the final `pull-request` stage and `pr_report` |
386
+ | Research and execute a larger migration, broad refactor, or multi-package change | `/workflow ralph prompt="..."` so Atomic can transform the prompt into a research question, research the codebase first, delegate implementation through sub-agents, review, and iterate; prompt text alone does not opt in to PR creation, so add `create_pr=true` only when you want the final `pull-request` stage and `pr_report` |
385
387
  | Create or edit reusable automation | a TypeScript workflow definition exported from `defineWorkflow(...).compile()` |
386
388
  | Track one-off work without saving a workflow file | direct `workflow({ task })`, `workflow({ tasks })`, or `workflow({ chain })` calls |
387
389
  | Make a workflow robust | design the stage graph, context handoffs, artifacts, validation gates, model fallbacks, and human approval points before coding |
@@ -1100,7 +1102,7 @@ Control-signal probing is fail-closed. When the executor inspects an arbitrary t
1100
1102
  - Avoid workflow-specific or stage-specific vocabulary that is not explained inside the current prompt.
1101
1103
  - Use clear software engineering terminology in self-described prompts.
1102
1104
  - Avoid hard-coded regular expressions for condition matching when gating reviews or model outputs.
1103
- - Prefer schema-backed workflow stages (`ctx.stage(..., { schema })`, `ctx.chain` items, or `ctx.parallel` items) for review/gate decisions whenever model output needs to be evaluated; Atomic injects the canonical `structured_output` tool only for those schema-enabled items.
1105
+ - Prefer schema-backed workflow stages (`ctx.stage(..., { schema })`, `ctx.chain` items, or `ctx.parallel` items) for review/gate decisions whenever model output needs to be evaluated; a schema-enabled item receives the structured-output tool automatically.
1104
1106
  - Treat atomic workflow units as language model stages, not deterministic tools.
1105
1107
  - When deterministic gates are needed, create small dedicated stages that instruct a model to run a specific tool or perform a specific check. This keeps gates adaptive to the current codebase while preserving explicit workflow structure.
1106
1108
 
@@ -1315,7 +1317,7 @@ Common builtin import targets:
1315
1317
  |---|---|---|---|
1316
1318
  | `deep-research-codebase` | `deepResearchCodebase` | `@bastani/workflows/builtin/deep-research-codebase` | Gather broad repo research before planning, synthesis, or implementation. |
1317
1319
  | `goal` | `goal` | `@bastani/workflows/builtin/goal` | Run a bounded implementation/check loop with receipts and reviewer-gated completion. |
1318
- | `ralph` | `ralph` | `@bastani/workflows/builtin/ralph` | Delegate a larger migration/refactor/spec-to-reviewed-change effort to Ralph's plan/orchestrate/review loop; pass `create_pr=true` to authorize only the final PR-creation stage. |
1320
+ | `ralph` | `ralph` | `@bastani/workflows/builtin/ralph` | Delegate a larger migration/refactor effort to Ralph's research/orchestrate/review loop; pass `create_pr=true` to authorize only the final PR-creation stage. |
1319
1321
  | `open-claude-design` | `openClaudeDesign` | `@bastani/workflows/builtin/open-claude-design` | Generate and refine a UI/design artifact and handoff spec. |
1320
1322
 
1321
1323
  Example parent workflow that runs builtin deep research, then chooses either `goal` or `ralph` as the nested implementation runner:
@@ -1330,7 +1332,7 @@ export default defineWorkflow("research-then-implement")
1330
1332
  "runner",
1331
1333
  Type.Union([Type.Literal("goal"), Type.Literal("ralph")], {
1332
1334
  default: "goal",
1333
- description: "Use goal for bounded changes or Ralph for broad spec-to-reviewed-change work.",
1335
+ description: "Use goal for bounded changes or Ralph for broad research-first implementation work.",
1334
1336
  }),
1335
1337
  )
1336
1338
  .output("research_doc_path", Type.Optional(Type.String({ description: "Path to the deep-research document used for implementation." })))
@@ -1477,7 +1479,9 @@ Common task/stage options include:
1477
1479
  - `output`, `outputMode`, `reads`, `worktree`, `gitWorktreeDir`, `baseBranch`, `maxOutput`, `artifacts`, `sessionDir`, `cwd`, `agentDir`
1478
1480
  - advanced host-supplied SDK seams: `authStorage`, `resourceLoader`, `sessionManager`, `settingsManager`, `sessionStartEvent`
1479
1481
 
1480
- `schema` is opt-in. When a `ctx.stage` call, `ctx.task` call, `ctx.chain` item, or `ctx.parallel` item includes a top-level object TypeBox/JSON Schema, Atomic registers a schema-specific `structured_output` tool for that item only, appends final-answer instructions, and requires the stage to finish by calling the tool exactly once. The prompt result is the parsed structured value for `ctx.stage(..., { schema }).prompt(...)`; task/chain/parallel results also include `result.structured` and keep `result.text` as formatted JSON for handoffs. Because the result contract is single-use, a schema-backed `StageContext` supports one `prompt()` call; create a new `ctx.stage(..., { schema })` for each additional structured prompt. If the item also uses an explicit `tools` allowlist, Atomic automatically adds `structured_output` to that allowlist. Items without `schema` do not receive `structured_output` from the normal tool registry.
1482
+ `schema` is opt-in. When a `ctx.stage` call, `ctx.task` call, `ctx.chain` item, or `ctx.parallel` item includes a TypeBox schema or plain JSON Schema descriptor object, Atomic registers a schema-specific final-answer tool for that item only. The schema may describe object, array, or primitive final values; the captured value is the JSON value passed to the tool. The prompt result is the captured structured value for `ctx.stage(..., { schema }).prompt(...)`; task/chain/parallel results also include `result.structured` and keep `result.text` as formatted JSON for handoffs. Because the result contract is single-use, a schema-backed `StageContext` supports one `prompt()` call; create a new `ctx.stage(..., { schema })` for each additional structured prompt. If the item also uses an explicit `tools` allowlist, Atomic automatically adds the final-answer tool to that allowlist. Items without `schema` do not receive the final-answer tool from the normal tool registry.
1483
+
1484
+ `subagent` is available as a default workflow-stage tool, with the same default two-hop nesting budget as main chat: a workflow stage can launch a subagent, and that subagent can launch one nested subagent before the guard blocks further delegation. `tools` remains an allowlist across built-in tools and bundled extension tools; if you set `tools`, list every tool the stage should see. Explicitly listing tools such as `subagent`, `web_search`, `fetch_content`, or `intercom` exposes those tools to the stage, while `excludedTools` and `noTools: "all"` still win. The bundled subagent definitions from `@bastani/subagents` are available to the `subagent` tool in workflow stages; when a workflow is itself running inside a subagent child process, Atomic isolates stage resource discovery from the parent child-process flags so `subagent` remains available while workflow-stage nested-depth guards remain in force.
1481
1485
 
1482
1486
  `bashPolicy` scopes the built-in `bash` tool for one stage or task. `tools` must still include `"bash"` (or leave it available by default); the policy only narrows command text after the shell tool is exposed. It supports exact strings, `{ prefix }`, command-string `{ glob }`, and `{ regex, flags? }` rules, `default: "allow" | "deny"` (default `"allow"`), `deny` precedence, and `match: "segments" | "whole"` (default `"segments"`). Omitting `bashPolicy`, passing `{}`, or passing a default-allow policy with no `allow`/`deny` rules (including empty arrays or match-only default-allow policies) preserves legacy behavior and does not parse commands; malformed policy shapes such as unknown top-level keys (`denny`, `extra`), non-array `allow`/`deny`, invalid rule objects, invalid regexes, invalid glob bracket ranges, or stateful `g`/`y` regex flags fail closed as `invalid-policy`. Segment mode checks each command in pipelines/chains/substitutions before execution, treats unquoted LF, CRLF, and bare CR as command separators, keeps non-leading Bash `>|` noclobber redirections inside the current command segment, and rejects reserved/compound shell heads, leading redirections, attached command-head redirections, and command heads that are not literal words.
1483
1487
 
@@ -1738,7 +1742,7 @@ Build validation into the workflow instead of waiting for a final manual check.
1738
1742
  - reviewer stages: fresh-context reviewers that inspect artifacts and current files
1739
1743
  - LLM-as-judge stages: direct scoring, pairwise comparison, or rubric-based grading for subjective outputs
1740
1744
 
1741
- Prefer schema-enabled workflow items for model review and gate decisions. `structured_output` is not available to workflow stages through the normal Atomic tool registry; it is injected only when a `ctx.stage`, `ctx.task`, `ctx.chain` item, or `ctx.parallel` item includes `schema`. Structured-output schemas must be top-level object tool-argument schemas, so wrap array or primitive decisions in object fields such as `{ items: [...] }` or `{ value: ... }`; direct JSON invocations of the `workflow` tool must use a schema with `type: "object"` so invalid array/primitive contracts fail at argument validation instead of later in the stage. Terminating `structured_output` JSON is preserved inline even when it exceeds the normal oversized-tool-result threshold, so workflow code can consume the parsed value instead of a `<persisted-output>` pointer. Do not add the old synthetic `{ value: ... }` wrapper around an object payload unless your schema defines that field, and do not make correctness depend on brittle regular-expression matching against free-form prose such as “looks good”, “approved”, or “PASS”. A schema with explicit booleans/enums, findings arrays, confidence, evidence fields, and error reporting is easier to validate, replay, and safely default to “not approved” when malformed.
1745
+ Prefer schema-enabled workflow items for model review and gate decisions. Atomic passes the schema directly to the final-answer tool and captures the tool arguments; it no longer adds separate structured-output parsing, object-root restrictions, or sidecar validation. Object-shaped decision schemas with explicit booleans/enums, findings arrays, confidence, evidence fields, and error reporting are usually easiest to consume, but array or primitive schemas are valid when they fit the handoff. Avoid brittle regular-expression matching against free-form prose such as “looks good”, “approved”, or “PASS”.
1742
1746
 
1743
1747
  Use small dedicated model stages for adaptive gates when deterministic code alone cannot decide what to check. For example, a stage can read an artifact, inspect the repo, run a named tool or command, and then emit a structured decision by configuring `schema` on that workflow item. Keep that stage's prompt narrow: tell it the specific check to perform, the files/tools it may use, and the structured decision it must return.
1744
1748
 
@@ -1797,5 +1801,5 @@ Good workflows are information-flow systems, not just prompt sequences. Keep sta
1797
1801
  - Do not call `kill` when the user asks to interrupt or pause resumably.
1798
1802
  - Keep stage names readable because they appear in workflow status and UI.
1799
1803
  - Do not write stage prompts that depend on hidden workflow-wide awareness; make each model stage locally scoped and self-described.
1800
- - Do not parse model gate decisions from ad-hoc prose with regular expressions; configure `schema` on a focused workflow item so Atomic injects the canonical `structured_output` tool for that item.
1801
- - Return compact structured output for decisions and save large artifacts to files; schema-enabled workflow items preserve final JSON inline, but artifact handoffs should still use files when the next stage does not need the whole payload in context.
1804
+ - Do not parse model gate decisions from ad-hoc prose with regular expressions; configure `schema` on a focused workflow item and consume `result.structured`.
1805
+ - Return compact structured decisions and save large artifacts to files; hand artifacts off through files whenever the next stage does not need the whole payload in context.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/atomic",
3
- "version": "0.8.29-alpha.2",
3
+ "version": "0.8.29-alpha.4",
4
4
  "description": "Atomic coding agent CLI with read, bash, edit, write tools and session management",
5
5
  "type": "module",
6
6
  "atomicConfig": {
@@ -68,7 +68,7 @@
68
68
  "prepublishOnly": "bun run clean && bun run build"
69
69
  },
70
70
  "dependencies": {
71
- "@bastani/atomic-natives": "0.8.29-alpha.2",
71
+ "@bastani/atomic-natives": "0.8.29-alpha.4",
72
72
  "@bufbuild/protobuf": "^2.0.0",
73
73
  "@earendil-works/pi-agent-core": "^0.79.3",
74
74
  "@earendil-works/pi-ai": "^0.79.3",