npm - @bastani/atomic - Versions diffs - 0.8.28 → 0.8.29-alpha.3 - Mend

@bastani/atomic 0.8.28 → 0.8.29-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145)

package/CHANGELOG.md +37 -0
package/dist/builtin/cursor/CHANGELOG.md +27 -0
package/dist/builtin/cursor/LICENSE +26 -0
package/dist/builtin/cursor/README.md +22 -0
package/dist/builtin/cursor/index.ts +9 -0
package/dist/builtin/cursor/package.json +46 -0
package/dist/builtin/cursor/src/auth.ts +352 -0
package/dist/builtin/cursor/src/catalog-cache.ts +155 -0
package/dist/builtin/cursor/src/config.ts +123 -0
package/dist/builtin/cursor/src/conversation-state.ts +135 -0
package/dist/builtin/cursor/src/cursor-models-raw.json +583 -0
package/dist/builtin/cursor/src/model-mapper.ts +270 -0
package/dist/builtin/cursor/src/models.ts +54 -0
package/dist/builtin/cursor/src/native-loader.ts +71 -0
package/dist/builtin/cursor/src/proto/README.md +34 -0
package/dist/builtin/cursor/src/proto/agent_pb.ts +15294 -0
package/dist/builtin/cursor/src/proto/protobuf-codec.ts +717 -0
package/dist/builtin/cursor/src/provider.ts +301 -0
package/dist/builtin/cursor/src/stream.ts +564 -0
package/dist/builtin/cursor/src/transport.ts +791 -0
package/dist/builtin/intercom/CHANGELOG.md +4 -0
package/dist/builtin/intercom/package.json +2 -2
package/dist/builtin/intercom/skills/intercom/SKILL.md +5 -5
package/dist/builtin/mcp/CHANGELOG.md +4 -0
package/dist/builtin/mcp/package.json +3 -3
package/dist/builtin/subagents/CHANGELOG.md +13 -0
package/dist/builtin/subagents/README.md +7 -3
package/dist/builtin/subagents/agents/codebase-online-researcher.md +9 -24
package/dist/builtin/subagents/agents/debugger.md +3 -5
package/dist/builtin/subagents/package.json +4 -4
package/dist/builtin/subagents/src/runs/background/subagent-runner.ts +2 -1
package/dist/builtin/subagents/src/runs/foreground/execution.ts +2 -1
package/dist/builtin/subagents/src/runs/shared/parallel-utils.ts +1 -0
package/dist/builtin/subagents/src/runs/shared/pi-args.ts +19 -2
package/dist/builtin/subagents/src/runs/shared/structured-output.ts +271 -10
package/dist/builtin/subagents/src/runs/shared/subagent-prompt-runtime.ts +12 -39
package/dist/builtin/subagents/src/shared/types.ts +5 -3
package/dist/builtin/subagents/src/shared/utils.ts +50 -10
package/dist/builtin/subagents/src/slash/saved-chain-mapping.ts +77 -0
package/dist/builtin/subagents/src/slash/slash-commands.ts +1 -55
package/dist/builtin/web-access/CHANGELOG.md +5 -1
package/dist/builtin/web-access/README.md +1 -1
package/dist/builtin/web-access/github-extract.ts +1 -1
package/dist/builtin/web-access/package.json +3 -3
package/dist/builtin/workflows/CHANGELOG.md +26 -0
package/dist/builtin/workflows/README.md +28 -8
package/dist/builtin/workflows/builtin/deep-research-codebase.ts +9 -49
package/dist/builtin/workflows/builtin/goal.ts +63 -106
package/dist/builtin/workflows/builtin/index.d.ts +2 -0
package/dist/builtin/workflows/builtin/open-claude-design.ts +31 -76
package/dist/builtin/workflows/builtin/ralph.d.ts +2 -0
package/dist/builtin/workflows/builtin/ralph.ts +227 -518
package/dist/builtin/workflows/builtin/shared-prompts.ts +7 -0
package/dist/builtin/workflows/package.json +2 -2
package/dist/builtin/workflows/skills/research-codebase/SKILL.md +17 -3
package/dist/builtin/workflows/src/extension/wiring.ts +72 -9
package/dist/builtin/workflows/src/extension/workflow-schema.ts +34 -0
package/dist/builtin/workflows/src/runs/foreground/executor.ts +13 -2
package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +86 -14
package/dist/builtin/workflows/src/shared/authoring-contract.d.ts +11 -3
package/dist/builtin/workflows/src/shared/types.ts +8 -4
package/dist/builtin/workflows/src/tui/overlay-adapter.ts +64 -2
package/dist/builtin/workflows/src/tui/workflow-attach-pane.ts +8 -8
package/dist/builtin/workflows/src/tui/workflow-status.ts +2 -0
package/dist/core/atomic-guide-command.d.ts.map +1 -1
package/dist/core/atomic-guide-command.js +7 -7
package/dist/core/atomic-guide-command.js.map +1 -1
package/dist/core/builtin-packages.d.ts.map +1 -1
package/dist/core/builtin-packages.js +6 -0
package/dist/core/builtin-packages.js.map +1 -1
package/dist/core/extensions/index.d.ts +1 -1
package/dist/core/extensions/index.d.ts.map +1 -1
package/dist/core/extensions/index.js.map +1 -1
package/dist/core/extensions/types.d.ts +20 -0
package/dist/core/extensions/types.d.ts.map +1 -1
package/dist/core/extensions/types.js.map +1 -1
package/dist/core/model-resolver.d.ts +1 -0
package/dist/core/model-resolver.d.ts.map +1 -1
package/dist/core/model-resolver.js +17 -8
package/dist/core/model-resolver.js.map +1 -1
package/dist/core/package-manager.d.ts +11 -9
package/dist/core/package-manager.d.ts.map +1 -1
package/dist/core/package-manager.js +55 -10
package/dist/core/package-manager.js.map +1 -1
package/dist/core/project-trust.d.ts +1 -0
package/dist/core/project-trust.d.ts.map +1 -1
package/dist/core/project-trust.js +3 -3
package/dist/core/project-trust.js.map +1 -1
package/dist/core/resource-loader.d.ts +11 -2
package/dist/core/resource-loader.d.ts.map +1 -1
package/dist/core/resource-loader.js +72 -9
package/dist/core/resource-loader.js.map +1 -1
package/dist/core/sdk.d.ts +3 -3
package/dist/core/sdk.d.ts.map +1 -1
package/dist/core/sdk.js +5 -5
package/dist/core/sdk.js.map +1 -1
package/dist/core/tools/index.d.ts +1 -0
package/dist/core/tools/index.d.ts.map +1 -1
package/dist/core/tools/index.js +1 -0
package/dist/core/tools/index.js.map +1 -1
package/dist/core/tools/structured-output.d.ts +39 -0
package/dist/core/tools/structured-output.d.ts.map +1 -0
package/dist/core/tools/structured-output.js +141 -0
package/dist/core/tools/structured-output.js.map +1 -0
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -1
package/dist/index.js.map +1 -1
package/dist/main.d.ts.map +1 -1
package/dist/main.js +36 -14
package/dist/main.js.map +1 -1
package/dist/modes/interactive/components/login-dialog.d.ts +3 -0
package/dist/modes/interactive/components/login-dialog.d.ts.map +1 -1
package/dist/modes/interactive/components/login-dialog.js +16 -0
package/dist/modes/interactive/components/login-dialog.js.map +1 -1
package/dist/modes/interactive/interactive-mode.d.ts +11 -0
package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
package/dist/modes/interactive/interactive-mode.js +158 -11
package/dist/modes/interactive/interactive-mode.js.map +1 -1
package/dist/modes/print-mode.d.ts.map +1 -1
package/dist/modes/print-mode.js +39 -0
package/dist/modes/print-mode.js.map +1 -1
package/docs/custom-provider.md +1 -0
package/docs/extensions.md +2 -2
package/docs/models.md +2 -0
package/docs/packages.md +3 -1
package/docs/providers.md +15 -0
package/docs/quickstart.md +3 -3
package/docs/sdk.md +61 -0
package/docs/security.md +1 -1
package/docs/subagents.md +21 -0
package/docs/usage.md +2 -0
package/docs/workflows.md +28 -21
package/examples/extensions/README.md +1 -1
package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
package/examples/extensions/custom-provider-anthropic/package.json +1 -1
package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
package/examples/extensions/gondolin/package-lock.json +2 -2
package/examples/extensions/gondolin/package.json +1 -1
package/examples/extensions/sandbox/package-lock.json +2 -2
package/examples/extensions/sandbox/package.json +1 -1
package/examples/extensions/structured-output.ts +22 -53
package/examples/extensions/with-deps/package-lock.json +2 -2
package/examples/extensions/with-deps/package.json +1 -1
package/package.json +12 -9

package/dist/builtin/web-access/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
 ## [Unreleased]
+### Changed
+- Published a synchronized Atomic 0.8.29-alpha.1 prerelease with the upstream pi TUI dependency aligned to `^0.79.3`; no functional changes were made in the web-access extension.
 ## [0.8.28] - 2026-06-11
 ### Changed
@@ -434,7 +438,7 @@ All notable changes to this project will be documented in this file.
 ## [0.5.0] - 2026-02-01
 ### Added
-- GitHub repository clone extraction for `fetch_content` -- detects GitHub code URLs, clones repos to `/tmp/pi-github-repos/`, and returns actual file contents plus local path for further exploration with `read` and `bash`
+- GitHub repository clone extraction for `fetch_content` -- detects GitHub code URLs, clones repos to `/tmp/atomic-github-repos/`, and returns actual file contents plus local path for further exploration with `read` and `bash`
 - Lightweight API fallback for oversized repos (>350MB) and commit SHA URLs via `gh api`
 - Clone cache with concurrent request deduplication (second request awaits first's clone)
 - `forceClone` parameter on `fetch_content` to override the size threshold

package/dist/builtin/web-access/README.md CHANGED Viewed

@@ -265,7 +265,7 @@ All config lives in `~/.pi/web-search.json`. Every field is optional.
     "enabled": true,
     "maxRepoSizeMB": 350,
     "cloneTimeoutSeconds": 30,
-    "clonePath": "/tmp/pi-github-repos"
+    "clonePath": "/tmp/atomic-github-repos"
   },
   "youtube": {
     "enabled": true,

package/dist/builtin/web-access/github-extract.ts CHANGED Viewed

@@ -78,7 +78,7 @@ function loadGitHubConfig(): GitHubCloneConfig {
 		enabled: true,
 		maxRepoSizeMB: 350,
 		cloneTimeoutSeconds: 30,
-		clonePath: "/tmp/pi-github-repos",
+		clonePath: "/tmp/atomic-github-repos",
 	};
 	if (!existsSync(CONFIG_PATH)) {

package/dist/builtin/web-access/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/web-access",
-  "version": "0.8.28",
+  "version": "0.8.29-alpha.3",
   "private": true,
   "description": "Atomic extension for web search, URL fetching, GitHub repo cloning, PDF/video extraction. Fork of: https://github.com/nicobailon/pi-web-access",
   "contributors": [
@@ -30,7 +30,7 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-tui": "^0.78.1"
+    "@earendil-works/pi-tui": "^0.79.3"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {
@@ -43,7 +43,7 @@
   "dependencies": {
     "@mozilla/readability": "^0.6.0",
     "linkedom": "^0.18.12",
-    "p-limit": "^6.1.0",
+    "p-limit": "^7.3.0",
     "turndown": "^7.2.0",
     "unpdf": "^1.6.2"
   }

package/dist/builtin/workflows/CHANGELOG.md CHANGED Viewed

@@ -6,6 +6,32 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 ## [Unreleased]
+### Added
+- Added opt-in schema-backed workflow item results: `ctx.stage(..., { schema })`, `ctx.task(..., { schema })`, `ctx.chain` items, and `ctx.parallel` items now receive a schema-specific `structured_output` tool only for that item, require the final tool call, return the parsed value from `ctx.stage().prompt(...)`, and expose parsed task values as `result.structured` while preserving formatted JSON handoff text ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+### Changed
+- Changed the builtin `ralph` workflow to start each iteration with `/skill:prompt-engineer` prompt-engineering and `/skill:research-codebase` research instead of an RFC/planner stage, pass the resulting research artifact to the orchestrator as primary implementation context, fork follow-up research from prior research session data, and feed unresolved reviewer findings into subsequent research passes ([#1371](https://github.com/bastani-inc/atomic/issues/1371)).
+- Changed builtin `goal`, `ralph`, and `open-claude-design` decision gates to use schema-backed workflow `structured_output` stages with TypeBox-native schema builders instead of registering bespoke terminating custom tools or wrapping plain JSON schemas with `Type.Unsafe`.
+- Changed the builtin `ralph` prompt-engineering stage to disable all tools while relying on the `/skill:prompt-engineer` skill prompt, keeping that first-pass rewrite focused and tool-free.
+- Changed builtin `goal` worker/reviewer prompts and `ralph` orchestrator/reviewer prompts to request end-to-end verification when practical, using browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios.
+- Aligned the workflows extension with upstream pi TUI `^0.79.3` so workflow graph, custom UI, and prompt-broker integrations inherit the latest shared TUI compatibility fixes.
+- Documented the opt-in `structured_output` workflow path and clarified that ordinary workflow stages do not receive `structured_output` from the default tool registry; schema-enabled items auto-add the runtime tool to explicit `tools` allowlists ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+- Clarified that workflow `structured_output` gate schemas must be top-level object tool-argument schemas, with arrays and primitives wrapped in object fields before being returned through the terminating tool, and documented the one-`prompt()` limit for schema-backed `StageContext` result contracts ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+- Documented that terminating workflow-stage `structured_output` JSON stays inline even when large, while artifact-sized handoffs should still be saved to files when downstream stages do not need the full payload in context ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+### Fixed
+- Fixed direct workflow tool validation so schema-enabled `task`, `tasks`, `chain`, and `parallel` items reject array or primitive structured-output schemas at argument-validation time while accepting the same object-root contracts as runtime validation, including object-only `allOf` schemas ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+- Fixed schema-backed workflow stages to fail with a clear stage-level error when `prompt()` is called more than once on the same `StageContext`, rather than surfacing the lower-level structured-output single-use guard ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+- Fixed schema-backed workflow model fallback so an attempt that already captured a valid terminating `structured_output` result is treated as successful instead of retrying against fallback models and tripping the single-use result guard ([#1350](https://github.com/bastani-inc/atomic/issues/1350)).
+- Fixed the workflow graph overlay remaining interactive when the parent/main-chat agent opens `ask_user_question`: the graph keeps focus, the parent question stays pending behind it with a clear “Main chat needs input — exit graph to answer.” status hint, hiding/exiting the graph focuses the pending question, and host custom-UI state changes no longer hide, restore, remount, or repaint the overlay ([#1353](https://github.com/bastani-inc/atomic/issues/1353)).
+- Fixed builtin `ralph` skill-prompt stages to invoke bundled skills through `/skill:<name>` expansion so prompt engineering and research stages receive the intended skill instructions.
+- Fixed concurrent workflow stage resource reloads to serialize temporary subagent child environment isolation so parallel stage startup cannot leave parent process child flags accidentally cleared.
+- Fixed workflow stage sessions to keep bundled workflow package skills (`create-spec`, `impeccable`, `prompt-engineer`, `research-codebase`, and `skill-creator`) available while still disabling only the recursive workflows extension inside child sessions.
+- Fixed workflow stage resource discovery so bundled subagent definitions stay available, `subagent` is active by default with the same two-hop nesting budget as main chat, and explicitly allowlisted bundled extension tools such as `subagent`, `web_search`, `fetch_content`, and `intercom` remain visible even when a workflow is launched from a subagent child process.
 ## [0.8.28] - 2026-06-11
 ### Added

package/dist/builtin/workflows/README.md CHANGED Viewed

@@ -264,6 +264,26 @@ Worktree semantics:
 For advanced integrations, the SDK also exports `setupGitWorktree(options)`, which returns `{ worktreeRoot, cwd, repositoryRoot, created }` and uses the same validation/path behavior as the executor.
+### Structured stage results
+`structured_output` is opt-in for workflow items. Add `schema` to `ctx.stage`, `ctx.task`, `ctx.chain` items, or `ctx.parallel` items when the stage must finish with machine-readable JSON:
+```typescript
+const Decision = Type.Object({
+  approved: Type.Boolean(),
+  findings: Type.Array(Type.String()),
+}, { additionalProperties: false });
+const decision = await ctx.stage("review-gate", { schema: Decision }).prompt(
+  "Review the artifact and return the decision.",
+);
+// decision.approved is typed from the schema.
+```
+Atomic registers the canonical `structured_output` tool only for schema-enabled items, automatically adds it to explicit `tools` allowlists, and fails the item if the model completes without the final tool call. The schema is used directly as the tool argument contract, so wrap arrays or primitives in an object field such as `{ items: [...] }` or `{ value: ... }`. A schema-backed `StageContext` supports one `prompt()` call because the final-answer tool is an exact-once result contract; create another `ctx.stage(..., { schema })` for another structured prompt. `ctx.task`/`ctx.chain`/`ctx.parallel` results expose the parsed value as `result.structured` and keep `result.text` as formatted JSON for handoffs.
+`subagent` is available as a default workflow-stage tool with the same default two-hop nesting budget as main chat: a stage can launch a subagent, and that child can launch one nested subagent before the guard blocks further delegation. `tools` allowlists apply to bundled extension tools as well as built-ins; if a stage sets `tools`, list every tool it should see. Workflow stages can explicitly list `subagent`, `web_search`, `fetch_content`, `intercom`, and other loaded extension tools, while `excludedTools` and `noTools: "all"` still win. Bundled `@bastani/subagents` agent definitions are available to the `subagent` tool in workflow stages, including workflows launched from a subagent child process.
 ### Model fallbacks
 Stages and high-level task helpers can retry transient provider/model failures with an ordered `fallbackModels` list. The primary `model` is tried first, then each fallback, and finally the current Atomic-selected model when available. Fallbacks are only used for retryable model/provider failures such as rate limits, quota/auth/provider outages, unavailable models, network timeouts, and 5xx errors — ordinary tool, shell, validation, cancellation, and workflow-code failures are not retried.
@@ -501,7 +521,7 @@ Prompt answer replay is live-memory only. `StageSnapshot.promptAnswerState` repo
     "async": "optional boolean to dispatch a run in the background",
     "intercom": "optional intercom coordination options",
     "chainDir": "optional directory for direct chain artifacts",
-    "session/task options": "per-stage overrides also accepted at the top level and on direct task items — model, thinkingLevel, fallbackModels, tools, noTools, customTools, mcp, context, cwd, output, outputMode, reads, worktree, gitWorktreeDir, baseBranch, maxOutput, artifacts, and more"
+    "session/task options": "per-stage overrides also accepted at the top level and on direct task items — schema, model, thinkingLevel, fallbackModels, tools, noTools, customTools, mcp, context, cwd, output, outputMode, reads, worktree, gitWorktreeDir, baseBranch, maxOutput, artifacts, and more"
   }
 }
 ```
@@ -571,7 +591,7 @@ Child workflow outputs: `result`, `findings`, `research_doc_path`, `artifact_dir
 ### `goal`
-Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, and let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`. Token budget behavior is intentionally excluded.
+Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, and let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical with browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Token budget behavior is intentionally excluded.
 ```text
 /workflow goal objective="Migrate the database layer to Drizzle ORM" base_branch=develop
@@ -589,22 +609,22 @@ Child workflow outputs: `result`, `status`, `approved`, `goal_id`, `objective`,
 ### `ralph`
-Plan → orchestrate → simplify → review workflow with optional final-stage PR handoff: write an RFC-style technical design document under `specs/`, delegate implementation through sub-agents, simplify recent changes, run parallel reviewers, and iterate until approval or the loop limit. Ralph skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation (for example GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling). Ralph's own PR-creation instructions live in that final stage. Reviewers inspect repository infrastructure directly as needed; Ralph no longer runs separate `infra-*` discovery stages.
+Prompt-engineering → research → orchestrate → review workflow with optional final-stage PR handoff: transform the user prompt into a codebase and online research question with `/skill:prompt-engineer`, run `/skill:research-codebase` against it, write findings under `research/`, delegate implementation through sub-agents from that research, run parallel reviewers, and iterate until approval or the loop limit. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical with browser-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Follow-up iterations pass unresolved review artifacts into prompt-engineering/research and fork research from prior research session data when available. Ralph skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation (for example GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling). Ralph's own PR-creation instructions live in that final stage. Reviewers inspect repository infrastructure directly as needed; Ralph no longer runs separate `infra-*` discovery stages.
 ```text
-/workflow ralph prompt="Plan and migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop
-/workflow ralph prompt="Plan and migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop create_pr=true
+/workflow ralph prompt="Migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop
+/workflow ralph prompt="Migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop create_pr=true
 ```
 | Input              | Type      | Required | Default       | Description                                                   |
 | ------------------ | --------- | -------- | ------------- | ------------------------------------------------------------- |
-| `prompt`           | `text`    | ✓        | —             | Task, feature request, issue summary, or spec path to plan, execute, refine, and review. |
-| `max_loops`        | `number`  | —        | `10`          | Maximum plan/orchestrate/review iterations before completion or optional final handoff. |
+| `prompt`           | `text`    | ✓        | —             | Task, feature request, issue summary, or spec path to research, execute, refine, and review. |
+| `max_loops`        | `number`  | —        | `10`          | Maximum research/orchestrate/review iterations before completion or optional final handoff. |
 | `base_branch`      | `string`  | —        | `origin/main` | Branch reviewers and the optional final stage compare the current delta with; also used to create a missing worktree. |
 | `git_worktree_dir` | `string`  | —        | `""`          | Optional reusable Git worktree root. Empty runs in the invoking checkout; non-empty values run Ralph stages in the created/reused worktree. |
 | `create_pr`        | `boolean` | —        | `false`       | Safe-by-default PR creation flag. Omitted or `false` skips the final `pull-request` stage and omits `pr_report`; prompt text alone does not opt in, and only strict `true` authorizes the final `pull-request` stage to attempt provider-appropriate PR/MR/review creation. |
-Child workflow outputs: `result`, `plan`, `plan_path`, `implementation_notes_path`, `approved`, `iterations_completed`, `review_report`, and `review_report_path`. `pr_report` is included only when `create_pr=true` and the final `pull-request` stage runs.
+Child workflow outputs: `result`, `plan` (latest transformed research question), `plan_path` (compatibility alias for `research_path`), `research`, `research_path`, `implementation_notes_path`, `approved`, `iterations_completed`, `review_report`, and `review_report_path`. `pr_report` is included only when `create_pr=true` and the final `pull-request` stage runs.
 ### `open-claude-design`

package/dist/builtin/workflows/builtin/deep-research-codebase.ts CHANGED Viewed

@@ -44,27 +44,6 @@ interface DeepResearchCodebaseResult {
 const FILE_ONLY_OUTPUT = "file-only" satisfies WorkflowOutputMode;
-const CODEBASE_SKILLS = {
-  locator:
-    "codebase-locator — use this skill's search-first discipline when mapping where files, symbols, docs, tests, and configuration live.",
-  analyzer:
-    "codebase-analyzer — use this skill's evidence-driven deep-read style when explaining behavior, architecture, control flow, data flow, and edge cases.",
-  patternFinder:
-    "codebase-pattern-finder — use this skill's example-mining approach when separating reusable conventions from one-off details.",
-  researchLocator:
-    "codebase-research-locator — use this skill's historical-discovery approach when finding prior research, specs, ADRs, issues, and TODOs.",
-  researchAnalyzer:
-    "codebase-research-analyzer — use this skill's synthesis approach when extracting decisions, constraints, stale assumptions, and open questions from prior research.",
-  onlineResearcher:
-    "codebase-online-researcher — use this skill's source-citing approach when external documentation or ecosystem behavior materially affects the answer.",
-} as const;
-function codebaseSkillGuidance(
-  ...skills: readonly (keyof typeof CODEBASE_SKILLS)[]
-): string {
-  return skills.map((skill) => CODEBASE_SKILLS[skill]).join("\n");
-}
 function taggedPrompt(sections: readonly PromptSection[]): string {
   return sections
     .map(([tag, content]) => {
@@ -446,11 +425,7 @@ export default defineWorkflow("deep-research-codebase")
               "role",
               "You are a senior codebase research scout preparing work for specialist agents.",
             ],
-            ["objective", `Map the repository. Research question: ${prompt}`],
-            [
-              "codebase_skills",
-              codebaseSkillGuidance("locator", "analyzer", "patternFinder"),
-            ],
+            ["objective", `Map the repository using parallel codebase-locator, codebase-analyzer, and codebase-pattern-finder subagents. Research question: ${prompt}`],
             [
               "instructions",
               [
@@ -480,10 +455,9 @@ export default defineWorkflow("deep-research-codebase")
             ["role", "You locate prior project research and decision history."],
             [
               "objective",
-              "Find existing docs, specs, ADRs, issues/PR notes, TODOs, and research artifacts relevant to the task.",
+              "Find existing docs, specs, ADRs, issues/PR notes, TODOs, and research artifacts relevant to the task using parallel codebase-research-locator subagents.",
             ],
             ["task", "{task}"],
-            ["codebase_skills", codebaseSkillGuidance("researchLocator")],
             [
               "instructions",
               [
@@ -520,10 +494,9 @@ export default defineWorkflow("deep-research-codebase")
             ],
             [
               "objective",
-              `Extract reusable historical context. Research question: ${prompt}`,
+              `Extract reusable historical context using parallel codebase-research-analyzer subagents. Research question: ${prompt}`,
             ],
             ["prior_research_locator_output", "{previous}"],
-            ["codebase_skills", codebaseSkillGuidance("researchAnalyzer")],
             [
               "instructions",
               [
@@ -558,13 +531,9 @@ export default defineWorkflow("deep-research-codebase")
         ["role", "You turn scout research into clean work partitions."],
         [
           "objective",
-          `Return at most ${partitionCap} independent partitions for this research question: ${prompt}`,
+          `Return at most ${partitionCap} independent partitions for this research question: ${prompt}. Use parallel codebase-locator, codebase-analyzer, and codebase-pattern-finder subagents.`,
         ],
         ["scout_output", "{previous}"],
-        [
-          "codebase_skills",
-          codebaseSkillGuidance("locator", "analyzer", "patternFinder"),
-        ],
         [
           "instructions",
           [
@@ -607,11 +576,11 @@ export default defineWorkflow("deep-research-codebase")
                 "scout_context",
                 `Read the scout artifact before making evidence claims: ${displayWorkflowPath(scoutPath)}\nCompact saved-output reference: {previous}`,
               ],
-              ["codebase_skills", codebaseSkillGuidance("locator")],
               [
                 "instructions",
                 [
                   "Find the highest-signal files, tests, docs, commands, configs, and symbols for this partition.",
+                  "Use parallel codebase-locator subagents to explore different areas of the partition.",
                   "Explain why each path matters for the research question.",
                   "Prioritize exact paths and symbol names over broad descriptions.",
                   "Flag areas that look relevant but could not be verified.",
@@ -643,11 +612,10 @@ export default defineWorkflow("deep-research-codebase")
                 "scout_context",
                 `Read the scout artifact before making evidence claims: ${displayWorkflowPath(scoutPath)}\nCompact saved-output reference: {previous}`,
               ],
-              ["codebase_skills", codebaseSkillGuidance("patternFinder")],
               [
                 "instructions",
                 [
-                  "Identify recurring implementation patterns, abstractions, naming conventions, and anti-patterns in this partition.",
+                  "Identify recurring implementation patterns, abstractions, naming conventions, and anti-patterns in this partition using parallel codebase-pattern-finder subagents.",
                   "Use concrete examples with paths, symbols, or test names.",
                   "Distinguish established conventions from one-off implementation details.",
                   "Avoid generic advice that is not grounded in the repository.",
@@ -711,11 +679,10 @@ export default defineWorkflow("deep-research-codebase")
                 "context",
                 `Read these artifacts before analyzing: ${displayWorkflowPaths(analyzerReads)}\nCompact saved-output reference: {previous}`,
               ],
-              ["codebase_skills", codebaseSkillGuidance("analyzer")],
               [
                 "instructions",
                 [
-                  "Analyze behavior, control flow, data flow, lifecycle, error handling, and test coverage for this partition.",
+                  "Analyze behavior, control flow, data flow, lifecycle, error handling, and test coverage for this partition using parallel codebase-analyzer subagents.",
                   "Build on the locator output; do not repeat file discovery except where needed as evidence.",
                   "Call out edge cases, invariants, and coupling to other partitions.",
                   "If evidence is incomplete, explain what remains unknown and how to verify it.",
@@ -747,11 +714,11 @@ export default defineWorkflow("deep-research-codebase")
               ["assignment", `Partition ${i}/${partitions.length}: ${partition}`],
               ["research_question", prompt],
               ["local_context", onlineResearcherLocalContext],
-              ["codebase_skills", codebaseSkillGuidance("onlineResearcher")],
               [
                 "instructions",
                 [
                   "Identify external library/framework behavior, standards, or docs that materially affect the local interpretation.",
+                  "Use parallel codebase-online-researcher subagents to explore different angles of external research.",
                   "Cite sources, package names, API names, versions, or documentation titles when available.",
                   "Explain how each external fact applies to this repository.",
                   "If external research is unnecessary or unavailable, say so and focus on local implications.",
@@ -829,14 +796,6 @@ export default defineWorkflow("deep-research-codebase")
           "specialist_reports",
           `Read the complete explorer handoff artifact(s) at ${displayWorkflowPaths(explorerPaths)}. They preserve every partition's Locator, Pattern Finder, Analyzer, and Online Researcher output from the original inline specialist handoff while keeping this prompt bounded.`,
         ],
-        [
-          "codebase_skills",
-          codebaseSkillGuidance(
-            "analyzer",
-            "researchAnalyzer",
-            "onlineResearcher",
-          ),
-        ],
         [
           "instructions",
           [
@@ -845,6 +804,7 @@ export default defineWorkflow("deep-research-codebase")
             "Prioritize claims supported by concrete paths, symbols, tests, docs, or cited external references.",
             "Resolve contradictions explicitly and preserve important uncertainty.",
             "Avoid inventing facts not supported by the supplied reports; state unknowns instead.",
+            "Use parallel codebase-analyzer, codebase-research-analyzer, and codebase-online-researcher subagents as needed to verify claims or fill critical gaps in the supplied reports.",
             "End with actionable next steps for a developer who will use this research.",
           ].join("\n"),
         ],

package/dist/builtin/workflows/builtin/goal.ts CHANGED Viewed

@@ -13,7 +13,7 @@ import { join } from "node:path";
 import { defineWorkflow } from "../src/workflows/define-workflow.js";
 import { Type } from "typebox";
 import type { WorkflowTaskResult } from "../src/shared/types.js";
-import { WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
+import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
 const DEFAULT_MAX_TURNS = 10;
 // Goal Runner runs three independent reviewer personas; two approvals form a majority.
@@ -135,108 +135,64 @@ function positiveInteger(value: number | undefined, fallback: number): number {
   return floored >= 1 ? floored : fallback;
 }
-const reviewDecisionSchema = {
-  type: "object",
-  additionalProperties: false,
-  required: [
-    "findings",
-    "overall_correctness",
-    "overall_explanation",
-    "overall_confidence_score",
-    "goal_oracle_satisfied",
-    "receipt_assessment",
-    "verification_remaining",
-    "stop_review_loop",
-  ],
-  properties: {
-    findings: {
-      type: "array",
-      items: {
-        type: "object",
-        additionalProperties: false,
-        required: ["title", "body", "confidence_score", "code_location"],
-        properties: {
-          title: { type: "string" },
-          body: { type: "string" },
-          confidence_score: { type: "number", minimum: 0, maximum: 1 },
-          priority: { type: ["integer", "null"], minimum: 0, maximum: 3 },
-          code_location: {
-            type: "object",
-            additionalProperties: false,
-            required: ["absolute_file_path", "line_range"],
-            properties: {
-              absolute_file_path: { type: "string" },
-              line_range: {
-                type: "object",
-                additionalProperties: false,
-                required: ["start", "end"],
-                properties: {
-                  start: { type: "integer", minimum: 1 },
-                  end: { type: "integer", minimum: 1 },
-                },
-              },
-            },
+const reviewFindingSchema = Type.Object(
+  {
+    title: Type.String(),
+    body: Type.String(),
+    confidence_score: Type.Number({ minimum: 0, maximum: 1 }),
+    priority: Type.Optional(
+      Type.Union([Type.Integer({ minimum: 0, maximum: 3 }), Type.Null()]),
+    ),
+    code_location: Type.Object(
+      {
+        absolute_file_path: Type.String(),
+        line_range: Type.Object(
+          {
+            start: Type.Integer({ minimum: 1 }),
+            end: Type.Integer({ minimum: 1 }),
           },
-        },
+          { additionalProperties: false },
+        ),
       },
-    },
-    overall_correctness: {
-      type: "string",
-      enum: ["patch is correct", "patch is incorrect"],
-    },
-    overall_explanation: { type: "string" },
-    overall_confidence_score: { type: "number", minimum: 0, maximum: 1 },
-    goal_oracle_satisfied: { type: "boolean" },
-    receipt_assessment: { type: "string" },
-    verification_remaining: { type: "string" },
-    stop_review_loop: { type: "boolean" },
-    reviewer_error: {
-      anyOf: [
-        { type: "null" },
-        {
-          type: "object",
-          additionalProperties: false,
-          required: ["kind", "message", "attempted_recovery"],
-          properties: {
-            kind: {
-              type: "string",
-              enum: [
-                "validation_unavailable",
-                "dependency_unavailable",
-                "tool_failure",
-                "reviewer_failure",
-              ],
-            },
-            message: { type: "string" },
-            attempted_recovery: { type: "string" },
-          },
-        },
-      ],
-    },
+      { additionalProperties: false },
+    ),
   },
-} as const;
-const reviewDecisionTool = {
-  name: "review_decision",
-  label: "Review Decision",
-  description:
-    "Emit the final structured review verdict after inspecting the patch.",
-  promptSnippet: "Emit the final review verdict as structured data",
-  promptGuidelines: [
-    "Call review_decision after completing review investigation and validation.",
-    "This is a terminating structured-output tool; do not emit another assistant response after calling it.",
-  ],
-  parameters: reviewDecisionSchema,
-  async execute(_toolCallId: string, params: ReviewDecision) {
-    return {
-      content: [
-        { type: "text" as const, text: JSON.stringify(params, null, 2) },
-      ],
-      details: params,
-      terminate: true,
-    };
+  { additionalProperties: false },
+);
+const reviewerErrorSchema = Type.Object(
+  {
+    kind: Type.Union([
+      Type.Literal("validation_unavailable"),
+      Type.Literal("dependency_unavailable"),
+      Type.Literal("tool_failure"),
+      Type.Literal("reviewer_failure"),
+    ]),
+    message: Type.String(),
+    attempted_recovery: Type.String(),
   },
-};
+  { additionalProperties: false },
+);
+const reviewDecisionSchema = Type.Object(
+  {
+    findings: Type.Array(reviewFindingSchema),
+    overall_correctness: Type.Union([
+      Type.Literal("patch is correct"),
+      Type.Literal("patch is incorrect"),
+    ]),
+    overall_explanation: Type.String(),
+    overall_confidence_score: Type.Number({ minimum: 0, maximum: 1 }),
+    goal_oracle_satisfied: Type.Boolean(),
+    receipt_assessment: Type.String(),
+    verification_remaining: Type.String(),
+    stop_review_loop: Type.Boolean(),
+    reviewer_error: Type.Optional(
+      Type.Union([Type.Null(), reviewerErrorSchema]),
+    ),
+  },
+  { additionalProperties: false },
+);
 const GOAL_CONTINUATION_REFERENCE = [
   "Continuation behavior:",
@@ -589,6 +545,7 @@ function renderGoalContinuationPrompt(
       ].join("\n"),
     ],
     ["goal_guidelines", GOAL_CONTINUATION_REFERENCE],
+    ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
   ]);
 }
@@ -619,6 +576,7 @@ function renderForkedGoalWorkerPrompt(
         renderLatestReviewArtifacts(latestReviewArtifactPaths),
       ].join("\n"),
     ],
+    ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
   ]);
 }
@@ -795,6 +753,7 @@ function renderReviewerPrompt(args: {
     ["goal_framework", GOAL_METHOD_REFERENCE],
     ["goal_guidelines", GOAL_CONTINUATION_REFERENCE],
     ["auditability", RECEIPT_EXPECTATIONS],
+    ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
     [
       "goal_context",
       [
@@ -829,8 +788,6 @@ function renderReviewerPrompt(args: {
       [
         "Inspect the actual diff/repository state rather than trusting stage summaries.",
         "Identify the smallest relevant validation set from repository evidence: targeted tests, lint, typecheck, build, generated-artifact checks, CI-equivalent scripts, or user-flow proof.",
-        "When practical, include an end-to-end QA check that exercises the app the way a user would: use the tmux skill for terminal app environments and browser for web app environments.",
-        "For web app environments, capture a screenshot as a certificate of correct completion when the UI state proves the objective; for terminal app environments, capture the terminal window/output that shows proof of correctness.",
         "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch.",
         "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
         "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
@@ -915,14 +872,14 @@ function renderReviewerPrompt(args: {
     [
       "output_format",
       [
-        "You have a structured-output tool named review_decision. Use it after your investigation and validation attempts.",
+        "Use the schema-backed structured_output tool after your investigation and validation attempts.",
         "The tool terminates the turn and provides the structured data; do not emit a separate final assistant response after calling it.",
-        "The review gate decides completion only by parsing the JSON object returned by this tool; invalid JSON, missing fields, reviewer_error, or stop_review_loop=false are treated as not approved for safety.",
+        "The review gate decides completion only from the JSON object captured by structured_output; invalid JSON, missing fields, reviewer_error, or stop_review_loop=false are treated as not approved for safety.",
         "Set stop_review_loop=true only when there are no P0/P1/P2 findings, overall_correctness is patch is correct, goal_oracle_satisfied is true, no objective-relevant verification remains, and reviewer_error is null/omitted.",
         "P3 nice-to-have findings are non-blocking when the rest of the approval contract is satisfied; do not use P3 for work required by the objective or verification oracle.",
         "If you hit a reviewer/tool/validation error, still return the object with stop_review_loop=false and reviewer_error populated instead of pretending the patch is approved.",
         [
-          "The review_decision tool schema is authoritative; do not copy a hand-written JSON blob into the final response. Here is an example output:",
+          "The structured_output schema is authoritative; do not copy a hand-written JSON blob into the final response. Here is an example output:",
           "{",
           '  "findings": [',
           "    {",
@@ -1080,8 +1037,8 @@ export default defineWorkflow("goal")
           "github-copilot/claude-opus-4.8:xhigh",
           "anthropic/claude-opus-4-8:xhigh"
       ],
-      tools: [...goalRunnerTools, reviewDecisionTool.name],
-      customTools: [reviewDecisionTool],
+      tools: goalRunnerTools,
+      schema: reviewDecisionSchema,
     };
     let latestReviews: ReviewRecord[] = [];

package/dist/builtin/workflows/builtin/index.d.ts CHANGED Viewed

@@ -83,6 +83,8 @@ export type RalphWorkflowOutputs = WorkflowOutputValues & {
   readonly result?: string;
   readonly plan?: string;
   readonly plan_path?: string;
+  readonly research?: string;
+  readonly research_path?: string;
   readonly implementation_notes_path?: string;
   readonly pr_report?: string;
   readonly approved?: boolean;