@ai-hero/sandcastle 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +384 -58
  2. package/dist/AgentProvider.d.ts +22 -12
  3. package/dist/AgentProvider.d.ts.map +1 -1
  4. package/dist/AgentProvider.js +46 -47
  5. package/dist/AgentProvider.js.map +1 -1
  6. package/dist/DockerLifecycle.d.ts +5 -1
  7. package/dist/DockerLifecycle.d.ts.map +1 -1
  8. package/dist/DockerLifecycle.js +8 -1
  9. package/dist/DockerLifecycle.js.map +1 -1
  10. package/dist/InitService.d.ts.map +1 -1
  11. package/dist/InitService.js +57 -6
  12. package/dist/InitService.js.map +1 -1
  13. package/dist/MountConfig.d.ts +15 -0
  14. package/dist/MountConfig.d.ts.map +1 -0
  15. package/dist/MountConfig.js +7 -0
  16. package/dist/MountConfig.js.map +1 -0
  17. package/dist/Orchestrator.d.ts +0 -1
  18. package/dist/Orchestrator.d.ts.map +1 -1
  19. package/dist/Orchestrator.js +28 -29
  20. package/dist/Orchestrator.js.map +1 -1
  21. package/dist/SandboxFactory.d.ts +21 -17
  22. package/dist/SandboxFactory.d.ts.map +1 -1
  23. package/dist/SandboxFactory.js +48 -50
  24. package/dist/SandboxFactory.js.map +1 -1
  25. package/dist/SandboxLifecycle.d.ts +1 -1
  26. package/dist/SandboxLifecycle.d.ts.map +1 -1
  27. package/dist/SandboxLifecycle.js +2 -2
  28. package/dist/SandboxLifecycle.js.map +1 -1
  29. package/dist/SandboxProvider.d.ts +50 -13
  30. package/dist/SandboxProvider.d.ts.map +1 -1
  31. package/dist/SandboxProvider.js +2 -0
  32. package/dist/SandboxProvider.js.map +1 -1
  33. package/dist/TextDeltaBuffer.d.ts +24 -0
  34. package/dist/TextDeltaBuffer.d.ts.map +1 -0
  35. package/dist/TextDeltaBuffer.js +68 -0
  36. package/dist/TextDeltaBuffer.js.map +1 -0
  37. package/dist/WorktreeManager.d.ts +2 -0
  38. package/dist/WorktreeManager.d.ts.map +1 -1
  39. package/dist/WorktreeManager.js +3 -0
  40. package/dist/WorktreeManager.js.map +1 -1
  41. package/dist/cli.d.ts.map +1 -1
  42. package/dist/cli.js +11 -6
  43. package/dist/cli.js.map +1 -1
  44. package/dist/createSandbox.d.ts +6 -5
  45. package/dist/createSandbox.d.ts.map +1 -1
  46. package/dist/createSandbox.js +14 -6
  47. package/dist/createSandbox.js.map +1 -1
  48. package/dist/index.d.ts +5 -4
  49. package/dist/index.d.ts.map +1 -1
  50. package/dist/index.js +1 -1
  51. package/dist/index.js.map +1 -1
  52. package/dist/mergeProviderEnv.d.ts +13 -0
  53. package/dist/mergeProviderEnv.d.ts.map +1 -0
  54. package/dist/mergeProviderEnv.js +23 -0
  55. package/dist/mergeProviderEnv.js.map +1 -0
  56. package/dist/run.d.ts +7 -18
  57. package/dist/run.d.ts.map +1 -1
  58. package/dist/run.js +35 -22
  59. package/dist/run.js.map +1 -1
  60. package/dist/sandboxes/daytona.d.ts +48 -0
  61. package/dist/sandboxes/daytona.d.ts.map +1 -0
  62. package/dist/sandboxes/daytona.js +125 -0
  63. package/dist/sandboxes/daytona.js.map +1 -0
  64. package/dist/sandboxes/docker.d.ts +10 -0
  65. package/dist/sandboxes/docker.d.ts.map +1 -1
  66. package/dist/sandboxes/docker.js +69 -42
  67. package/dist/sandboxes/docker.js.map +1 -1
  68. package/dist/sandboxes/podman.d.ts +46 -0
  69. package/dist/sandboxes/podman.d.ts.map +1 -0
  70. package/dist/sandboxes/podman.js +195 -0
  71. package/dist/sandboxes/podman.js.map +1 -0
  72. package/dist/sandboxes/test-isolated.d.ts +1 -1
  73. package/dist/sandboxes/test-isolated.d.ts.map +1 -1
  74. package/dist/sandboxes/test-isolated.js +56 -45
  75. package/dist/sandboxes/test-isolated.js.map +1 -1
  76. package/dist/sandboxes/vercel.d.ts +92 -0
  77. package/dist/sandboxes/vercel.d.ts.map +1 -0
  78. package/dist/sandboxes/vercel.js +165 -0
  79. package/dist/sandboxes/vercel.js.map +1 -0
  80. package/dist/syncIn.d.ts +4 -2
  81. package/dist/syncIn.d.ts.map +1 -1
  82. package/dist/syncIn.js +72 -22
  83. package/dist/syncIn.js.map +1 -1
  84. package/dist/syncOut.d.ts +4 -2
  85. package/dist/syncOut.d.ts.map +1 -1
  86. package/dist/syncOut.js +156 -77
  87. package/dist/syncOut.js.map +1 -1
  88. package/dist/templates/blank/.env.example +1 -0
  89. package/dist/templates/parallel-planner/.env.example +1 -0
  90. package/dist/templates/parallel-planner/main.mts +3 -3
  91. package/dist/templates/parallel-planner-with-review/.env.example +5 -0
  92. package/dist/templates/parallel-planner-with-review/CODING_STANDARDS.md +27 -0
  93. package/dist/templates/parallel-planner-with-review/implement-prompt.md +62 -0
  94. package/dist/templates/parallel-planner-with-review/main.mts +249 -0
  95. package/dist/templates/parallel-planner-with-review/merge-prompt.md +22 -0
  96. package/dist/templates/parallel-planner-with-review/plan-prompt.md +33 -0
  97. package/dist/templates/parallel-planner-with-review/review-prompt.md +55 -0
  98. package/dist/templates/parallel-planner-with-review/template.json +4 -0
  99. package/dist/templates/sequential-reviewer/.env.example +1 -0
  100. package/dist/templates/sequential-reviewer/CODING_STANDARDS.md +27 -0
  101. package/dist/templates/sequential-reviewer/implement-prompt.md +34 -45
  102. package/dist/templates/sequential-reviewer/main.mts +2 -2
  103. package/dist/templates/sequential-reviewer/review-prompt.md +1 -1
  104. package/dist/templates/simple-loop/.env.example +1 -0
  105. package/dist/testSandbox.d.ts.map +1 -1
  106. package/dist/testSandbox.js +58 -53
  107. package/dist/testSandbox.js.map +1 -1
  108. package/package.json +25 -1
package/README.md CHANGED
@@ -11,7 +11,7 @@
11
11
  A TypeScript library for orchestrating AI coding agents in isolated Docker containers:
12
12
 
13
13
  1. You invoke agents with a single `sandcastle.run()`.
14
- 2. Sandcastle handles building worktrees and sandboxing the agent.
14
+ 2. Sandcastle handles sandboxing the agent with a configurable branch strategy.
15
15
  3. The commits made on the branches get merged back.
16
16
 
17
17
  Great for parallelizing multiple AFK agents, creating review pipelines, or even just orchestrating your own agents.
@@ -35,7 +35,7 @@ npm install @ai-hero/sandcastle
35
35
  npx sandcastle init
36
36
  ```
37
37
 
38
- 3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`
38
+ 3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`. If you want to use your Claude subscription instead of an API key, see [#191](https://github.com/mattpocock/sandcastle/issues/191).
39
39
 
40
40
  ```bash
41
41
  cp .sandcastle/.env.example .sandcastle/.env
@@ -90,8 +90,20 @@ const result = await run({
90
90
  agent: claudeCode("claude-opus-4-6", { effort: "high" }),
91
91
 
92
92
  // Sandbox provider — required. Import from "@ai-hero/sandcastle/sandboxes/docker".
93
- // Provider-specific config (like imageName) lives inside the provider factory call.
94
- sandbox: docker({ imageName: "sandcastle:local" }),
93
+ // Provider-specific config (like imageName, mounts) lives inside the provider factory call.
94
+ sandbox: docker({
95
+ imageName: "sandcastle:local",
96
+ // Optional: mount host directories into the sandbox (e.g. package manager caches)
97
+ mounts: [
98
+ { hostPath: "~/.npm", sandboxPath: "/home/agent/.npm", readonly: true },
99
+ ],
100
+ // Optional: provider-level env vars merged at launch time
101
+ env: { DOCKER_SPECIFIC: "value" },
102
+ }),
103
+
104
+ // Branch strategy — controls how the agent's changes relate to branches.
105
+ // Defaults to { type: "head" } for bind-mount and { type: "merge-to-head" } for isolated providers.
106
+ branchStrategy: { type: "branch", branch: "agent/fix-42" },
95
107
 
96
108
  // Prompt source — provide one of these, not both:
97
109
  promptFile: ".sandcastle/prompt.md", // path to a prompt file
@@ -105,22 +117,17 @@ const result = await run({
105
117
  // Maximum number of agent iterations to run before stopping. Default: 1
106
118
  maxIterations: 5,
107
119
 
108
- // Worktree mode for sandbox work. Defaults to { mode: 'temp-branch' }.
109
- // { mode: 'none' } — bind-mount host working directory directly (no worktree).
110
- // { mode: 'temp-branch' } — create a temp worktree, merge back.
111
- // { mode: 'branch', branch } — create a worktree on an explicit branch.
112
- worktree: { mode: "branch", branch: "agent/fix-42" },
113
-
114
120
  // Display name for this run, shown as a prefix in log output.
115
121
  name: "fix-issue-42",
116
122
 
117
123
  // Lifecycle hooks — arrays of shell commands run sequentially inside the sandbox.
118
124
  hooks: {
119
- // Runs after the worktree is mounted into the sandbox.
125
+ // Runs after the sandbox is ready.
120
126
  onSandboxReady: [{ command: "npm install" }],
121
127
  },
122
128
 
123
- // Host-relative file paths to copy into the worktree before the container starts.
129
+ // Host-relative file paths to copy into the sandbox before the container starts.
130
+ // Not supported with branchStrategy: { type: "head" }.
124
131
  copyToSandbox: [".env"],
125
132
 
126
133
  // How to record progress. Default: write to a file under .sandcastle/logs/
@@ -143,7 +150,7 @@ console.log(result.branch); // target branch name
143
150
 
144
151
  ### `createSandbox()` — reusable sandbox
145
152
 
146
- Use `createSandbox()` when you need to run multiple agents (or multiple rounds of the same agent) inside a single sandbox. It creates the worktree and container once, and you call `sandbox.run()` as many times as you need. This avoids repeated container startup costs and keeps all runs on the same branch.
153
+ Use `createSandbox()` when you need to run multiple agents (or multiple rounds of the same agent) inside a single sandbox. It creates the sandbox once, and you call `sandbox.run()` as many times as you need. This avoids repeated container startup costs and keeps all runs on the same branch.
147
154
 
148
155
  Use `run()` instead when you only need a single one-shot invocation — it handles sandbox lifecycle automatically.
149
156
 
@@ -196,7 +203,7 @@ Commits from all `run()` calls accumulate on the same branch. The sandbox contai
196
203
 
197
204
  #### Automatic cleanup with `await using`
198
205
 
199
- `await using` calls `sandbox.close()` automatically when the block exits. If the worktree has uncommitted changes, it is preserved on disk; if clean, both container and worktree are removed.
206
+ `await using` calls `sandbox.close()` automatically when the block exits. If the sandbox has uncommitted changes, the worktree is preserved on disk; if clean, both container and worktree are removed.
200
207
 
201
208
  #### Manual `close()` with `CloseResult`
202
209
 
@@ -214,21 +221,22 @@ if (closeResult.preservedWorktreePath) {
214
221
 
215
222
  #### `CreateSandboxOptions`
216
223
 
217
- | Option | Type | Default | Description |
218
- | --------------- | --------------- | ------- | ------------------------------------------------------------------- |
219
- | `branch` | string | — | **Required.** Explicit branch for the worktree |
220
- | `sandbox` | SandboxProvider | — | **Required.** Sandbox provider (e.g. `docker()`) |
221
- | `hooks` | object | — | Lifecycle hooks (`onSandboxReady`) — run once at creation time |
222
- | `copyToSandbox` | string[] | — | Host-relative file paths to copy into the worktree at creation time |
224
+ | Option | Type | Default | Description |
225
+ | -------------------------- | --------------- | ------- | ------------------------------------------------------------------------ |
226
+ | `branch` | string | — | **Required.** Explicit branch for the sandbox |
227
+ | `sandbox` | SandboxProvider | — | **Required.** Sandbox provider (e.g. `docker()`, `podman()`) |
228
+ | `hooks` | object | — | Lifecycle hooks (`onSandboxReady`) — run once at creation time |
229
+ | `copyToSandbox` | string[] | — | Host-relative file paths to copy into the sandbox at creation time |
230
+ | `throwOnDuplicateWorktree` | boolean | `true` | When `false`, reuse an existing worktree instead of failing on collision |
223
231
 
224
232
  #### `Sandbox`
225
233
 
226
234
  | Property / Method | Type | Description |
227
235
  | ----------------------- | -------------------------------------------------- | ------------------------------------------- |
228
- | `branch` | string | The branch the worktree is on |
236
+ | `branch` | string | The branch the sandbox is on |
229
237
  | `worktreePath` | string | Host path to the worktree |
230
238
  | `run(options)` | `(SandboxRunOptions) => Promise<SandboxRunResult>` | Invoke an agent inside the existing sandbox |
231
- | `close()` | `() => Promise<CloseResult>` | Tear down the container and worktree |
239
+ | `close()` | `() => Promise<CloseResult>` | Tear down the container and sandbox |
232
240
  | `[Symbol.asyncDispose]` | `() => Promise<void>` | Auto teardown via `await using` |
233
241
 
234
242
  #### `SandboxRunOptions`
@@ -263,14 +271,15 @@ if (closeResult.preservedWorktreePath) {
263
271
 
264
272
  ## How it works
265
273
 
266
- Sandcastle uses a worktree-based architecture for agent execution:
274
+ Sandcastle uses a **branch strategy**, configured via the `branchStrategy` option on `run()`, to control how the agent's changes relate to branches. There are three strategies:
275
+
276
+ - **Head** (`{ type: "head" }`) — The agent writes directly to the host working directory. No worktree, no branch indirection. This is the default for bind-mount providers like `docker()`.
277
+ - **Merge-to-head** (`{ type: "merge-to-head" }`) — Sandcastle creates a temporary branch in a git worktree. The agent works on the temp branch, and changes are merged back to HEAD when done. The temp branch is cleaned up after merge.
278
+ - **Branch** (`{ type: "branch", branch: "foo" }`) — Commits land on an explicitly named branch in a git worktree.
267
279
 
268
- - **Worktree**: Sandcastle creates a git worktree on the host at `.sandcastle/worktrees/`. The worktree is just a normal `git worktree`.
269
- - **Bind-mount**: The worktree directory is bind-mounted into the sandbox container as the agent's working directory. The agent writes directly to the host filesystem through the mount.
270
- - **No sync needed**: Because the agent writes directly to the host filesystem, there are no sync-in or sync-out operations. Commits made by the agent are immediately visible on the host.
271
- - **Merge back**: After the run completes, the temp worktree branch is fast-forward merged back to the target branch, and the worktree is cleaned up.
280
+ For bind-mount providers (like Docker), the worktree directory is bind-mounted into the container — the agent writes directly to the host filesystem through the mount, so no sync is needed.
272
281
 
273
- From your point of view, you just run `sandcastle.run({ worktree: { mode: 'branch', branch: 'foo' } })`, and get a commit on branch `foo` once it's complete. All 100% local.
282
+ From your point of view, you just configure `branchStrategy: { type: 'branch', branch: 'foo' }` on `run()`, and get a commit on branch `foo` once it's complete. All 100% local.
274
283
 
275
284
  ## Prompts
276
285
 
@@ -291,7 +300,7 @@ You must provide exactly one of:
291
300
 
292
301
  Use `` !`command` `` expressions in your prompt to pull in dynamic context. Each expression is replaced with the command's stdout before the prompt is sent to the agent.
293
302
 
294
- Commands run **inside the sandbox** after the worktree is mounted and `onSandboxReady` hooks complete, so they see the same repo state the agent sees (including installed dependencies).
303
+ Commands run **inside the sandbox** after `onSandboxReady` hooks complete, so they see the same repo state the agent sees (including installed dependencies).
295
304
 
296
305
  ```markdown
297
306
  # Open issues
@@ -336,10 +345,10 @@ A `{{KEY}}` placeholder with no matching prompt argument is an error. Unused pro
336
345
 
337
346
  Sandcastle automatically injects two built-in prompt arguments into every prompt:
338
347
 
339
- | Placeholder | Value |
340
- | ------------------- | -------------------------------------------------------------------- |
341
- | `{{SOURCE_BRANCH}}` | The branch the agent works on inside the worktree (temp or explicit) |
342
- | `{{TARGET_BRANCH}}` | The host's active branch at `run()` time |
348
+ | Placeholder | Value |
349
+ | ------------------- | ----------------------------------------------------------------- |
350
+ | `{{SOURCE_BRANCH}}` | The branch the agent works on (determined by the branch strategy) |
351
+ | `{{TARGET_BRANCH}}` | The host's active branch at `run()` time |
343
352
 
344
353
  Use them in your prompt without passing them via `promptArgs`:
345
354
 
@@ -376,12 +385,13 @@ Tell the agent to output your chosen string(s) in the prompt, and the orchestrat
376
385
 
377
386
  `sandcastle init` prompts you to choose a template, which scaffolds a ready-to-use prompt and `main.mts` suited to a specific workflow. If your project's `package.json` has `"type": "module"`, the file will be named `main.ts` instead. Five templates are available:
378
387
 
379
- | Template | Description |
380
- | --------------------- | ----------------------------------------------------------------------- |
381
- | `blank` | Bare scaffold — write your own prompt and orchestration |
382
- | `simple-loop` | Picks GitHub issues one by one and closes them |
383
- | `sequential-reviewer` | Implements issues one by one, with a code review step after each |
384
- | `parallel-planner` | Plans parallelizable issues, executes on separate branches, then merges |
388
+ | Template | Description |
389
+ | ------------------------------ | ------------------------------------------------------------------------- |
390
+ | `blank` | Bare scaffold — write your own prompt and orchestration |
391
+ | `simple-loop` | Picks GitHub issues one by one and closes them |
392
+ | `sequential-reviewer` | Implements issues one by one, with a code review step after each |
393
+ | `parallel-planner` | Plans parallelizable issues, executes on separate branches, then merges |
394
+ | `parallel-planner-with-review` | Plans parallelizable issues, executes with per-branch review, then merges |
385
395
 
386
396
  Select a template during `sandcastle init` when prompted, or re-run init in a fresh repo to try a different one.
387
397
 
@@ -394,7 +404,7 @@ Scaffolds the `.sandcastle/` config directory and builds the Docker image. This
394
404
  | Option | Required | Default | Description |
395
405
  | -------------- | -------- | ---------------------------- | -------------------------------------------------------------------- |
396
406
  | `--image-name` | No | `sandcastle:<repo-dir-name>` | Docker image name |
397
- | `--agent` | No | Interactive prompt | Agent to use (`claude-code`, `pi`, `codex`) |
407
+ | `--agent` | No | Interactive prompt | Agent to use (`claude-code`, `pi`, `codex`, `opencode`) |
398
408
  | `--model` | No | Agent's default model | Model to use (e.g. `claude-sonnet-4-6`). Defaults to agent's default |
399
409
  | `--template` | No | Interactive prompt | Template to scaffold (e.g. `blank`, `simple-loop`) |
400
410
 
@@ -405,7 +415,7 @@ Creates the following files:
405
415
  ├── Dockerfile # Sandbox environment (customize as needed)
406
416
  ├── prompt.md # Agent instructions
407
417
  ├── .env.example # Token placeholders
408
- └── .gitignore # Ignores .env, logs/, worktrees/
418
+ └── .gitignore # Ignores .env, logs/
409
419
  ```
410
420
 
411
421
  Errors if `.sandcastle/` already exists to prevent overwriting customizations.
@@ -429,21 +439,22 @@ Removes the Docker image.
429
439
 
430
440
  ### `RunOptions`
431
441
 
432
- | Option | Type | Default | Description |
433
- | -------------------- | ------------------ | ----------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
434
- | `agent` | AgentProvider | — | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-6")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4-mini")`) |
435
- | `sandbox` | SandboxProvider | — | **Required.** Sandbox provider (e.g. `docker()`, `docker({ imageName: "sandcastle:local" })`) |
436
- | `prompt` | string | — | Inline prompt (mutually exclusive with `promptFile`) |
437
- | `promptFile` | string | — | Path to prompt file (mutually exclusive with `prompt`) |
438
- | `maxIterations` | number | `1` | Maximum iterations to run |
439
- | `hooks` | object | — | Lifecycle hooks (`onSandboxReady`) |
440
- | `worktree` | WorktreeMode | `{ mode: 'temp-branch' }` | Worktree mode: `{ mode: 'none' }`, `{ mode: 'temp-branch' }`, or `{ mode: 'branch', branch }` |
441
- | `name` | string | — | Display name for the run, shown as a prefix in log output |
442
- | `promptArgs` | PromptArgs | | Key-value map for `{{KEY}}` placeholder substitution |
443
- | `copyToSandbox` | string[] | — | Host-relative file paths to copy into the worktree before start (not supported with `mode: 'none'`) |
444
- | `logging` | object | file (auto-generated) | `{ type: 'file', path }` or `{ type: 'stdout' }` |
445
- | `completionSignal` | string \| string[] | `<promise>COMPLETE</promise>` | String or array of strings the agent emits to stop the iteration loop early |
446
- | `idleTimeoutSeconds` | number | `600` | Idle timeout in seconds — resets on each agent output event |
442
+ | Option | Type | Default | Description |
443
+ | -------------------------- | ------------------ | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
444
+ | `agent` | AgentProvider | — | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-6")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4-mini")`, `opencode("opencode/big-pickle")`) |
445
+ | `sandbox` | SandboxProvider | — | **Required.** Sandbox provider (e.g. `docker()`, `podman()`, `docker({ imageName: "sandcastle:local" })`) |
446
+ | `prompt` | string | — | Inline prompt (mutually exclusive with `promptFile`) |
447
+ | `promptFile` | string | — | Path to prompt file (mutually exclusive with `prompt`) |
448
+ | `maxIterations` | number | `1` | Maximum iterations to run |
449
+ | `hooks` | object | — | Lifecycle hooks (`onSandboxReady`) |
450
+ | `name` | string | — | Display name for the run, shown as a prefix in log output |
451
+ | `promptArgs` | PromptArgs | — | Key-value map for `{{KEY}}` placeholder substitution |
452
+ | `branchStrategy` | BranchStrategy | per-provider default | Branch strategy: `{ type: 'head' }`, `{ type: 'merge-to-head' }`, or `{ type: 'branch', branch: '…' }` |
453
+ | `copyToSandbox` | string[] | — | Host-relative file paths to copy into the sandbox before start (not supported with `branchStrategy: { type: 'head' }`) |
454
+ | `logging` | object | file (auto-generated) | `{ type: 'file', path }` or `{ type: 'stdout' }` |
455
+ | `completionSignal` | string \| string[] | `<promise>COMPLETE</promise>` | String or array of strings the agent emits to stop the iteration loop early |
456
+ | `idleTimeoutSeconds` | number | `600` | Idle timeout in seconds — resets on each agent output event |
457
+ | `throwOnDuplicateWorktree` | boolean | `true` | When `false`, reuse an existing worktree for the target branch instead of failing on collision |
447
458
 
448
459
  ### `RunResult`
449
460
 
@@ -467,8 +478,323 @@ agent: claudeCode("claude-opus-4-6", { effort: "high" });
467
478
  | Option | Type | Default | Description |
468
479
  | -------- | -------------------------------------------- | ------- | ------------------------------------------------------- |
469
480
  | `effort` | `"low"` \| `"medium"` \| `"high"` \| `"max"` | — | Claude Code reasoning effort level (`max` is Opus only) |
481
+ | `env` | `Record<string, string>` | `{}` | Environment variables injected by this agent provider |
482
+
483
+ ### Provider `env`
484
+
485
+ Both **agent providers** and **sandbox providers** accept an optional `env: Record<string, string>` in their options. These environment variables are merged with the `.sandcastle/.env` resolver output at launch time:
486
+
487
+ ```typescript
488
+ await run({
489
+ agent: claudeCode("claude-opus-4-6", {
490
+ env: { ANTHROPIC_API_KEY: "sk-ant-..." },
491
+ }),
492
+ sandbox: docker({
493
+ env: { DOCKER_SPECIFIC_VAR: "value" },
494
+ }),
495
+ prompt: "Fix issue #42",
496
+ });
497
+ ```
498
+
499
+ **Merge rules:**
500
+
501
+ - Provider env (agent + sandbox) overrides `.sandcastle/.env` resolver output for shared keys
502
+ - Agent provider env and sandbox provider env **must not overlap** — if they share any key, `run()` throws an error
503
+ - When `env` is not provided, it defaults to `{}`
504
+
505
+ Environment variables are also resolved automatically from `.sandcastle/.env` and `process.env` — no need to pass them to the API. The required variables depend on the **agent provider** (see `sandcastle init` output for details).
506
+
507
+ ## Custom Sandbox Providers
508
+
509
+ Sandcastle ships with a Docker provider, but you can create your own. A sandbox provider tells Sandcastle how to execute commands in an isolated environment. There are two kinds:
510
+
511
+ - **Bind-mount** — the sandbox can mount a host directory. Sandcastle creates a worktree on the host and the provider mounts it in. No file sync needed. Use this for Docker, Podman, or any local container runtime.
512
+ - **Isolated** — the sandbox has its own filesystem (e.g. a cloud VM). The provider handles syncing code in and out via `copyIn` and `copyFileOut`. Use this when the sandbox cannot access the host filesystem.
513
+
514
+ ### The sandbox handle contract
515
+
516
+ Both provider types return a **sandbox handle** from their `create()` function. The handle exposes:
517
+
518
+ | Method | Required | Description |
519
+ | --------------- | -------- | ---------------------------------------------------------------------------- |
520
+ | `exec` | Both | Run a command, optionally streaming stdout line-by-line via `options.onLine` |
521
+ | `close` | Both | Tear down the sandbox |
522
+ | `copyIn` | Isolated | Copy a file or directory from the host into the sandbox |
523
+ | `copyFileOut` | Isolated | Copy a file from the sandbox to the host |
524
+ | `workspacePath` | Both | Absolute path to the workspace inside the sandbox |
525
+
526
+ ### `ExecResult`
527
+
528
+ Every `exec` call returns an `ExecResult`:
529
+
530
+ ```typescript
531
+ interface ExecResult {
532
+ readonly stdout: string;
533
+ readonly stderr: string;
534
+ readonly exitCode: number;
535
+ }
536
+ ```
537
+
538
+ ### Bind-mount provider example
539
+
540
+ A minimal bind-mount provider that shells out to local processes (no container):
541
+
542
+ ```typescript
543
+ import {
544
+ createBindMountSandboxProvider,
545
+ type BindMountCreateOptions,
546
+ type BindMountSandboxHandle,
547
+ type ExecResult,
548
+ } from "@ai-hero/sandcastle";
549
+ import { execFile, spawn } from "node:child_process";
550
+ import { createInterface } from "node:readline";
551
+
552
+ const localProcess = () =>
553
+ createBindMountSandboxProvider({
554
+ name: "local-process",
555
+ create: async (
556
+ options: BindMountCreateOptions,
557
+ ): Promise<BindMountSandboxHandle> => {
558
+ const workspacePath = options.worktreePath;
559
+
560
+ return {
561
+ workspacePath,
562
+
563
+ exec: (
564
+ command: string,
565
+ opts?: { onLine?: (line: string) => void; cwd?: string },
566
+ ): Promise<ExecResult> => {
567
+ if (opts?.onLine) {
568
+ const onLine = opts.onLine;
569
+ return new Promise((resolve, reject) => {
570
+ const proc = spawn("sh", ["-c", command], {
571
+ cwd: opts?.cwd ?? workspacePath,
572
+ stdio: ["ignore", "pipe", "pipe"],
573
+ });
574
+
575
+ const stdoutChunks: string[] = [];
576
+ const stderrChunks: string[] = [];
577
+
578
+ const rl = createInterface({ input: proc.stdout! });
579
+ rl.on("line", (line) => {
580
+ stdoutChunks.push(line);
581
+ onLine(line); // forward each line to Sandcastle
582
+ });
583
+
584
+ proc.stderr!.on("data", (chunk: Buffer) => {
585
+ stderrChunks.push(chunk.toString());
586
+ });
587
+
588
+ proc.on("error", (err) => reject(err));
589
+ proc.on("close", (code) => {
590
+ resolve({
591
+ stdout: stdoutChunks.join("\n"),
592
+ stderr: stderrChunks.join(""),
593
+ exitCode: code ?? 0,
594
+ });
595
+ });
596
+ });
597
+ }
598
+
599
+ return new Promise((resolve, reject) => {
600
+ execFile(
601
+ "sh",
602
+ ["-c", command],
603
+ { cwd: opts?.cwd ?? workspacePath, maxBuffer: 10 * 1024 * 1024 },
604
+ (error, stdout, stderr) => {
605
+ if (error && error.code === undefined) {
606
+ reject(new Error(`exec failed: ${error.message}`));
607
+ } else {
608
+ resolve({
609
+ stdout: stdout.toString(),
610
+ stderr: stderr.toString(),
611
+ exitCode: typeof error?.code === "number" ? error.code : 0,
612
+ });
613
+ }
614
+ },
615
+ );
616
+ });
617
+ },
618
+
619
+ close: async () => {
620
+ // nothing to tear down for a local process
621
+ },
622
+ };
623
+ },
624
+ });
625
+ ```
626
+
627
+ ### Isolated provider example
628
+
629
+ A minimal isolated provider using a temp directory:
630
+
631
+ ```typescript
632
+ import {
633
+ createIsolatedSandboxProvider,
634
+ type IsolatedSandboxHandle,
635
+ type ExecResult,
636
+ } from "@ai-hero/sandcastle";
637
+ import { execFile, spawn } from "node:child_process";
638
+ import { copyFile, cp, mkdir, mkdtemp, rm, stat } from "node:fs/promises";
639
+ import { tmpdir } from "node:os";
640
+ import { dirname, join } from "node:path";
641
+ import { createInterface } from "node:readline";
642
+
643
+ const tempDir = () =>
644
+ createIsolatedSandboxProvider({
645
+ name: "temp-dir",
646
+ create: async (): Promise<IsolatedSandboxHandle> => {
647
+ const root = await mkdtemp(join(tmpdir(), "sandbox-"));
648
+ const workspacePath = join(root, "workspace");
649
+ await mkdir(workspacePath, { recursive: true });
650
+
651
+ return {
652
+ workspacePath,
653
+
654
+ exec: (
655
+ command: string,
656
+ opts?: { onLine?: (line: string) => void; cwd?: string },
657
+ ): Promise<ExecResult> => {
658
+ if (opts?.onLine) {
659
+ const onLine = opts.onLine;
660
+ return new Promise((resolve, reject) => {
661
+ const proc = spawn("sh", ["-c", command], {
662
+ cwd: opts?.cwd ?? workspacePath,
663
+ stdio: ["ignore", "pipe", "pipe"],
664
+ });
665
+
666
+ const stdoutChunks: string[] = [];
667
+ const stderrChunks: string[] = [];
668
+
669
+ const rl = createInterface({ input: proc.stdout! });
670
+ rl.on("line", (line) => {
671
+ stdoutChunks.push(line);
672
+ onLine(line);
673
+ });
674
+
675
+ proc.stderr!.on("data", (chunk: Buffer) => {
676
+ stderrChunks.push(chunk.toString());
677
+ });
678
+
679
+ proc.on("error", (err) => reject(err));
680
+ proc.on("close", (code) => {
681
+ resolve({
682
+ stdout: stdoutChunks.join("\n"),
683
+ stderr: stderrChunks.join(""),
684
+ exitCode: code ?? 0,
685
+ });
686
+ });
687
+ });
688
+ }
689
+
690
+ return new Promise((resolve, reject) => {
691
+ execFile(
692
+ "sh",
693
+ ["-c", command],
694
+ { cwd: opts?.cwd ?? workspacePath, maxBuffer: 10 * 1024 * 1024 },
695
+ (error, stdout, stderr) => {
696
+ if (error && error.code === undefined) {
697
+ reject(new Error(`exec failed: ${error.message}`));
698
+ } else {
699
+ resolve({
700
+ stdout: stdout.toString(),
701
+ stderr: stderr.toString(),
702
+ exitCode: typeof error?.code === "number" ? error.code : 0,
703
+ });
704
+ }
705
+ },
706
+ );
707
+ });
708
+ },
709
+
710
+ copyIn: async (hostPath: string, sandboxPath: string) => {
711
+ const info = await stat(hostPath);
712
+ if (info.isDirectory()) {
713
+ await cp(hostPath, sandboxPath, { recursive: true });
714
+ } else {
715
+ await mkdir(dirname(sandboxPath), { recursive: true });
716
+ await copyFile(hostPath, sandboxPath);
717
+ }
718
+ },
719
+
720
+ copyFileOut: async (sandboxPath: string, hostPath: string) => {
721
+ await mkdir(dirname(hostPath), { recursive: true });
722
+ await copyFile(sandboxPath, hostPath);
723
+ },
724
+
725
+ close: async () => {
726
+ await rm(root, { recursive: true, force: true });
727
+ },
728
+ };
729
+ },
730
+ });
731
+ ```
732
+
733
+ ### Branch strategies
734
+
735
+ A branch strategy controls where the agent's commits land. Configure it with the `branchStrategy` option on `run()`; the default depends on the provider type:
736
+
737
+ | Strategy | Behavior | Bind-mount | Isolated |
738
+ | --------------- | ------------------------------------------------------------------------ | ---------- | --------- |
739
+ | `head` | Agent writes directly to the host working directory. No worktree created | Default | N/A |
740
+ | `merge-to-head` | Sandcastle creates a temp branch, merges back to HEAD when done | Supported | Default |
741
+ | `branch` | Commits land on an explicit named branch you provide | Supported | Supported |
742
+
743
+ **When to use each:**
744
+
745
+ - **`head`** — fast iteration during development. No branch indirection, no merge step. Only works with bind-mount providers since the agent needs direct host filesystem access.
746
+ - **`merge-to-head`** — safe default for automation. The agent works on a throwaway branch; if something goes wrong, HEAD is untouched. Use this for CI or unattended runs.
747
+ - **`branch`** — when you want commits on a specific branch (e.g. for a PR). Pass `{ type: "branch", branch: "agent/fix-42" }`.
748
+
749
+ Branch strategy is now configured on `run()`, not on the provider:
750
+
751
+ ```typescript
752
+ import { run, claudeCode } from "@ai-hero/sandcastle";
753
+ import { docker } from "@ai-hero/sandcastle/sandboxes/docker";
754
+
755
+ // head — direct write, bind-mount only (default for bind-mount providers)
756
+ await run({
757
+ agent: claudeCode("claude-opus-4-6"),
758
+ sandbox: docker(),
759
+ prompt: "…",
760
+ });
761
+ // merge-to-head — temp branch, merge back (default for isolated providers)
762
+ await run({
763
+ agent: claudeCode("claude-opus-4-6"),
764
+ sandbox: tempDir(),
765
+ prompt: "…",
766
+ });
767
+ // branch — explicit named branch
768
+ await run({
769
+ agent: claudeCode("claude-opus-4-6"),
770
+ sandbox: docker(),
771
+ branchStrategy: { type: "branch", branch: "agent/fix-42" },
772
+ prompt: "…",
773
+ });
774
+ ```
775
+
776
+ ### Passing to `run()`
777
+
778
+ Pass your custom provider via the `sandbox` option — it works the same as the built-in `docker()` provider:
779
+
780
+ ```typescript
781
+ import { run, claudeCode } from "@ai-hero/sandcastle";
782
+
783
+ const result = await run({
784
+ agent: claudeCode("claude-opus-4-6"),
785
+ sandbox: localProcess(), // your custom provider
786
+ prompt: "Fix issue #42 in this repo.",
787
+ });
788
+ ```
789
+
790
+ ### Reference implementations
791
+
792
+ For real-world examples, see:
470
793
 
471
- Environment variables are resolved automatically from `.sandcastle/.env` and `process.env` — no need to pass them to the API. The required variables depend on the **agent provider** (see `sandcastle init` output for details).
794
+ - [`src/sandboxes/docker.ts`](src/sandboxes/docker.ts) — bind-mount provider using Docker containers
795
+ - [`src/sandboxes/vercel.ts`](src/sandboxes/vercel.ts) — isolated provider using Vercel Firecracker microVMs via `@vercel/sandbox`
796
+ - [`src/sandboxes/podman.ts`](src/sandboxes/podman.ts) — bind-mount provider using Podman containers (with SELinux label support)
797
+ - [`src/sandboxes/test-isolated.ts`](src/sandboxes/test-isolated.ts) — isolated provider using temp directories (used in tests)
472
798
 
473
799
  ## Configuration
474
800
 
@@ -503,7 +829,7 @@ Hooks are arrays of `{ "command": "..." }` objects executed sequentially inside
503
829
  | ---------------- | -------------------------- | ---------------------- |
504
830
  | `onSandboxReady` | After the sandbox is ready | Sandbox repo directory |
505
831
 
506
- **`onSandboxReady`** runs after the worktree is mounted into the sandbox. Use it for dependency installation or build steps (e.g., `npm install`).
832
+ **`onSandboxReady`** runs after the sandbox is ready. Use it for dependency installation or build steps (e.g., `npm install`).
507
833
 
508
834
  Pass hooks programmatically via `run()`:
509
835
 
@@ -1,19 +1,9 @@
1
- export interface TokenUsage {
2
- readonly input_tokens: number;
3
- readonly output_tokens: number;
4
- readonly cache_read_input_tokens: number;
5
- readonly cache_creation_input_tokens: number;
6
- readonly total_cost_usd: number;
7
- readonly num_turns: number;
8
- readonly duration_ms: number;
9
- }
10
1
  export type ParsedStreamEvent = {
11
2
  type: "text";
12
3
  text: string;
13
4
  } | {
14
5
  type: "result";
15
6
  result: string;
16
- usage: TokenUsage | null;
17
7
  } | {
18
8
  type: "tool_call";
19
9
  name: string;
@@ -21,15 +11,35 @@ export type ParsedStreamEvent = {
21
11
  };
22
12
  export interface AgentProvider {
23
13
  readonly name: string;
14
+ /** Environment variables injected by this agent provider. Merged at launch time with env resolver and sandbox provider env. */
15
+ readonly env: Record<string, string>;
24
16
  buildPrintCommand(prompt: string): string;
25
17
  buildInteractiveArgs(prompt: string): string[];
26
18
  parseStreamLine(line: string): ParsedStreamEvent[];
27
19
  }
28
20
  export declare const DEFAULT_MODEL = "claude-opus-4-6";
29
- export declare const pi: (model: string) => AgentProvider;
30
- export declare const codex: (model: string) => AgentProvider;
21
+ /** Options for the pi agent provider. */
22
+ export interface PiOptions {
23
+ /** Environment variables injected by this agent provider. */
24
+ readonly env?: Record<string, string>;
25
+ }
26
+ export declare const pi: (model: string, options?: PiOptions | undefined) => AgentProvider;
27
+ /** Options for the codex agent provider. */
28
+ export interface CodexOptions {
29
+ /** Environment variables injected by this agent provider. */
30
+ readonly env?: Record<string, string>;
31
+ }
32
+ export declare const codex: (model: string, options?: CodexOptions | undefined) => AgentProvider;
33
+ /** Options for the opencode agent provider. */
34
+ export interface OpenCodeOptions {
35
+ /** Environment variables injected by this agent provider. */
36
+ readonly env?: Record<string, string>;
37
+ }
38
+ export declare const opencode: (model: string, options?: OpenCodeOptions | undefined) => AgentProvider;
31
39
  export interface ClaudeCodeOptions {
32
40
  readonly effort?: "low" | "medium" | "high" | "max";
41
+ /** Environment variables injected by this agent provider. */
42
+ readonly env?: Record<string, string>;
33
43
  }
34
44
  export declare const claudeCode: (model: string, options?: ClaudeCodeOptions | undefined) => AgentProvider;
35
45
  //# sourceMappingURL=AgentProvider.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"AgentProvider.d.ts","sourceRoot":"","sources":["../src/AgentProvider.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,uBAAuB,EAAE,MAAM,CAAC;IACzC,QAAQ,CAAC,2BAA2B,EAAE,MAAM,CAAC;IAC7C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,MAAM,iBAAiB,GACzB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAwFtD,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IAC1C,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC/C,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAAC;CACpD;AAED,eAAO,MAAM,aAAa,oBAAoB,CAAC;AAsD/C,eAAO,MAAM,EAAE,kCAcb,CAAC;AAwCH,eAAO,MAAM,KAAK,kCAchB,CAAC;AAMH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;CACrD;AAED,eAAO,MAAM,UAAU,2EAoBrB,CAAC"}
1
+ {"version":3,"file":"AgentProvider.d.ts","sourceRoot":"","sources":["../src/AgentProvider.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,iBAAiB,GACzB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAClC;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AA6DtD,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,+HAA+H;IAC/H,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IAC1C,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC/C,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAAC;CACpD;AAED,eAAO,MAAM,aAAa,oBAAoB,CAAC;AA2D/C,yCAAyC;AACzC,MAAM,WAAW,SAAS;IACxB,6DAA6D;IAC7D,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACvC;AAED,eAAO,MAAM,EAAE,mEAeb,CAAC;AAwCH,4CAA4C;AAC5C,MAAM,WAAW,YAAY;IAC3B,6DAA6D;IAC7D,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACvC;AAED,eAAO,MAAM,KAAK,sEAkBhB,CAAC;AAMH,+CAA+C;AAC/C,MAAM,WAAW,eAAe;IAC9B,6DAA6D;IAC7D,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACvC;AAED,eAAO,MAAM,QAAQ,yEAkBnB,CAAC;AAMH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,CAAC;IACpD,6DAA6D;IAC7D,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACvC;AAED,eAAO,MAAM,UAAU,2EAqBrB,CAAC"}