@ai-hero/sandcastle 0.7.0 → 0.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ npm install --save-dev @ai-hero/sandcastle
39
39
  npx @ai-hero/sandcastle init
40
40
  ```
41
41
 
42
- 3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`. If you want to use your Claude subscription instead of an API key, see [#191](https://github.com/mattpocock/sandcastle/issues/191).
42
+ 3. Edit `.sandcastle/.env` and fill in your default values for `CLAUDE_CODE_OAUTH_TOKEN` (run `claude setup-token` on your host to get one). To use an Anthropic API key instead, uncomment and fill in `ANTHROPIC_API_KEY`.
43
43
 
44
44
  ```bash
45
45
  cp .sandcastle/.env.example .sandcastle/.env
@@ -775,7 +775,7 @@ Removes the Podman image.
775
775
 
776
776
  | Option | Type | Default | Description |
777
777
  | -------------------------- | ------------------ | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
778
- | `agent` | AgentProvider | — | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-7")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4-mini")`, `cursor("composer-2")`, `opencode("opencode/big-pickle")`, `copilot("claude-sonnet-4.5")`) |
778
+ | `agent` | AgentProvider | — | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-7")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4")`, `cursor("composer-2")`, `opencode("opencode/big-pickle")`, `copilot("claude-sonnet-4.5")`) |
779
779
  | `sandbox` | SandboxProvider | — | **Required.** Sandbox provider (e.g. `docker()`, `podman()`, `docker({ imageName: "sandcastle:local" })`) |
780
780
  | `cwd` | string | `process.cwd()` | Host repo directory — anchor for `.sandcastle/` artifacts and git operations. Relative paths resolve against `process.cwd()`. |
781
781
  | `prompt` | string | — | Inline prompt (mutually exclusive with `promptFile`) |
@@ -828,6 +828,8 @@ Removes the Podman image.
828
828
 
829
829
  After each resumable provider iteration, Sandcastle automatically captures the agent's session file from the sandbox to the host. Claude Code sessions are stored under `~/.claude/projects/<encoded-path>/<session-id>.jsonl`; Codex sessions are stored under `~/.codex/sessions/YYYY/MM/DD/rollout-*-<session-id>.jsonl`; Pi sessions are stored under `~/.pi/agent/sessions/--<encoded-cwd>--/<timestamp>_<session-id>.jsonl`. Any provider-specific `cwd` fields are rewritten to match the host repo root, so the provider's native resume command works.
830
830
 
831
+ For Claude Code, any `Agent`-tool or `Workflow`-tool subagent transcripts written under `<session-id>/subagents/agent-*.jsonl` are captured alongside the main session. Subagent capture is best-effort: a failure on an individual transcript logs a warning and lets siblings and the main session through. Main-session capture failure still fails the run (see below).
832
+
831
833
  Session capture is enabled by default for `claudeCode()`, `codex()`, and `pi()` and can be opted out via `captureSessions: false`. Providers without `sessionStorage` do not attempt capture. Capture failure fails the run.
832
834
 
833
835
  ### Session resume
@@ -847,7 +849,7 @@ You can also continue the last captured session from a result:
847
849
 
848
850
  ```typescript
849
851
  const first = await run({
850
- agent: codex("gpt-5.4-mini"),
852
+ agent: codex("gpt-5.4"),
851
853
  sandbox: docker(),
852
854
  prompt: "Draft a plan",
853
855
  });
@@ -901,11 +903,12 @@ The `claudeCode()` factory accepts an optional second argument for provider-spec
901
903
  agent: claudeCode("claude-opus-4-7", { effort: "high" });
902
904
  ```
903
905
 
904
- | Option | Type | Default | Description |
905
- | ----------------- | --------------------------------------------------------- | ------- | --------------------------------------------------------- |
906
- | `effort` | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` \| `"max"` | — | Claude Code reasoning effort level (`max` is Opus only) |
907
- | `env` | `Record<string, string>` | `{}` | Environment variables injected by this agent provider |
908
- | `captureSessions` | `boolean` | `true` | Capture agent session JSONL to host for `claude --resume` |
906
+ | Option | Type | Default | Description |
907
+ | ----------------- | ---------------------------------------------------------------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
908
+ | `effort` | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` \| `"max"` | — | Claude Code reasoning effort level (`max` is Opus only) |
909
+ | `env` | `Record<string, string>` | `{}` | Environment variables injected by this agent provider |
910
+ | `captureSessions` | `boolean` | `true` | Capture agent session JSONL to host for `claude --resume` |
911
+ | `permissionMode` | `"default"` \| `"acceptEdits"` \| `"plan"` \| `"auto"` \| `"dontAsk"` \| `"bypassPermissions"` | — | Maps to Claude's `--permission-mode` flag. When set, replaces Sandcastle's default `--dangerously-skip-permissions` on AFK runs. Use `"auto"` for AI-mediated per-tool approve/deny without bypass. |
909
912
 
910
913
  ### `CodexOptions`
911
914
 
@@ -915,11 +918,12 @@ The `codex()` factory accepts an optional second argument for provider-specific
915
918
  agent: codex("gpt-5.4", { effort: "high" });
916
919
  ```
917
920
 
918
- | Option | Type | Default | Description |
919
- | ----------------- | ---------------------------------------------- | ------- | --------------------------------------------------------- |
920
- | `effort` | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` | — | Codex reasoning effort level via `model_reasoning_effort` |
921
- | `env` | `Record<string, string>` | `{}` | Environment variables injected by this agent provider |
922
- | `captureSessions` | `boolean` | `true` | Capture Codex rollout JSONL to host for resume |
921
+ | Option | Type | Default | Description |
922
+ | ------------------- | ---------------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
923
+ | `effort` | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` | — | Codex reasoning effort level via `model_reasoning_effort` |
924
+ | `env` | `Record<string, string>` | `{}` | Environment variables injected by this agent provider |
925
+ | `captureSessions` | `boolean` | `true` | Capture Codex rollout JSONL to host for resume |
926
+ | `approvalsReviewer` | `"user"` \| `"auto_review"` | — | Maps to Codex's `approvals_reviewer` config. When `"auto_review"`, swaps `--dangerously-bypass-approvals-and-sandbox` for `-a on-request -s danger-full-access` so the reviewer agent evaluates each approval prompt. |
923
927
 
924
928
  ### `PiOptions`
925
929
 
package/dist/index.d.ts CHANGED
@@ -175,6 +175,16 @@ interface CodexOptions {
175
175
  readonly hostSessionsDir?: string;
176
176
  readonly sandboxSessionsDir?: string;
177
177
  };
178
+ /**
179
+ * Maps to Codex's `approvals_reviewer` config key (set via
180
+ * `-c approvals_reviewer="<value>"`). When set to `"auto_review"`, the
181
+ * provider swaps the default `--dangerously-bypass-approvals-and-sandbox`
182
+ * for an interactive approval policy (`-a on-request`) and Codex's most
183
+ * permissive sandbox (`-s danger-full-access`) — auto-review needs
184
+ * something to review, and the safety boundary is the reviewer agent
185
+ * rather than the filesystem sandbox.
186
+ */
187
+ readonly approvalsReviewer?: "user" | "auto_review";
178
188
  }
179
189
  declare const codex: (model: string, options?: CodexOptions) => AgentProvider & {
180
190
  readonly sessionStorage: AgentSessionStorage;
@@ -218,6 +228,13 @@ interface ClaudeCodeOptions {
218
228
  readonly hostProjectsDir?: string;
219
229
  readonly sandboxProjectsDir?: string;
220
230
  };
231
+ /**
232
+ * Maps directly to Claude's `--permission-mode` flag. When set, replaces the
233
+ * default `--dangerously-skip-permissions` Sandcastle passes on AFK runs —
234
+ * the two flags are mutually exclusive on Claude's CLI. Use `"auto"` for
235
+ * AI-mediated per-tool approve/deny on unsandboxed host runs.
236
+ */
237
+ readonly permissionMode?: "default" | "acceptEdits" | "plan" | "auto" | "dontAsk" | "bypassPermissions";
221
238
  }
222
239
  declare const claudeCode: (model: string, options?: ClaudeCodeOptions) => AgentProvider & {
223
240
  readonly sessionStorage: AgentSessionStorage;
package/dist/index.js CHANGED
@@ -676,7 +676,7 @@ var findMissingPromptArgKeys = (prompt, providedArgs) => {
676
676
  if (seen.has(key)) continue;
677
677
  seen.add(key);
678
678
  if (builtInSet.has(key)) continue;
679
- if (key in providedArgs) continue;
679
+ if (key in providedArgs && providedArgs[key] != null) continue;
680
680
  missing.push(key);
681
681
  }
682
682
  return missing;
@@ -704,6 +704,14 @@ var substitutePromptArgs = (prompt, args, silentKeys) => {
704
704
  })
705
705
  );
706
706
  }
707
+ const value = sanitizedArgs[key];
708
+ if (value == null) {
709
+ return yield* Effect_exports.fail(
710
+ new PromptError({
711
+ message: `Prompt argument "{{${key}}}" has value ${value === null ? "null" : "undefined"} in promptArgs`
712
+ })
713
+ );
714
+ }
707
715
  }
708
716
  for (const key of Object.keys(sanitizedArgs)) {
709
717
  if (!referencedKeys.has(key) && !silentKeys?.has(key)) {
@@ -2403,6 +2411,21 @@ var claudeHostSessionPath = (cwd, id, projectsDir) => {
2403
2411
  return join(base, encodeProjectPath(cwd), `${id}.jsonl`);
2404
2412
  };
2405
2413
  var claudeSandboxSessionPath = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), `${id}.jsonl`);
2414
+ var claudeSubagentsDirInSandbox = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), id, "subagents");
2415
+ var claudeSubagentsDirOnHost = (cwd, id, projectsDir) => {
2416
+ const base = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects");
2417
+ return join(base, encodeProjectPath(cwd), id, "subagents");
2418
+ };
2419
+ var listClaudeSubagentSessionsInSandbox = async (cwd, id, handle, sandboxProjectsDir) => {
2420
+ const dir = claudeSubagentsDirInSandbox(cwd, id, sandboxProjectsDir);
2421
+ const result = await handle.exec(
2422
+ `find ${JSON.stringify(dir)} -type f -name ${JSON.stringify("agent-*.jsonl")} 2>/dev/null`
2423
+ );
2424
+ if (result.exitCode !== 0) return [];
2425
+ const stdout = result.stdout.trim();
2426
+ if (stdout === "") return [];
2427
+ return stdout.split("\n").filter((line) => line !== "");
2428
+ };
2406
2429
  var findClaudeSessionOnHost = async (id, projectsDir) => {
2407
2430
  const root = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects");
2408
2431
  let entries;
@@ -2424,14 +2447,18 @@ var rewriteSessionCwd = (content, fromCwd, toCwd) => {
2424
2447
  if (content === "") return "";
2425
2448
  return content.split("\n").map((line) => {
2426
2449
  if (line === "") return line;
2427
- const entry = JSON.parse(line);
2428
- if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
2429
- entry.cwd = toCwd;
2430
- }
2431
- if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
2432
- entry.payload.cwd = toCwd;
2450
+ try {
2451
+ const entry = JSON.parse(line);
2452
+ if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
2453
+ entry.cwd = toCwd;
2454
+ }
2455
+ if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
2456
+ entry.payload.cwd = toCwd;
2457
+ }
2458
+ return JSON.stringify(entry);
2459
+ } catch {
2460
+ return line;
2433
2461
  }
2434
- return JSON.stringify(entry);
2435
2462
  }).join("\n");
2436
2463
  };
2437
2464
  var transferClaudeSession = (jsonl, fromCwd, toCwd) => rewriteSessionCwd(jsonl, fromCwd, toCwd);
@@ -2705,6 +2732,19 @@ var writeSandboxFile = async (handle, sandboxPath, content, tag) => {
2705
2732
  });
2706
2733
  }
2707
2734
  };
2735
+ var copyClaudeSessionFile = async ({
2736
+ handle,
2737
+ sourcePath,
2738
+ fromCwd,
2739
+ toCwd,
2740
+ destPath,
2741
+ tag
2742
+ }) => {
2743
+ const jsonl = await readSandboxFile(handle, sourcePath, tag);
2744
+ const rewritten = transferClaudeSession(jsonl, fromCwd, toCwd);
2745
+ await mkdir(dirname(destPath), { recursive: true });
2746
+ await writeFile(destPath, rewritten);
2747
+ };
2708
2748
  var makeClaudeSessionStorage = (options) => {
2709
2749
  const hostProjectsDir = options?.sessionStorage?.hostProjectsDir;
2710
2750
  const sandboxProjectsDir = options?.sessionStorage?.sandboxProjectsDir ?? "/home/agent/.claude/projects";
@@ -2717,20 +2757,48 @@ var makeClaudeSessionStorage = (options) => {
2717
2757
  return readFile(path2, "utf-8");
2718
2758
  },
2719
2759
  captureToHost: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
2720
- const sandboxPath = claudeSandboxSessionPath(
2760
+ await copyClaudeSessionFile({
2761
+ handle,
2762
+ sourcePath: claudeSandboxSessionPath(
2763
+ sandboxCwd,
2764
+ sessionId,
2765
+ sandboxProjectsDir
2766
+ ),
2767
+ fromCwd: sandboxCwd,
2768
+ toCwd: hostCwd,
2769
+ destPath: claudeHostSessionPath(hostCwd, sessionId, hostProjectsDir),
2770
+ tag: "claude-cap"
2771
+ });
2772
+ const subagentSandboxPaths = await listClaudeSubagentSessionsInSandbox(
2721
2773
  sandboxCwd,
2722
2774
  sessionId,
2775
+ handle,
2723
2776
  sandboxProjectsDir
2724
2777
  );
2725
- const jsonl = await readSandboxFile(handle, sandboxPath, "claude-cap");
2726
- const rewritten = transferClaudeSession(jsonl, sandboxCwd, hostCwd);
2727
- const hostPath = claudeHostSessionPath(
2778
+ const hostSubagentsDir = claudeSubagentsDirOnHost(
2728
2779
  hostCwd,
2729
2780
  sessionId,
2730
2781
  hostProjectsDir
2731
2782
  );
2732
- await mkdir(dirname(hostPath), { recursive: true });
2733
- await writeFile(hostPath, rewritten);
2783
+ for (const sandboxSubagentPath of subagentSandboxPaths) {
2784
+ try {
2785
+ await copyClaudeSessionFile({
2786
+ handle,
2787
+ sourcePath: sandboxSubagentPath,
2788
+ fromCwd: sandboxCwd,
2789
+ toCwd: hostCwd,
2790
+ destPath: join(
2791
+ hostSubagentsDir,
2792
+ posix.basename(sandboxSubagentPath)
2793
+ ),
2794
+ tag: "claude-sub"
2795
+ });
2796
+ } catch (err) {
2797
+ console.error(
2798
+ `sandcastle: failed to capture Claude subagent transcript ${sandboxSubagentPath}: ${err instanceof Error ? err.message : String(err)}`
2799
+ );
2800
+ }
2801
+ }
2734
2802
  },
2735
2803
  resumeIntoSandbox: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
2736
2804
  const hostPath = claudeHostSessionPath(
@@ -2961,6 +3029,7 @@ var codex = (model, options) => ({
2961
3029
  forkSession
2962
3030
  }) {
2963
3031
  const effortFlag = options?.effort ? ` -c ${shellEscape(`model_reasoning_effort="${options.effort}"`)}` : "";
3032
+ const approvalsFlags = options?.approvalsReviewer === "auto_review" ? ` -a on-request -s danger-full-access -c ${shellEscape(`approvals_reviewer="auto_review"`)}` : " --dangerously-bypass-approvals-and-sandbox";
2964
3033
  let base;
2965
3034
  if (resumeSession && forkSession) {
2966
3035
  base = `codex exec fork ${shellEscape(resumeSession)}`;
@@ -2971,7 +3040,7 @@ var codex = (model, options) => ({
2971
3040
  }
2972
3041
  const stdinArg = resumeSession ? " -" : "";
2973
3042
  return {
2974
- command: `${base} --json --dangerously-bypass-approvals-and-sandbox -m ${shellEscape(model)}${effortFlag}${stdinArg}`,
3043
+ command: `${base} --json${approvalsFlags} -m ${shellEscape(model)}${effortFlag}${stdinArg}`,
2975
3044
  stdin: prompt
2976
3045
  };
2977
3046
  },
@@ -3070,7 +3139,7 @@ var opencode = (model, options) => ({
3070
3139
  buildInteractiveArgs({ prompt }) {
3071
3140
  const args = ["opencode", "--model", model];
3072
3141
  if (options?.agent) args.push("--agent", options.agent);
3073
- if (prompt) args.push("-p", prompt);
3142
+ if (prompt) args.push("--prompt", prompt);
3074
3143
  return args;
3075
3144
  },
3076
3145
  parseStreamLine(line) {
@@ -3160,12 +3229,12 @@ var claudeCode = (model, options) => ({
3160
3229
  resumeSession,
3161
3230
  forkSession
3162
3231
  }) {
3163
- const skipPerms = dangerouslySkipPermissions ? " --dangerously-skip-permissions" : "";
3232
+ const permissionFlag = options?.permissionMode ? ` --permission-mode ${options.permissionMode}` : dangerouslySkipPermissions ? " --dangerously-skip-permissions" : "";
3164
3233
  const effortFlag = options?.effort ? ` --effort ${options.effort}` : "";
3165
3234
  const resumeFlag = resumeSession ? ` --resume ${shellEscape(resumeSession)}` : "";
3166
3235
  const forkFlag = resumeSession && forkSession ? " --fork-session" : "";
3167
3236
  return {
3168
- command: `claude --print --verbose${skipPerms} --output-format stream-json --model ${shellEscape(model)}${effortFlag}${resumeFlag}${forkFlag} -p -`,
3237
+ command: `claude --print --verbose${permissionFlag} --output-format stream-json --model ${shellEscape(model)}${effortFlag}${resumeFlag}${forkFlag} -p -`,
3169
3238
  stdin: prompt
3170
3239
  };
3171
3240
  },
@@ -3174,7 +3243,11 @@ var claudeCode = (model, options) => ({
3174
3243
  dangerouslySkipPermissions
3175
3244
  }) {
3176
3245
  const args = ["claude"];
3177
- if (dangerouslySkipPermissions) args.push("--dangerously-skip-permissions");
3246
+ if (options?.permissionMode) {
3247
+ args.push("--permission-mode", options.permissionMode);
3248
+ } else if (dangerouslySkipPermissions) {
3249
+ args.push("--dangerously-skip-permissions");
3250
+ }
3178
3251
  args.push("--model", model);
3179
3252
  if (options?.effort) args.push("--effort", options.effort);
3180
3253
  if (prompt) args.push(prompt);