npm - @ai-hero/sandcastle - Versions diffs - 0.7.0 → 0.9.0 - Mend

@ai-hero/sandcastle 0.7.0 → 0.9.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -39,7 +39,7 @@ npm install --save-dev @ai-hero/sandcastle
 npx @ai-hero/sandcastle init
 ```
-3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`. If you want to use your Claude subscription instead of an API key, see [#191](https://github.com/mattpocock/sandcastle/issues/191).
+3. Edit `.sandcastle/.env` and fill in your default values for `CLAUDE_CODE_OAUTH_TOKEN` (run `claude setup-token` on your host to get one). To use an Anthropic API key instead, uncomment and fill in `ANTHROPIC_API_KEY`.
 ```bash
 cp .sandcastle/.env.example .sandcastle/.env
@@ -775,7 +775,7 @@ Removes the Podman image.
 | Option                     | Type               | Default                       | Description                                                                                                                                                                                                                  |
 | -------------------------- | ------------------ | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `agent`                    | AgentProvider      | —                             | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-7")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4-mini")`, `cursor("composer-2")`, `opencode("opencode/big-pickle")`, `copilot("claude-sonnet-4.5")`)           |
+| `agent`                    | AgentProvider      | —                             | **Required.** Agent provider (e.g. `claudeCode("claude-opus-4-7")`, `pi("claude-sonnet-4-6")`, `codex("gpt-5.4")`, `cursor("composer-2")`, `opencode("opencode/big-pickle")`, `copilot("claude-sonnet-4.5")`)                |
 | `sandbox`                  | SandboxProvider    | —                             | **Required.** Sandbox provider (e.g. `docker()`, `podman()`, `docker({ imageName: "sandcastle:local" })`)                                                                                                                    |
 | `cwd`                      | string             | `process.cwd()`               | Host repo directory — anchor for `.sandcastle/` artifacts and git operations. Relative paths resolve against `process.cwd()`.                                                                                                |
 | `prompt`                   | string             | —                             | Inline prompt (mutually exclusive with `promptFile`)                                                                                                                                                                         |
@@ -828,6 +828,8 @@ Removes the Podman image.
 After each resumable provider iteration, Sandcastle automatically captures the agent's session file from the sandbox to the host. Claude Code sessions are stored under `~/.claude/projects/<encoded-path>/<session-id>.jsonl`; Codex sessions are stored under `~/.codex/sessions/YYYY/MM/DD/rollout-*-<session-id>.jsonl`; Pi sessions are stored under `~/.pi/agent/sessions/--<encoded-cwd>--/<timestamp>_<session-id>.jsonl`. Any provider-specific `cwd` fields are rewritten to match the host repo root, so the provider's native resume command works.
+For Claude Code, any `Agent`-tool or `Workflow`-tool subagent transcripts written under `<session-id>/subagents/agent-*.jsonl` are captured alongside the main session. Subagent capture is best-effort: if capturing an individual transcript fails, Sandcastle logs a warning and still captures the remaining subagent transcripts and the main session. A main-session capture failure still fails the run (see below).
 Session capture is enabled by default for `claudeCode()`, `codex()`, and `pi()` and can be opted out via `captureSessions: false`. Providers without `sessionStorage` do not attempt capture. Capture failure fails the run.
 ### Session resume
@@ -847,7 +849,7 @@ You can also continue the last captured session from a result:
 ```typescript
 const first = await run({
-  agent: codex("gpt-5.4-mini"),
+  agent: codex("gpt-5.4"),
   sandbox: docker(),
   prompt: "Draft a plan",
 });
@@ -901,11 +903,12 @@ The `claudeCode()` factory accepts an optional second argument for provider-spec
 agent: claudeCode("claude-opus-4-7", { effort: "high" });
 ```
-| Option            | Type                                                      | Default | Description                                               |
-| ----------------- | --------------------------------------------------------- | ------- | --------------------------------------------------------- |
-| `effort`          | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` \| `"max"` | —       | Claude Code reasoning effort level (`max` is Opus only)   |
-| `env`             | `Record<string, string>`                                  | `{}`    | Environment variables injected by this agent provider     |
-| `captureSessions` | `boolean`                                                 | `true`  | Capture agent session JSONL to host for `claude --resume` |
+| Option            | Type                                                                                           | Default | Description                                                                                                                                                                                         |
+| ----------------- | ---------------------------------------------------------------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `effort`          | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` \| `"max"`                                      | —       | Claude Code reasoning effort level (`max` is Opus only)                                                                                                                                             |
+| `env`             | `Record<string, string>`                                                                       | `{}`    | Environment variables injected by this agent provider                                                                                                                                               |
+| `captureSessions` | `boolean`                                                                                      | `true`  | Capture agent session JSONL to host for `claude --resume`                                                                                                                                           |
+| `permissionMode`  | `"default"` \| `"acceptEdits"` \| `"plan"` \| `"auto"` \| `"dontAsk"` \| `"bypassPermissions"` | —       | Maps to Claude's `--permission-mode` flag. When set, replaces Sandcastle's default `--dangerously-skip-permissions` on AFK runs. Use `"auto"` for AI-mediated per-tool approve/deny without bypass. |
 ### `CodexOptions`
@@ -915,11 +918,12 @@ The `codex()` factory accepts an optional second argument for provider-specific
 agent: codex("gpt-5.4", { effort: "high" });
 ```
-| Option            | Type                                           | Default | Description                                               |
-| ----------------- | ---------------------------------------------- | ------- | --------------------------------------------------------- |
-| `effort`          | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` | —       | Codex reasoning effort level via `model_reasoning_effort` |
-| `env`             | `Record<string, string>`                       | `{}`    | Environment variables injected by this agent provider     |
-| `captureSessions` | `boolean`                                      | `true`  | Capture Codex rollout JSONL to host for resume            |
+| Option              | Type                                           | Default | Description                                                                                                                                                                                                           |
+| ------------------- | ---------------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `effort`            | `"low"` \| `"medium"` \| `"high"` \| `"xhigh"` | —       | Codex reasoning effort level via `model_reasoning_effort`                                                                                                                                                             |
+| `env`               | `Record<string, string>`                       | `{}`    | Environment variables injected by this agent provider                                                                                                                                                                 |
+| `captureSessions`   | `boolean`                                      | `true`  | Capture Codex rollout JSONL to host for resume                                                                                                                                                                        |
+| `approvalsReviewer` | `"user"` \| `"auto_review"`                    | —       | Maps to Codex's `approvals_reviewer` config. When `"auto_review"`, swaps `--dangerously-bypass-approvals-and-sandbox` for `-a on-request -s danger-full-access` so the reviewer agent evaluates each approval prompt. |
 ### `PiOptions`

package/dist/index.d.ts CHANGED Viewed

@@ -175,6 +175,16 @@ interface CodexOptions {
         readonly hostSessionsDir?: string;
         readonly sandboxSessionsDir?: string;
     };
+    /**
+     * Maps to Codex's `approvals_reviewer` config key (set via
+     * `-c approvals_reviewer="<value>"`). When set to `"auto_review"`, the
+     * provider swaps the default `--dangerously-bypass-approvals-and-sandbox`
+     * for an interactive approval policy (`-a on-request`) and Codex's most
+     * permissive sandbox (`-s danger-full-access`) — auto-review needs
+     * something to review, and the safety boundary is the reviewer agent
+     * rather than the filesystem sandbox.
+     */
+    readonly approvalsReviewer?: "user" | "auto_review";
 }
 declare const codex: (model: string, options?: CodexOptions) => AgentProvider & {
     readonly sessionStorage: AgentSessionStorage;
@@ -218,6 +228,13 @@ interface ClaudeCodeOptions {
         readonly hostProjectsDir?: string;
         readonly sandboxProjectsDir?: string;
     };
+    /**
+     * Maps directly to Claude's `--permission-mode` flag. When set, replaces the
+     * default `--dangerously-skip-permissions` Sandcastle passes on AFK runs —
+     * the two flags are mutually exclusive on Claude's CLI. Use `"auto"` for
+     * AI-mediated per-tool approve/deny on unsandboxed host runs.
+     */
+    readonly permissionMode?: "default" | "acceptEdits" | "plan" | "auto" | "dontAsk" | "bypassPermissions";
 }
 declare const claudeCode: (model: string, options?: ClaudeCodeOptions) => AgentProvider & {
     readonly sessionStorage: AgentSessionStorage;

package/dist/index.js CHANGED Viewed

@@ -676,7 +676,7 @@ var findMissingPromptArgKeys = (prompt, providedArgs) => {
     if (seen.has(key)) continue;
     seen.add(key);
     if (builtInSet.has(key)) continue;
-    if (key in providedArgs) continue;
+    if (key in providedArgs && providedArgs[key] != null) continue;
     missing.push(key);
   }
   return missing;
@@ -704,6 +704,14 @@ var substitutePromptArgs = (prompt, args, silentKeys) => {
           })
         );
       }
+      const value = sanitizedArgs[key];
+      if (value == null) {
+        return yield* Effect_exports.fail(
+          new PromptError({
+            message: `Prompt argument "{{${key}}}" has value ${value === null ? "null" : "undefined"} in promptArgs`
+          })
+        );
+      }
     }
     for (const key of Object.keys(sanitizedArgs)) {
       if (!referencedKeys.has(key) && !silentKeys?.has(key)) {
@@ -2403,6 +2411,21 @@ var claudeHostSessionPath = (cwd, id, projectsDir) => {
   return join(base, encodeProjectPath(cwd), `${id}.jsonl`);
 };
 var claudeSandboxSessionPath = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), `${id}.jsonl`);
+var claudeSubagentsDirInSandbox = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), id, "subagents"); // Sandbox-side subagents directory: <projectsDir>/<encodeProjectPath(cwd)>/<id>/subagents, joined via posix.join.
+var claudeSubagentsDirOnHost = (cwd, id, projectsDir) => { // Host-side subagents directory for session `id`: <base>/<encodeProjectPath(cwd)>/<id>/subagents.
+  const base = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects"); // Without an explicit projectsDir, use <HOME>/.claude/projects; the literal string "~" stands in when HOME is unset.
+  return join(base, encodeProjectPath(cwd), id, "subagents");
+};
+var listClaudeSubagentSessionsInSandbox = async (cwd, id, handle, sandboxProjectsDir) => { // Runs `find` through handle.exec and returns the agent-*.jsonl paths it prints for this session's subagents dir ([] when there are none).
+  const dir = claudeSubagentsDirInSandbox(cwd, id, sandboxProjectsDir);
+  const result = await handle.exec(
+    `find ${JSON.stringify(dir)} -type f -name ${JSON.stringify("agent-*.jsonl")} 2>/dev/null`
+  ); // NOTE(review): JSON.stringify produces a double-quoted string; a POSIX shell would still expand `$` and backticks inside it — confirm `dir` can never contain them.
+  if (result.exitCode !== 0) return []; // Any non-zero exit is treated as "no subagent transcripts" rather than as an error.
+  const stdout = result.stdout.trim();
+  if (stdout === "") return [];
+  return stdout.split("\n").filter((line) => line !== ""); // One path per output line; empty lines are dropped.
+};
 var findClaudeSessionOnHost = async (id, projectsDir) => {
   const root = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects");
   let entries;
@@ -2424,14 +2447,18 @@ var rewriteSessionCwd = (content, fromCwd, toCwd) => {
   if (content === "") return "";
   return content.split("\n").map((line) => {
     if (line === "") return line;
-    const entry = JSON.parse(line);
-    if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
-      entry.cwd = toCwd;
-    }
-    if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
-      entry.payload.cwd = toCwd;
+    try {
+      const entry = JSON.parse(line);
+      if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
+        entry.cwd = toCwd;
+      }
+      if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
+        entry.payload.cwd = toCwd;
+      }
+      return JSON.stringify(entry);
+    } catch {
+      return line;
     }
-    return JSON.stringify(entry);
   }).join("\n");
 };
 var transferClaudeSession = (jsonl, fromCwd, toCwd) => rewriteSessionCwd(jsonl, fromCwd, toCwd);
@@ -2705,6 +2732,19 @@ var writeSandboxFile = async (handle, sandboxPath, content, tag) => {
     });
   }
 };
+var copyClaudeSessionFile = async ({ // Reads one session JSONL via readSandboxFile, rewrites it with transferClaudeSession(fromCwd -> toCwd), and writes the result to destPath.
+  handle,
+  sourcePath,
+  fromCwd,
+  toCwd,
+  destPath,
+  tag
+}) => {
+  const jsonl = await readSandboxFile(handle, sourcePath, tag);
+  const rewritten = transferClaudeSession(jsonl, fromCwd, toCwd);
+  await mkdir(dirname(destPath), { recursive: true }); // Create the destination's parent directories before writing.
+  await writeFile(destPath, rewritten);
+};
 var makeClaudeSessionStorage = (options) => {
   const hostProjectsDir = options?.sessionStorage?.hostProjectsDir;
   const sandboxProjectsDir = options?.sessionStorage?.sandboxProjectsDir ?? "/home/agent/.claude/projects";
@@ -2717,20 +2757,48 @@ var makeClaudeSessionStorage = (options) => {
       return readFile(path2, "utf-8");
     },
     captureToHost: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
-      const sandboxPath = claudeSandboxSessionPath(
+      await copyClaudeSessionFile({
+        handle,
+        sourcePath: claudeSandboxSessionPath(
+          sandboxCwd,
+          sessionId,
+          sandboxProjectsDir
+        ),
+        fromCwd: sandboxCwd,
+        toCwd: hostCwd,
+        destPath: claudeHostSessionPath(hostCwd, sessionId, hostProjectsDir),
+        tag: "claude-cap"
+      });
+      const subagentSandboxPaths = await listClaudeSubagentSessionsInSandbox(
         sandboxCwd,
         sessionId,
+        handle,
         sandboxProjectsDir
       );
-      const jsonl = await readSandboxFile(handle, sandboxPath, "claude-cap");
-      const rewritten = transferClaudeSession(jsonl, sandboxCwd, hostCwd);
-      const hostPath = claudeHostSessionPath(
+      const hostSubagentsDir = claudeSubagentsDirOnHost(
         hostCwd,
         sessionId,
         hostProjectsDir
       );
-      await mkdir(dirname(hostPath), { recursive: true });
-      await writeFile(hostPath, rewritten);
+      for (const sandboxSubagentPath of subagentSandboxPaths) {
+        try {
+          await copyClaudeSessionFile({
+            handle,
+            sourcePath: sandboxSubagentPath,
+            fromCwd: sandboxCwd,
+            toCwd: hostCwd,
+            destPath: join(
+              hostSubagentsDir,
+              posix.basename(sandboxSubagentPath)
+            ),
+            tag: "claude-sub"
+          });
+        } catch (err) {
+          console.error(
+            `sandcastle: failed to capture Claude subagent transcript ${sandboxSubagentPath}: ${err instanceof Error ? err.message : String(err)}`
+          );
+        }
+      }
     },
     resumeIntoSandbox: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
       const hostPath = claudeHostSessionPath(
@@ -2961,6 +3029,7 @@ var codex = (model, options) => ({
     forkSession
   }) {
     const effortFlag = options?.effort ? ` -c ${shellEscape(`model_reasoning_effort="${options.effort}"`)}` : "";
+    const approvalsFlags = options?.approvalsReviewer === "auto_review" ? ` -a on-request -s danger-full-access -c ${shellEscape(`approvals_reviewer="auto_review"`)}` : " --dangerously-bypass-approvals-and-sandbox";
     let base;
     if (resumeSession && forkSession) {
       base = `codex exec fork ${shellEscape(resumeSession)}`;
@@ -2971,7 +3040,7 @@ var codex = (model, options) => ({
     }
     const stdinArg = resumeSession ? " -" : "";
     return {
-      command: `${base} --json --dangerously-bypass-approvals-and-sandbox -m ${shellEscape(model)}${effortFlag}${stdinArg}`,
+      command: `${base} --json${approvalsFlags} -m ${shellEscape(model)}${effortFlag}${stdinArg}`,
       stdin: prompt
     };
   },
@@ -3070,7 +3139,7 @@ var opencode = (model, options) => ({
   buildInteractiveArgs({ prompt }) {
     const args = ["opencode", "--model", model];
     if (options?.agent) args.push("--agent", options.agent);
-    if (prompt) args.push("-p", prompt);
+    if (prompt) args.push("--prompt", prompt);
     return args;
   },
   parseStreamLine(line) {
@@ -3160,12 +3229,12 @@ var claudeCode = (model, options) => ({
     resumeSession,
     forkSession
   }) {
-    const skipPerms = dangerouslySkipPermissions ? " --dangerously-skip-permissions" : "";
+    const permissionFlag = options?.permissionMode ? ` --permission-mode ${options.permissionMode}` : dangerouslySkipPermissions ? " --dangerously-skip-permissions" : "";
     const effortFlag = options?.effort ? ` --effort ${options.effort}` : "";
     const resumeFlag = resumeSession ? ` --resume ${shellEscape(resumeSession)}` : "";
     const forkFlag = resumeSession && forkSession ? " --fork-session" : "";
     return {
-      command: `claude --print --verbose${skipPerms} --output-format stream-json --model ${shellEscape(model)}${effortFlag}${resumeFlag}${forkFlag} -p -`,
+      command: `claude --print --verbose${permissionFlag} --output-format stream-json --model ${shellEscape(model)}${effortFlag}${resumeFlag}${forkFlag} -p -`,
       stdin: prompt
     };
   },
@@ -3174,7 +3243,11 @@ var claudeCode = (model, options) => ({
     dangerouslySkipPermissions
   }) {
     const args = ["claude"];
-    if (dangerouslySkipPermissions) args.push("--dangerously-skip-permissions");
+    if (options?.permissionMode) {
+      args.push("--permission-mode", options.permissionMode);
+    } else if (dangerouslySkipPermissions) {
+      args.push("--dangerously-skip-permissions");
+    }
     args.push("--model", model);
     if (options?.effort) args.push("--effort", options.effort);
     if (prompt) args.push(prompt);