@ai-hero/sandcastle 0.8.0 → 0.10.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ npm install --save-dev @ai-hero/sandcastle
39
39
  npx @ai-hero/sandcastle init
40
40
  ```
41
41
 
42
- 3. Edit `.sandcastle/.env` and fill in your default values for `ANTHROPIC_API_KEY`. If you want to use your Claude subscription instead of an API key, see [#191](https://github.com/mattpocock/sandcastle/issues/191).
42
+ 3. Edit `.sandcastle/.env` and fill in your default values for `CLAUDE_CODE_OAUTH_TOKEN` (run `claude setup-token` on your host to get one). To use an Anthropic API key instead, uncomment and fill in `ANTHROPIC_API_KEY`.
43
43
 
44
44
  ```bash
45
45
  cp .sandcastle/.env.example .sandcastle/.env
@@ -218,14 +218,20 @@ const result = await run({
218
218
  type: "file",
219
219
  path: ".sandcastle/logs/my-run.log",
220
220
  // Optional: forward the agent's output stream to your own observability system.
221
- // Fires for each text chunk and tool call the agent produces. Errors thrown
222
- // by the callback are swallowed so a broken forwarder cannot kill the run.
221
+ // Fires for each text chunk, tool call, and raw stdout line the agent
222
+ // produces. Errors thrown by the callback are swallowed so a broken
223
+ // forwarder cannot kill the run.
223
224
  onAgentStreamEvent: (event) => {
224
- // event is { type: "text" | "toolCall", iteration, timestamp, ... }
225
+ // event is { type: "text" | "toolCall" | "raw", iteration, timestamp, ... }
225
226
  myLogger.info(event);
226
227
  },
228
+ // Optional: append every raw stdout line the agent emits to the same
229
+ // log file, interleaved with the human-readable output. Includes lines
230
+ // the provider's stream parser would otherwise drop. Intended for
231
+ // debugging stuck or unexpected agent behaviour.
232
+ verbose: true,
227
233
  },
228
- // logging: { type: "stdout" }, // OR render an interactive UI in the terminal
234
+ // logging: { type: "stdout", verbose: true }, // OR terminal mode (verbose: raw lines to stdout)
229
235
 
230
236
  // String (or array of strings) the agent emits to end the iteration loop early.
231
237
  // Default: "<promise>COMPLETE</promise>"
@@ -828,6 +834,8 @@ Removes the Podman image.
828
834
 
829
835
  After each resumable provider iteration, Sandcastle automatically captures the agent's session file from the sandbox to the host. Claude Code sessions are stored under `~/.claude/projects/<encoded-path>/<session-id>.jsonl`; Codex sessions are stored under `~/.codex/sessions/YYYY/MM/DD/rollout-*-<session-id>.jsonl`; Pi sessions are stored under `~/.pi/agent/sessions/--<encoded-cwd>--/<timestamp>_<session-id>.jsonl`. Any provider-specific `cwd` fields are rewritten to match the host repo root, so the provider's native resume command works.
830
836
 
837
+ For Claude Code, any `Agent`-tool or `Workflow`-tool subagent transcripts written under `<session-id>/subagents/agent-*.jsonl` are captured alongside the main session. Subagent capture is best-effort: a failure on an individual transcript logs a warning and lets siblings and the main session through. Main-session capture failure still fails the run (see below).
838
+
831
839
  Session capture is enabled by default for `claudeCode()`, `codex()`, and `pi()` and can be opted out via `captureSessions: false`. Providers without `sessionStorage` do not attempt capture. Capture failure fails the run.
832
840
 
833
841
  ### Session resume
package/dist/index.d.ts CHANGED
@@ -246,6 +246,12 @@ declare const claudeCode: (model: string, options?: ClaudeCodeOptions) => AgentP
246
246
  *
247
247
  * Emitted only in log-to-file mode when an `onAgentStreamEvent` callback is
248
248
  * provided via `logging`. See `run()`.
249
+ *
250
+ * The `"raw"` variant carries every stdout line the agent emits, verbatim and
251
+ * before parsing — including lines that the provider's stream parser would
252
+ * otherwise drop (e.g. tool-use blocks for unrecognised tools). Intended for
253
+ * debugging when the typed `"text"` / `"toolCall"` events don't surface
254
+ * enough detail.
249
255
  */
250
256
  type AgentStreamEvent = {
251
257
  readonly type: "text";
@@ -258,6 +264,11 @@ type AgentStreamEvent = {
258
264
  readonly formattedArgs: string;
259
265
  readonly iteration: number;
260
266
  readonly timestamp: Date;
267
+ } | {
268
+ readonly type: "raw";
269
+ readonly line: string;
270
+ readonly iteration: number;
271
+ readonly timestamp: Date;
261
272
  };
262
273
 
263
274
  type SandboxHooks = {
@@ -414,16 +425,34 @@ type LoggingOption =
414
425
  readonly type: "file";
415
426
  readonly path: string;
416
427
  /**
417
- * Optional callback invoked for each agent stream event (text chunk or
418
- * tool call) in addition to being written to the log file. Intended for
419
- * forwarding the agent's output stream to external observability
420
- * systems. Errors thrown by the callback are swallowed.
428
+ * Optional callback invoked for each agent stream event (text chunk,
429
+ * tool call, or raw stdout line) in addition to being written to the
430
+ * log file. Intended for forwarding the agent's output stream to
431
+ * external observability systems. Errors thrown by the callback are
432
+ * swallowed.
421
433
  */
422
434
  readonly onAgentStreamEvent?: (event: AgentStreamEvent) => void;
435
+ /**
436
+ * When `true`, every raw stdout line the agent emits is appended
437
+ * verbatim to the same log file at `path`, in real time. Includes
438
+ * lines the provider's stream parser would otherwise drop (e.g.
439
+ * tool-use blocks for unrecognised tools). Intended for debugging
440
+ * stuck or unexpected agent behavior — note that the raw JSON is
441
+ * interleaved with the human-readable log output. Default: `false`.
442
+ */
443
+ readonly verbose?: boolean;
423
444
  }
424
445
  /** Render progress and agent output as an interactive UI in the terminal (terminal mode). */
425
446
  | {
426
447
  readonly type: "stdout";
448
+ /**
449
+ * When `true`, every raw stdout line the agent emits is written
450
+ * verbatim to `process.stdout`, in real time. Includes lines the
451
+ * provider's stream parser would otherwise drop. Intended for
452
+ * debugging stuck or unexpected agent behavior. Note: the raw output
453
+ * is interleaved with the interactive terminal UI. Default: `false`.
454
+ */
455
+ readonly verbose?: boolean;
427
456
  };
428
457
  /** Override default timeouts for built-in lifecycle steps. Unset keys keep their defaults. */
429
458
  interface Timeouts {
package/dist/index.js CHANGED
@@ -4,6 +4,7 @@ import { Context_exports, CwdError, Effect_exports, resolveCwd, getCurrentBranch
4
4
  export { createBindMountSandboxProvider, createIsolatedSandboxProvider } from './chunk-BIWNFKGV.js';
5
5
  import { noSandbox } from './chunk-72UVAC7B.js';
6
6
  import './chunk-NGBM7T3E.js';
7
+ import { mkdirSync, appendFileSync } from 'fs';
7
8
  import path, { join, posix, dirname, relative } from 'path';
8
9
  import { styleText } from 'util';
9
10
  import * as clack from '@clack/prompts';
@@ -177,7 +178,7 @@ var TextDeltaBuffer = class {
177
178
 
178
179
  // src/Orchestrator.ts
179
180
  var IDLE_WARNING_INTERVAL_MS = 6e4;
180
- var invokeAgent = (sandbox, sandboxRepoDir, prompt, provider, idleTimeoutMs, completionTimeoutMs, completionSignals, onText, onToolCall, onIdleWarning, onCompletionTimeout, idleWarningIntervalMs = IDLE_WARNING_INTERVAL_MS, resumeSession, forkSession, signal) => Effect_exports.gen(function* () {
181
+ var invokeAgent = (sandbox, sandboxRepoDir, prompt, provider, idleTimeoutMs, completionTimeoutMs, completionSignals, onText, onToolCall, onRawLine, onIdleWarning, onCompletionTimeout, idleWarningIntervalMs = IDLE_WARNING_INTERVAL_MS, resumeSession, forkSession, signal) => Effect_exports.gen(function* () {
181
182
  let resultText = "";
182
183
  let sessionId;
183
184
  let usage;
@@ -256,6 +257,10 @@ var invokeAgent = (sandbox, sandboxRepoDir, prompt, provider, idleTimeoutMs, com
256
257
  });
257
258
  const execResult = yield* sandbox.exec(printCmd.command, {
258
259
  onLine: (line) => {
260
+ try {
261
+ onRawLine(line);
262
+ } catch {
263
+ }
259
264
  for (const parsed of provider.parseStreamLine(line)) {
260
265
  if (parsed.type === "text") {
261
266
  onText(parsed.text);
@@ -421,6 +426,16 @@ var orchestrate = (options) => {
421
426
  })
422
427
  );
423
428
  };
429
+ const onRawLine = (line) => {
430
+ Effect_exports.runPromise(
431
+ streamEmitter.emit({
432
+ type: "raw",
433
+ line,
434
+ iteration: i,
435
+ timestamp: /* @__PURE__ */ new Date()
436
+ })
437
+ );
438
+ };
424
439
  const onIdleWarning = (minutes) => {
425
440
  const msg = minutes === 1 ? "Agent idle for 1 minute" : `Agent idle for ${minutes} minutes`;
426
441
  Effect_exports.runPromise(display.status(label(msg), "warn"));
@@ -449,6 +464,7 @@ var orchestrate = (options) => {
449
464
  completionSignals,
450
465
  onText,
451
466
  onToolCall,
467
+ onRawLine,
452
468
  onIdleWarning,
453
469
  onCompletionTimeout,
454
470
  options._idleWarningIntervalMs,
@@ -676,7 +692,7 @@ var findMissingPromptArgKeys = (prompt, providedArgs) => {
676
692
  if (seen.has(key)) continue;
677
693
  seen.add(key);
678
694
  if (builtInSet.has(key)) continue;
679
- if (key in providedArgs) continue;
695
+ if (key in providedArgs && providedArgs[key] != null) continue;
680
696
  missing.push(key);
681
697
  }
682
698
  return missing;
@@ -704,6 +720,14 @@ var substitutePromptArgs = (prompt, args, silentKeys) => {
704
720
  })
705
721
  );
706
722
  }
723
+ const value = sanitizedArgs[key];
724
+ if (value == null) {
725
+ return yield* Effect_exports.fail(
726
+ new PromptError({
727
+ message: `Prompt argument "{{${key}}}" has value ${value === null ? "null" : "undefined"} in promptArgs`
728
+ })
729
+ );
730
+ }
707
731
  }
708
732
  for (const key of Object.keys(sanitizedArgs)) {
709
733
  if (!referencedKeys.has(key) && !silentKeys?.has(key)) {
@@ -888,6 +912,40 @@ var formatContextWindowSize = (usage) => {
888
912
  return `${Math.ceil(total / 1e3)}k`;
889
913
  };
890
914
  var buildContextWindowLines = (iterations) => iterations.filter((it) => it.usage !== void 0).map((it) => `Context window: ${formatContextWindowSize(it.usage)}`);
915
+ var buildAgentStreamHandler = (logging) => {
916
+ const userHandler = logging.type === "file" ? logging.onAgentStreamEvent : void 0;
917
+ const verboseSink = logging.verbose ? buildVerboseRawLineSink(logging) : void 0;
918
+ if (!userHandler && !verboseSink) return void 0;
919
+ return (event) => {
920
+ if (userHandler) {
921
+ try {
922
+ userHandler(event);
923
+ } catch {
924
+ }
925
+ }
926
+ if (verboseSink && event.type === "raw") {
927
+ verboseSink(event.line);
928
+ }
929
+ };
930
+ };
931
+ var buildVerboseRawLineSink = (logging) => {
932
+ if (logging.type === "file") {
933
+ const logPath = logging.path;
934
+ try {
935
+ mkdirSync(path.dirname(logPath), { recursive: true });
936
+ } catch {
937
+ }
938
+ return (line) => {
939
+ try {
940
+ appendFileSync(logPath, line + "\n");
941
+ } catch {
942
+ }
943
+ };
944
+ }
945
+ return (line) => {
946
+ process.stdout.write(line + "\n");
947
+ };
948
+ };
891
949
  async function run(options) {
892
950
  options.signal?.throwIfAborted();
893
951
  const {
@@ -1005,7 +1063,7 @@ async function run(options) {
1005
1063
  )
1006
1064
  );
1007
1065
  const streamEmitterLayer = agentStreamEmitterLayer(
1008
- resolvedLogging.type === "file" ? resolvedLogging.onAgentStreamEvent : void 0
1066
+ buildAgentStreamHandler(resolvedLogging)
1009
1067
  );
1010
1068
  const runLayer = Layer_exports.mergeAll(
1011
1069
  factoryLayer,
@@ -1517,7 +1575,7 @@ var buildSandboxHandle = (ctx, close) => {
1517
1575
  )
1518
1576
  });
1519
1577
  const streamEmitterLayer = agentStreamEmitterLayer(
1520
- resolvedLogging.type === "file" ? resolvedLogging.onAgentStreamEvent : void 0
1578
+ buildAgentStreamHandler(resolvedLogging)
1521
1579
  );
1522
1580
  const runLayer = Layer_exports.mergeAll(
1523
1581
  reuseFactoryLayer,
@@ -2296,7 +2354,7 @@ var createWorktree = async (options) => {
2296
2354
  )
2297
2355
  });
2298
2356
  const streamEmitterLayer = agentStreamEmitterLayer(
2299
- resolvedLogging.type === "file" ? resolvedLogging.onAgentStreamEvent : void 0
2357
+ buildAgentStreamHandler(resolvedLogging)
2300
2358
  );
2301
2359
  const runLayer = Layer_exports.mergeAll(
2302
2360
  reuseFactoryLayer,
@@ -2403,6 +2461,21 @@ var claudeHostSessionPath = (cwd, id, projectsDir) => {
2403
2461
  return join(base, encodeProjectPath(cwd), `${id}.jsonl`);
2404
2462
  };
2405
2463
  var claudeSandboxSessionPath = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), `${id}.jsonl`);
2464
+ var claudeSubagentsDirInSandbox = (cwd, id, projectsDir) => posix.join(projectsDir, encodeProjectPath(cwd), id, "subagents");
2465
+ var claudeSubagentsDirOnHost = (cwd, id, projectsDir) => {
2466
+ const base = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects");
2467
+ return join(base, encodeProjectPath(cwd), id, "subagents");
2468
+ };
2469
+ var listClaudeSubagentSessionsInSandbox = async (cwd, id, handle, sandboxProjectsDir) => {
2470
+ const dir = claudeSubagentsDirInSandbox(cwd, id, sandboxProjectsDir);
2471
+ const result = await handle.exec(
2472
+ `find ${JSON.stringify(dir)} -type f -name ${JSON.stringify("agent-*.jsonl")} 2>/dev/null`
2473
+ );
2474
+ if (result.exitCode !== 0) return [];
2475
+ const stdout = result.stdout.trim();
2476
+ if (stdout === "") return [];
2477
+ return stdout.split("\n").filter((line) => line !== "");
2478
+ };
2406
2479
  var findClaudeSessionOnHost = async (id, projectsDir) => {
2407
2480
  const root = projectsDir ?? join(process.env.HOME ?? "~", ".claude", "projects");
2408
2481
  let entries;
@@ -2424,14 +2497,18 @@ var rewriteSessionCwd = (content, fromCwd, toCwd) => {
2424
2497
  if (content === "") return "";
2425
2498
  return content.split("\n").map((line) => {
2426
2499
  if (line === "") return line;
2427
- const entry = JSON.parse(line);
2428
- if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
2429
- entry.cwd = toCwd;
2430
- }
2431
- if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
2432
- entry.payload.cwd = toCwd;
2500
+ try {
2501
+ const entry = JSON.parse(line);
2502
+ if (typeof entry.cwd === "string" && entry.cwd === fromCwd) {
2503
+ entry.cwd = toCwd;
2504
+ }
2505
+ if (entry.type === "session_meta" && typeof entry.payload === "object" && entry.payload !== null && typeof entry.payload.cwd === "string" && entry.payload.cwd === fromCwd) {
2506
+ entry.payload.cwd = toCwd;
2507
+ }
2508
+ return JSON.stringify(entry);
2509
+ } catch {
2510
+ return line;
2433
2511
  }
2434
- return JSON.stringify(entry);
2435
2512
  }).join("\n");
2436
2513
  };
2437
2514
  var transferClaudeSession = (jsonl, fromCwd, toCwd) => rewriteSessionCwd(jsonl, fromCwd, toCwd);
@@ -2705,6 +2782,19 @@ var writeSandboxFile = async (handle, sandboxPath, content, tag) => {
2705
2782
  });
2706
2783
  }
2707
2784
  };
2785
+ var copyClaudeSessionFile = async ({
2786
+ handle,
2787
+ sourcePath,
2788
+ fromCwd,
2789
+ toCwd,
2790
+ destPath,
2791
+ tag
2792
+ }) => {
2793
+ const jsonl = await readSandboxFile(handle, sourcePath, tag);
2794
+ const rewritten = transferClaudeSession(jsonl, fromCwd, toCwd);
2795
+ await mkdir(dirname(destPath), { recursive: true });
2796
+ await writeFile(destPath, rewritten);
2797
+ };
2708
2798
  var makeClaudeSessionStorage = (options) => {
2709
2799
  const hostProjectsDir = options?.sessionStorage?.hostProjectsDir;
2710
2800
  const sandboxProjectsDir = options?.sessionStorage?.sandboxProjectsDir ?? "/home/agent/.claude/projects";
@@ -2717,20 +2807,48 @@ var makeClaudeSessionStorage = (options) => {
2717
2807
  return readFile(path2, "utf-8");
2718
2808
  },
2719
2809
  captureToHost: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
2720
- const sandboxPath = claudeSandboxSessionPath(
2810
+ await copyClaudeSessionFile({
2811
+ handle,
2812
+ sourcePath: claudeSandboxSessionPath(
2813
+ sandboxCwd,
2814
+ sessionId,
2815
+ sandboxProjectsDir
2816
+ ),
2817
+ fromCwd: sandboxCwd,
2818
+ toCwd: hostCwd,
2819
+ destPath: claudeHostSessionPath(hostCwd, sessionId, hostProjectsDir),
2820
+ tag: "claude-cap"
2821
+ });
2822
+ const subagentSandboxPaths = await listClaudeSubagentSessionsInSandbox(
2721
2823
  sandboxCwd,
2722
2824
  sessionId,
2825
+ handle,
2723
2826
  sandboxProjectsDir
2724
2827
  );
2725
- const jsonl = await readSandboxFile(handle, sandboxPath, "claude-cap");
2726
- const rewritten = transferClaudeSession(jsonl, sandboxCwd, hostCwd);
2727
- const hostPath = claudeHostSessionPath(
2828
+ const hostSubagentsDir = claudeSubagentsDirOnHost(
2728
2829
  hostCwd,
2729
2830
  sessionId,
2730
2831
  hostProjectsDir
2731
2832
  );
2732
- await mkdir(dirname(hostPath), { recursive: true });
2733
- await writeFile(hostPath, rewritten);
2833
+ for (const sandboxSubagentPath of subagentSandboxPaths) {
2834
+ try {
2835
+ await copyClaudeSessionFile({
2836
+ handle,
2837
+ sourcePath: sandboxSubagentPath,
2838
+ fromCwd: sandboxCwd,
2839
+ toCwd: hostCwd,
2840
+ destPath: join(
2841
+ hostSubagentsDir,
2842
+ posix.basename(sandboxSubagentPath)
2843
+ ),
2844
+ tag: "claude-sub"
2845
+ });
2846
+ } catch (err) {
2847
+ console.error(
2848
+ `sandcastle: failed to capture Claude subagent transcript ${sandboxSubagentPath}: ${err instanceof Error ? err.message : String(err)}`
2849
+ );
2850
+ }
2851
+ }
2734
2852
  },
2735
2853
  resumeIntoSandbox: async ({ hostCwd, sandboxCwd, sessionId, handle }) => {
2736
2854
  const hostPath = claudeHostSessionPath(