@ai-hero/sandcastle 0.9.0 → 0.12.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { B as BindMountSandboxHandle, S as SandboxProvider, a as BranchStrategy, A as AnySandboxProvider, M as MergeToHeadBranchStrategy, b as NamedBranchStrategy } from './SandboxProvider-EkSMuBp8.js';
2
- export { c as BindMountBranchStrategy, d as BindMountCreateOptions, e as BindMountSandboxProvider, f as BindMountSandboxProviderConfig, E as ExecResult, H as HeadBranchStrategy, g as InteractiveExecOptions, h as IsolatedBranchStrategy, i as IsolatedCreateOptions, j as IsolatedSandboxHandle, I as IsolatedSandboxProvider, k as IsolatedSandboxProviderConfig, l as NoSandboxBranchStrategy, m as NoSandboxHandle, N as NoSandboxProvider, n as createBindMountSandboxProvider, o as createIsolatedSandboxProvider } from './SandboxProvider-EkSMuBp8.js';
1
+ import { B as BindMountSandboxHandle, S as SandboxProvider, a as BranchStrategy, A as AnySandboxProvider, E as ExecResult, M as MergeToHeadBranchStrategy, b as NamedBranchStrategy } from './SandboxProvider-EkSMuBp8.js';
2
+ export { c as BindMountBranchStrategy, d as BindMountCreateOptions, e as BindMountSandboxProvider, f as BindMountSandboxProviderConfig, H as HeadBranchStrategy, g as InteractiveExecOptions, h as IsolatedBranchStrategy, i as IsolatedCreateOptions, j as IsolatedSandboxHandle, I as IsolatedSandboxProvider, k as IsolatedSandboxProviderConfig, l as NoSandboxBranchStrategy, m as NoSandboxHandle, N as NoSandboxProvider, n as createBindMountSandboxProvider, o as createIsolatedSandboxProvider } from './SandboxProvider-EkSMuBp8.js';
3
3
  import { StandardSchemaV1 } from '@standard-schema/spec';
4
4
  export { M as MountConfig } from './MountConfig-CmXclHA5.js';
5
5
 
@@ -246,6 +246,12 @@ declare const claudeCode: (model: string, options?: ClaudeCodeOptions) => AgentP
246
246
  *
247
247
  * Emitted only in log-to-file mode when an `onAgentStreamEvent` callback is
248
248
  * provided via `logging`. See `run()`.
249
+ *
250
+ * The `"raw"` variant carries every stdout line the agent emits, verbatim and
251
+ * before parsing — including lines that the provider's stream parser would
252
+ * otherwise drop (e.g. tool-use blocks for unrecognised tools). Intended for
253
+ * debugging when the typed `"text"` / `"toolCall"` events don't surface
254
+ * enough detail.
249
255
  */
250
256
  type AgentStreamEvent = {
251
257
  readonly type: "text";
@@ -258,6 +264,11 @@ type AgentStreamEvent = {
258
264
  readonly formattedArgs: string;
259
265
  readonly iteration: number;
260
266
  readonly timestamp: Date;
267
+ } | {
268
+ readonly type: "raw";
269
+ readonly line: string;
270
+ readonly iteration: number;
271
+ readonly timestamp: Date;
261
272
  };
262
273
 
263
274
  type SandboxHooks = {
@@ -302,11 +313,35 @@ interface OutputObjectDefinition<T> {
302
313
  readonly _tag: "object";
303
314
  readonly tag: string;
304
315
  readonly schema: StandardSchemaV1<unknown, T>;
316
+ /**
317
+ * Maximum number of additional attempts after the first if structured output
318
+ * extraction or validation fails. Each retry resumes the failed run's agent
319
+ * session and feeds back a token-efficient description of the error so the
320
+ * agent can re-emit a corrected tag. Default: `0` (no retries).
321
+ *
322
+ * Retries require the agent provider to support session resumption (i.e.
323
+ * `provider.sessionStorage` is populated — Claude Code, Codex, Pi). `run()`
324
+ * fails at entry with a clear error when retries are requested but the
325
+ * provider cannot resume.
326
+ */
327
+ readonly maxRetries?: number;
305
328
  }
306
329
  /** Branded output definition for `Output.string({ tag })`. */
307
330
  interface OutputStringDefinition {
308
331
  readonly _tag: "string";
309
332
  readonly tag: string;
333
+ /**
334
+ * Maximum number of additional attempts after the first if structured output
335
+ * extraction fails. Each retry resumes the failed run's agent session and
336
+ * feeds back a token-efficient description of the error so the agent can
337
+ * re-emit a corrected tag. Default: `0` (no retries).
338
+ *
339
+ * Retries require the agent provider to support session resumption (i.e.
340
+ * `provider.sessionStorage` is populated — Claude Code, Codex, Pi). `run()`
341
+ * fails at entry with a clear error when retries are requested but the
342
+ * provider cannot resume.
343
+ */
344
+ readonly maxRetries?: number;
310
345
  }
311
346
  /** Union of all output definition shapes accepted by `run()`. */
312
347
  type OutputDefinition = OutputObjectDefinition<any> | OutputStringDefinition;
@@ -329,18 +364,28 @@ declare const Output: {
329
364
  * Declare an object-typed structured output extracted from an XML tag in
330
365
  * the agent's stdout. The tag contents are JSON-parsed (with fence-aware
331
366
  * unwrapping) and validated against the provided Standard Schema validator.
367
+ *
368
+ * Set `maxRetries` to have `run()` automatically resume the failed session
369
+ * and ask the agent to re-emit corrected output when extraction or
370
+ * validation fails. Default: `0` (no retries).
332
371
  */
333
372
  readonly object: <Schema extends StandardSchemaV1>(opts: {
334
373
  tag: string;
335
374
  schema: Schema;
375
+ maxRetries?: number;
336
376
  }) => OutputObjectDefinition<StandardSchemaV1.InferOutput<Schema>>;
337
377
  /**
338
378
  * Declare a string-typed structured output extracted from an XML tag in
339
379
  * the agent's stdout. The tag contents are whitespace-trimmed and returned
340
380
  * as a plain string — no JSON parsing, no schema validation.
381
+ *
382
+ * Set `maxRetries` to have `run()` automatically resume the failed session
383
+ * and ask the agent to re-emit corrected output when extraction fails.
384
+ * Default: `0` (no retries).
341
385
  */
342
386
  readonly string: (opts: {
343
387
  tag: string;
388
+ maxRetries?: number;
344
389
  }) => OutputStringDefinition;
345
390
  };
346
391
  interface StructuredOutputErrorOptions {
@@ -414,16 +459,34 @@ type LoggingOption =
414
459
  readonly type: "file";
415
460
  readonly path: string;
416
461
  /**
417
- * Optional callback invoked for each agent stream event (text chunk or
418
- * tool call) in addition to being written to the log file. Intended for
419
- * forwarding the agent's output stream to external observability
420
- * systems. Errors thrown by the callback are swallowed.
462
+ * Optional callback invoked for each agent stream event (text chunk,
463
+ * tool call, or raw stdout line) in addition to being written to the
464
+ * log file. Intended for forwarding the agent's output stream to
465
+ * external observability systems. Errors thrown by the callback are
466
+ * swallowed.
421
467
  */
422
468
  readonly onAgentStreamEvent?: (event: AgentStreamEvent) => void;
469
+ /**
470
+ * When `true`, every raw stdout line the agent emits is appended
471
+ * verbatim to the same log file at `path`, in real time. Includes
472
+ * lines the provider's stream parser would otherwise drop (e.g.
473
+ * tool-use blocks for unrecognised tools). Intended for debugging
474
+ * stuck or unexpected agent behavior — note that the raw JSON is
475
+ * interleaved with the human-readable log output. Default: `false`.
476
+ */
477
+ readonly verbose?: boolean;
423
478
  }
424
479
  /** Render progress and agent output as an interactive UI in the terminal (terminal mode). */
425
480
  | {
426
481
  readonly type: "stdout";
482
+ /**
483
+ * When `true`, every raw stdout line the agent emits is written
484
+ * verbatim to `process.stdout`, in real time. Includes lines the
485
+ * provider's stream parser would otherwise drop. Intended for
486
+ * debugging stuck or unexpected agent behavior. Note: the raw output
487
+ * is interleaved with the interactive terminal UI. Default: `false`.
488
+ */
489
+ readonly verbose?: boolean;
427
490
  };
428
491
  /** Override default timeouts for built-in lifecycle steps. Unset keys keep their defaults. */
429
492
  interface Timeouts {
@@ -437,7 +500,7 @@ interface Timeouts {
437
500
  readonly mergeToHostMs?: number;
438
501
  }
439
502
  interface RunOptions<A extends AgentProvider = AgentProvider> {
440
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
503
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
441
504
  readonly agent: A;
442
505
  /** Sandbox provider (e.g. docker({ imageName: "sandcastle:myrepo" })). */
443
506
  readonly sandbox: SandboxProvider;
@@ -575,7 +638,7 @@ declare function run<A extends AgentProvider>(options: RunOptions<A> & {
575
638
  declare function run<A extends AgentProvider>(options: RunOptions<A>): Promise<RunResult>;
576
639
 
577
640
  interface InteractiveOptions {
578
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
641
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
579
642
  readonly agent: AgentProvider;
580
643
  /** Sandbox provider (e.g. docker(), noSandbox()). */
581
644
  readonly sandbox?: AnySandboxProvider;
@@ -669,17 +732,19 @@ interface CreateSandboxOptions {
669
732
  /** Override default timeouts for built-in lifecycle steps. Unset keys keep their defaults. */
670
733
  readonly timeouts?: Timeouts;
671
734
  }
672
- interface SandboxRunOptions {
673
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")). */
674
- readonly agent: AgentProvider;
675
- /** Inline prompt string (mutually exclusive with promptFile). */
676
- readonly prompt?: string;
677
- /** Path to a prompt file (mutually exclusive with prompt). */
678
- readonly promptFile?: string;
735
+ /**
736
+ * Options accepted by `SandboxRunResult.resume()` / `.fork()`. Mirrors
737
+ * `ResumeRunResultOptions` in `run.ts` — drops the fields owned by the
738
+ * captured run (prompt, iteration count, resumeSession/forkSession bookkeeping).
739
+ *
740
+ * Defined as the base interface that `SandboxRunOptions` extends — the
741
+ * interface-extends shape is cheaper for the TS checker than
742
+ * `Omit<SandboxRunOptions, ...>` (which forces a mapped-type computation
743
+ * on every reference).
744
+ */
745
+ interface ResumeSandboxRunResultOptions {
679
746
  /** Key-value map for {{KEY}} placeholder substitution in prompts. */
680
747
  readonly promptArgs?: PromptArgs;
681
- /** Maximum iterations to run (default: 1). */
682
- readonly maxIterations?: number;
683
748
  /** Substring(s) the agent emits to stop the iteration loop early. */
684
749
  readonly completionSignal?: string | string[];
685
750
  /** Idle timeout in seconds. Default: 600. */
@@ -701,6 +766,18 @@ interface SandboxRunOptions {
701
766
  */
702
767
  readonly signal?: AbortSignal;
703
768
  }
769
+ interface SandboxRunOptions extends ResumeSandboxRunResultOptions {
770
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")). */
771
+ readonly agent: AgentProvider;
772
+ /** Inline prompt string (mutually exclusive with promptFile). */
773
+ readonly prompt?: string;
774
+ /** Path to a prompt file (mutually exclusive with prompt). */
775
+ readonly promptFile?: string;
776
+ /** Maximum iterations to run (default: 1). */
777
+ readonly maxIterations?: number;
778
+ /** Resume a prior agent session by id. The session JSONL must exist on the host (captured by a prior `sandbox.run()`). Incompatible with `maxIterations > 1`. */
779
+ readonly resumeSession?: string;
780
+ }
704
781
  interface SandboxRunResult {
705
782
  /** Per-iteration results (use `iterations.length` for the count). */
706
783
  readonly iterations: IterationResult[];
@@ -714,9 +791,23 @@ interface SandboxRunResult {
714
791
  }[];
715
792
  /** Path to the log file, if logging was drained to a file. */
716
793
  readonly logFilePath?: string;
794
+ /**
795
+ * Continue the last captured agent session for exactly one iteration inside
796
+ * the same long-lived sandbox. Present only when the provider supports
797
+ * resume (`sessionStorage` populated) and a session id was captured.
798
+ */
799
+ readonly resume?: (prompt: string, options?: ResumeSandboxRunResultOptions) => Promise<SandboxRunResult>;
800
+ /**
801
+ * Fork the last captured agent session for exactly one iteration inside the
802
+ * same long-lived sandbox: the parent session JSONL is left intact and the
803
+ * child run gets its own session id. Present only when the provider
804
+ * supports resume (`sessionStorage` populated) and a session id was
805
+ * captured. See ADR 0018 for fork semantics.
806
+ */
807
+ readonly fork?: (prompt: string, options?: ResumeSandboxRunResultOptions) => Promise<SandboxRunResult>;
717
808
  }
718
809
  interface SandboxInteractiveOptions {
719
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")). */
810
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")). */
720
811
  readonly agent: AgentProvider;
721
812
  /** Inline prompt string (mutually exclusive with promptFile). */
722
813
  readonly prompt?: string;
@@ -756,11 +847,34 @@ interface Sandbox {
756
847
  run(options: SandboxRunOptions): Promise<SandboxRunResult>;
757
848
  /** Launch an interactive agent session inside the existing sandbox. */
758
849
  interactive(options: SandboxInteractiveOptions): Promise<SandboxInteractiveResult>;
850
+ /**
851
+ * Execute a command inside the existing sandbox.
852
+ *
853
+ * `cwd` defaults to the sandbox repo path (same default `interactive()`
854
+ * uses), so callers get the same working directory across providers. Pass
855
+ * `cwd` to override.
856
+ *
857
+ * Returns the full `ExecResult` — non-zero `exitCode` is surfaced, not
858
+ * thrown. Callers that want strict semantics should check `result.exitCode`
859
+ * themselves (matching the contract of `BindMountSandboxHandle.exec`).
860
+ */
861
+ exec(command: string, options?: SandboxExecOptions): Promise<ExecResult>;
759
862
  /** Tear down the sandbox and worktree. */
760
863
  close(): Promise<CloseResult>;
761
864
  /** Auto teardown via `await using`. */
762
865
  [Symbol.asyncDispose](): Promise<void>;
763
866
  }
867
+ /** Options accepted by `Sandbox.exec()`. Mirrors the provider handle's `exec` options. */
868
+ interface SandboxExecOptions {
869
+ /** Per-line stdout callback for streaming output. */
870
+ readonly onLine?: (line: string) => void;
871
+ /** Working directory for the command. Defaults to the sandbox repo path. */
872
+ readonly cwd?: string;
873
+ /** Run the command with sudo, when the provider supports it. */
874
+ readonly sudo?: boolean;
875
+ /** Stdin payload — piped to the child process and then closed. Avoids the Linux 128 KB per-arg limit. */
876
+ readonly stdin?: string;
877
+ }
764
878
  /**
765
879
  * Eagerly creates a git worktree on the provided explicit branch and starts
766
880
  * a sandbox with the worktree bind-mounted. Returns a Sandbox handle that
@@ -792,7 +906,7 @@ interface CreateWorktreeOptions {
792
906
  readonly timeouts?: Timeouts;
793
907
  }
794
908
  interface WorktreeInteractiveOptions {
795
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
909
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
796
910
  readonly agent: AgentProvider;
797
911
  /** Sandbox provider (e.g. docker(), noSandbox()). Defaults to noSandbox(). */
798
912
  readonly sandbox?: AnySandboxProvider;
@@ -821,7 +935,7 @@ interface WorktreeInteractiveOptions {
821
935
  readonly signal?: AbortSignal;
822
936
  }
823
937
  interface WorktreeRunOptions {
824
- /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
938
+ /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
825
939
  readonly agent: AgentProvider;
826
940
  /** Sandbox provider (e.g. docker()). Required — AFK agents should always be sandboxed. */
827
941
  readonly sandbox: SandboxProvider;
@@ -934,4 +1048,4 @@ interface CwdError extends Error {
934
1048
  /** The provided `cwd` path does not exist or is not a directory. */
935
1049
  declare const CwdError: CwdErrorConstructor;
936
1050
 
937
- export { type AgentCommandOptions, type AgentProvider, type AgentStreamEvent, AnySandboxProvider, BindMountSandboxHandle, BranchStrategy, type ClaudeCodeOptions, type CloseResult, type CodexOptions, type CopilotOptions, type CreateSandboxOptions, type CreateWorktreeOptions, type CursorOptions, CwdError, type HostSessionLookup, type InteractiveOptions, type InteractiveResult, type IterationResult, type IterationUsage, type LoggingOption, MergeToHeadBranchStrategy, NamedBranchStrategy, type OpenCodeOptions, Output, type OutputDefinition, type OutputObjectDefinition, type OutputStringDefinition, type PiOptions, type PrintCommand, type PromptArgs, type RunOptions, type RunResult, type Sandbox, type SandboxHooks, type SandboxInteractiveOptions, type SandboxInteractiveResult, SandboxProvider, type SandboxRunOptions, type SandboxRunResult, StructuredOutputError, type Timeouts, type Worktree, type WorktreeBranchStrategy, type WorktreeCreateSandboxOptions, type WorktreeInteractiveOptions, type WorktreeRunOptions, type WorktreeRunResult, claudeCode, claudeHostSessionPath, claudeSandboxSessionPath, codex, copilot, createSandbox, createWorktree, cursor, encodeProjectPath, findClaudeSessionOnHost, findCodexSessionOnHost, interactive, opencode, pi, run, transferClaudeSession, transferCodexSession };
1051
+ export { type AgentCommandOptions, type AgentProvider, type AgentStreamEvent, AnySandboxProvider, BindMountSandboxHandle, BranchStrategy, type ClaudeCodeOptions, type CloseResult, type CodexOptions, type CopilotOptions, type CreateSandboxOptions, type CreateWorktreeOptions, type CursorOptions, CwdError, ExecResult, type HostSessionLookup, type InteractiveOptions, type InteractiveResult, type IterationResult, type IterationUsage, type LoggingOption, MergeToHeadBranchStrategy, NamedBranchStrategy, type OpenCodeOptions, Output, type OutputDefinition, type OutputObjectDefinition, type OutputStringDefinition, type PiOptions, type PrintCommand, type PromptArgs, type ResumeSandboxRunResultOptions, type RunOptions, type RunResult, type Sandbox, type SandboxExecOptions, type SandboxHooks, type SandboxInteractiveOptions, type SandboxInteractiveResult, SandboxProvider, type SandboxRunOptions, type SandboxRunResult, StructuredOutputError, type Timeouts, type Worktree, type WorktreeBranchStrategy, type WorktreeCreateSandboxOptions, type WorktreeInteractiveOptions, type WorktreeRunOptions, type WorktreeRunResult, claudeCode, claudeHostSessionPath, claudeSandboxSessionPath, codex, copilot, createSandbox, createWorktree, cursor, encodeProjectPath, findClaudeSessionOnHost, findCodexSessionOnHost, interactive, opencode, pi, run, transferClaudeSession, transferCodexSession };