npm - @ai-hero/sandcastle - Versions diffs - 0.9.0 → 0.12.0 - Mend

@ai-hero/sandcastle 0.9.0 → 0.12.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +110 -52
package/dist/{chunk-72UVAC7B.js → chunk-62WN33RK.js} +10 -5
package/dist/chunk-62WN33RK.js.map +1 -0
package/dist/{chunk-NSFQW6ML.js → chunk-CP3TYXZA.js} +3 -3
package/dist/{chunk-NSFQW6ML.js.map → chunk-CP3TYXZA.js.map} +1 -1
package/dist/{chunk-52CIJF45.js → chunk-DJRHWPEH.js} +3 -3
package/dist/{chunk-52CIJF45.js.map → chunk-DJRHWPEH.js.map} +1 -1
package/dist/{chunk-5VM5QZ26.js → chunk-VOG34SRF.js} +72 -48
package/dist/{chunk-5VM5QZ26.js.map → chunk-VOG34SRF.js.map} +1 -1
package/dist/index.d.ts +135 -21
package/dist/index.js +253 -59
package/dist/index.js.map +1 -1
package/dist/main.js +6 -6
package/dist/main.js.map +1 -1
package/dist/sandboxes/docker.d.ts +1 -1
package/dist/sandboxes/docker.js +2 -2
package/dist/sandboxes/no-sandbox.d.ts +1 -1
package/dist/sandboxes/no-sandbox.js +1 -1
package/dist/sandboxes/podman.d.ts +1 -1
package/dist/sandboxes/podman.js +2 -2
package/dist/sandboxes/podman.js.map +1 -1
package/dist/sandboxes/vercel.d.ts +1 -1
package/dist/sandboxes/vercel.js.map +1 -1
package/dist/templates/blank/main.mts +1 -1
package/dist/templates/parallel-planner/main.mts +1 -1
package/dist/templates/parallel-planner-with-review/main.mts +1 -1
package/dist/templates/simple-loop/main.mts +1 -1
package/package.json +1 -1
package/dist/chunk-72UVAC7B.js.map +0 -1

package/dist/index.d.ts (changed)

@@ -1,5 +1,5 @@
-import { B as BindMountSandboxHandle, S as SandboxProvider, a as BranchStrategy, A as AnySandboxProvider, M as MergeToHeadBranchStrategy, b as NamedBranchStrategy } from './SandboxProvider-EkSMuBp8.js';
-export { c as BindMountBranchStrategy, d as BindMountCreateOptions, e as BindMountSandboxProvider, f as BindMountSandboxProviderConfig, E as ExecResult, H as HeadBranchStrategy, g as InteractiveExecOptions, h as IsolatedBranchStrategy, i as IsolatedCreateOptions, j as IsolatedSandboxHandle, I as IsolatedSandboxProvider, k as IsolatedSandboxProviderConfig, l as NoSandboxBranchStrategy, m as NoSandboxHandle, N as NoSandboxProvider, n as createBindMountSandboxProvider, o as createIsolatedSandboxProvider } from './SandboxProvider-EkSMuBp8.js';
+import { B as BindMountSandboxHandle, S as SandboxProvider, a as BranchStrategy, A as AnySandboxProvider, E as ExecResult, M as MergeToHeadBranchStrategy, b as NamedBranchStrategy } from './SandboxProvider-EkSMuBp8.js';
+export { c as BindMountBranchStrategy, d as BindMountCreateOptions, e as BindMountSandboxProvider, f as BindMountSandboxProviderConfig, H as HeadBranchStrategy, g as InteractiveExecOptions, h as IsolatedBranchStrategy, i as IsolatedCreateOptions, j as IsolatedSandboxHandle, I as IsolatedSandboxProvider, k as IsolatedSandboxProviderConfig, l as NoSandboxBranchStrategy, m as NoSandboxHandle, N as NoSandboxProvider, n as createBindMountSandboxProvider, o as createIsolatedSandboxProvider } from './SandboxProvider-EkSMuBp8.js';
 import { StandardSchemaV1 } from '@standard-schema/spec';
 export { M as MountConfig } from './MountConfig-CmXclHA5.js';
@@ -246,6 +246,12 @@ declare const claudeCode: (model: string, options?: ClaudeCodeOptions) => AgentP
  *
  * Emitted only in log-to-file mode when an `onAgentStreamEvent` callback is
  * provided via `logging`. See `run()`.
+ *
+ * The `"raw"` variant carries every stdout line the agent emits, verbatim and
+ * before parsing — including lines that the provider's stream parser would
+ * otherwise drop (e.g. tool-use blocks for unrecognised tools). Intended for
+ * debugging when the typed `"text"` / `"toolCall"` events don't surface
+ * enough detail.
  */
 type AgentStreamEvent = {
     readonly type: "text";
@@ -258,6 +264,11 @@ type AgentStreamEvent = {
     readonly formattedArgs: string;
     readonly iteration: number;
     readonly timestamp: Date;
+} | {
+    readonly type: "raw";
+    readonly line: string;
+    readonly iteration: number;
+    readonly timestamp: Date;
 };
 type SandboxHooks = {
@@ -302,11 +313,35 @@ interface OutputObjectDefinition<T> {
     readonly _tag: "object";
     readonly tag: string;
     readonly schema: StandardSchemaV1<unknown, T>;
+    /**
+     * Maximum number of additional attempts after the first if structured output
+     * extraction or validation fails. Each retry resumes the failed run's agent
+     * session and feeds back a token-efficient description of the error so the
+     * agent can re-emit a corrected tag. Default: `0` (no retries).
+     *
+     * Retries require the agent provider to support session resumption (i.e.
+     * `provider.sessionStorage` is populated — Claude Code, Codex, Pi). `run()`
+     * fails at entry with a clear error when retries are requested but the
+     * provider cannot resume.
+     */
+    readonly maxRetries?: number;
 }
 /** Branded output definition for `Output.string({ tag })`. */
 interface OutputStringDefinition {
     readonly _tag: "string";
     readonly tag: string;
+    /**
+     * Maximum number of additional attempts after the first if structured output
+     * extraction fails. Each retry resumes the failed run's agent session and
+     * feeds back a token-efficient description of the error so the agent can
+     * re-emit a corrected tag. Default: `0` (no retries).
+     *
+     * Retries require the agent provider to support session resumption (i.e.
+     * `provider.sessionStorage` is populated — Claude Code, Codex, Pi). `run()`
+     * fails at entry with a clear error when retries are requested but the
+     * provider cannot resume.
+     */
+    readonly maxRetries?: number;
 }
 /** Union of all output definition shapes accepted by `run()`. */
 type OutputDefinition = OutputObjectDefinition<any> | OutputStringDefinition;
@@ -329,18 +364,28 @@ declare const Output: {
      * Declare an object-typed structured output extracted from an XML tag in
      * the agent's stdout. The tag contents are JSON-parsed (with fence-aware
      * unwrapping) and validated against the provided Standard Schema validator.
+     *
+     * Set `maxRetries` to have `run()` automatically resume the failed session
+     * and ask the agent to re-emit corrected output when extraction or
+     * validation fails. Default: `0` (no retries).
      */
     readonly object: <Schema extends StandardSchemaV1>(opts: {
         tag: string;
         schema: Schema;
+        maxRetries?: number;
     }) => OutputObjectDefinition<StandardSchemaV1.InferOutput<Schema>>;
     /**
      * Declare a string-typed structured output extracted from an XML tag in
      * the agent's stdout. The tag contents are whitespace-trimmed and returned
      * as a plain string — no JSON parsing, no schema validation.
+     *
+     * Set `maxRetries` to have `run()` automatically resume the failed session
+     * and ask the agent to re-emit corrected output when extraction fails.
+     * Default: `0` (no retries).
      */
     readonly string: (opts: {
         tag: string;
+        maxRetries?: number;
     }) => OutputStringDefinition;
 };
 interface StructuredOutputErrorOptions {
@@ -414,16 +459,34 @@ type LoggingOption =
     readonly type: "file";
     readonly path: string;
     /**
-     * Optional callback invoked for each agent stream event (text chunk or
-     * tool call) in addition to being written to the log file. Intended for
-     * forwarding the agent's output stream to external observability
-     * systems. Errors thrown by the callback are swallowed.
+     * Optional callback invoked for each agent stream event (text chunk,
+     * tool call, or raw stdout line) in addition to being written to the
+     * log file. Intended for forwarding the agent's output stream to
+     * external observability systems. Errors thrown by the callback are
+     * swallowed.
      */
     readonly onAgentStreamEvent?: (event: AgentStreamEvent) => void;
+    /**
+     * When `true`, every raw stdout line the agent emits is appended
+     * verbatim to the same log file at `path`, in real time. Includes
+     * lines the provider's stream parser would otherwise drop (e.g.
+     * tool-use blocks for unrecognised tools). Intended for debugging
+     * stuck or unexpected agent behavior — note that the raw JSON is
+     * interleaved with the human-readable log output. Default: `false`.
+     */
+    readonly verbose?: boolean;
 }
 /** Render progress and agent output as an interactive UI in the terminal (terminal mode). */
  | {
     readonly type: "stdout";
+    /**
+     * When `true`, every raw stdout line the agent emits is written
+     * verbatim to `process.stdout`, in real time. Includes lines the
+     * provider's stream parser would otherwise drop. Intended for
+     * debugging stuck or unexpected agent behavior. Note: the raw output
+     * is interleaved with the interactive terminal UI. Default: `false`.
+     */
+    readonly verbose?: boolean;
 };
 /** Override default timeouts for built-in lifecycle steps. Unset keys keep their defaults. */
 interface Timeouts {
@@ -437,7 +500,7 @@ interface Timeouts {
     readonly mergeToHostMs?: number;
 }
 interface RunOptions<A extends AgentProvider = AgentProvider> {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
     readonly agent: A;
     /** Sandbox provider (e.g. docker({ imageName: "sandcastle:myrepo" })). */
     readonly sandbox: SandboxProvider;
@@ -575,7 +638,7 @@ declare function run<A extends AgentProvider>(options: RunOptions<A> & {
 declare function run<A extends AgentProvider>(options: RunOptions<A>): Promise<RunResult>;
 interface InteractiveOptions {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
     readonly agent: AgentProvider;
     /** Sandbox provider (e.g. docker(), noSandbox()). */
     readonly sandbox?: AnySandboxProvider;
@@ -669,17 +732,19 @@ interface CreateSandboxOptions {
     /** Override default timeouts for built-in lifecycle steps. Unset keys keep their defaults. */
     readonly timeouts?: Timeouts;
 }
-interface SandboxRunOptions {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")). */
-    readonly agent: AgentProvider;
-    /** Inline prompt string (mutually exclusive with promptFile). */
-    readonly prompt?: string;
-    /** Path to a prompt file (mutually exclusive with prompt). */
-    readonly promptFile?: string;
+/**
+ * Options accepted by `SandboxRunResult.resume()` / `.fork()`. Mirrors
+ * `ResumeRunResultOptions` in `run.ts` — drops the fields owned by the
+ * captured run (prompt, iteration count, resumeSession/forkSession bookkeeping).
+ *
+ * Defined as the base interface that `SandboxRunOptions` extends — the
+ * interface-extends shape is cheaper for the TS checker than
+ * `Omit<SandboxRunOptions, ...>` (which forces a mapped-type computation
+ * on every reference).
+ */
+interface ResumeSandboxRunResultOptions {
     /** Key-value map for {{KEY}} placeholder substitution in prompts. */
     readonly promptArgs?: PromptArgs;
-    /** Maximum iterations to run (default: 1). */
-    readonly maxIterations?: number;
     /** Substring(s) the agent emits to stop the iteration loop early. */
     readonly completionSignal?: string | string[];
     /** Idle timeout in seconds. Default: 600. */
@@ -701,6 +766,18 @@ interface SandboxRunOptions {
      */
     readonly signal?: AbortSignal;
 }
+interface SandboxRunOptions extends ResumeSandboxRunResultOptions {
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")). */
+    readonly agent: AgentProvider;
+    /** Inline prompt string (mutually exclusive with promptFile). */
+    readonly prompt?: string;
+    /** Path to a prompt file (mutually exclusive with prompt). */
+    readonly promptFile?: string;
+    /** Maximum iterations to run (default: 1). */
+    readonly maxIterations?: number;
+    /** Resume a prior agent session by id. The session JSONL must exist on the host (captured by a prior `sandbox.run()`). Incompatible with `maxIterations > 1`. */
+    readonly resumeSession?: string;
+}
 interface SandboxRunResult {
     /** Per-iteration results (use `iterations.length` for the count). */
     readonly iterations: IterationResult[];
@@ -714,9 +791,23 @@ interface SandboxRunResult {
     }[];
     /** Path to the log file, if logging was drained to a file. */
     readonly logFilePath?: string;
+    /**
+     * Continue the last captured agent session for exactly one iteration inside
+     * the same long-lived sandbox. Present only when the provider supports
+     * resume (`sessionStorage` populated) and a session id was captured.
+     */
+    readonly resume?: (prompt: string, options?: ResumeSandboxRunResultOptions) => Promise<SandboxRunResult>;
+    /**
+     * Fork the last captured agent session for exactly one iteration inside the
+     * same long-lived sandbox: the parent session JSONL is left intact and the
+     * child run gets its own session id. Present only when the provider
+     * supports resume (`sessionStorage` populated) and a session id was
+     * captured. See ADR 0018 for fork semantics.
+     */
+    readonly fork?: (prompt: string, options?: ResumeSandboxRunResultOptions) => Promise<SandboxRunResult>;
 }
 interface SandboxInteractiveOptions {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")). */
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")). */
     readonly agent: AgentProvider;
     /** Inline prompt string (mutually exclusive with promptFile). */
     readonly prompt?: string;
@@ -756,11 +847,34 @@ interface Sandbox {
     run(options: SandboxRunOptions): Promise<SandboxRunResult>;
     /** Launch an interactive agent session inside the existing sandbox. */
     interactive(options: SandboxInteractiveOptions): Promise<SandboxInteractiveResult>;
+    /**
+     * Execute a command inside the existing sandbox.
+     *
+     * `cwd` defaults to the sandbox repo path (same default `interactive()`
+     * uses), so callers get the same working directory across providers. Pass
+     * `cwd` to override.
+     *
+     * Returns the full `ExecResult` — non-zero `exitCode` is surfaced, not
+     * thrown. Callers that want strict semantics should check `result.exitCode`
+     * themselves (matching the contract of `BindMountSandboxHandle.exec`).
+     */
+    exec(command: string, options?: SandboxExecOptions): Promise<ExecResult>;
     /** Tear down the sandbox and worktree. */
     close(): Promise<CloseResult>;
     /** Auto teardown via `await using`. */
     [Symbol.asyncDispose](): Promise<void>;
 }
+/** Options accepted by `Sandbox.exec()`. Mirrors the provider handle's `exec` options. */
+interface SandboxExecOptions {
+    /** Per-line stdout callback for streaming output. */
+    readonly onLine?: (line: string) => void;
+    /** Working directory for the command. Defaults to the sandbox repo path. */
+    readonly cwd?: string;
+    /** Run the command with sudo, when the provider supports it. */
+    readonly sudo?: boolean;
+    /** Stdin payload — piped to the child process and then closed. Avoids the Linux 128 KB per-arg limit. */
+    readonly stdin?: string;
+}
 /**
  * Eagerly creates a git worktree on the provided explicit branch and starts
  * a sandbox with the worktree bind-mounted. Returns a Sandbox handle that
@@ -792,7 +906,7 @@ interface CreateWorktreeOptions {
     readonly timeouts?: Timeouts;
 }
 interface WorktreeInteractiveOptions {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
     readonly agent: AgentProvider;
     /** Sandbox provider (e.g. docker(), noSandbox()). Defaults to noSandbox(). */
     readonly sandbox?: AnySandboxProvider;
@@ -821,7 +935,7 @@ interface WorktreeInteractiveOptions {
     readonly signal?: AbortSignal;
 }
 interface WorktreeRunOptions {
-    /** Agent provider to use (e.g. claudeCode("claude-opus-4-7")) */
+    /** Agent provider to use (e.g. claudeCode("claude-opus-4-8")) */
     readonly agent: AgentProvider;
     /** Sandbox provider (e.g. docker()). Required — AFK agents should always be sandboxed. */
     readonly sandbox: SandboxProvider;
@@ -934,4 +1048,4 @@ interface CwdError extends Error {
 /** The provided `cwd` path does not exist or is not a directory. */
 declare const CwdError: CwdErrorConstructor;
-export { type AgentCommandOptions, type AgentProvider, type AgentStreamEvent, AnySandboxProvider, BindMountSandboxHandle, BranchStrategy, type ClaudeCodeOptions, type CloseResult, type CodexOptions, type CopilotOptions, type CreateSandboxOptions, type CreateWorktreeOptions, type CursorOptions, CwdError, type HostSessionLookup, type InteractiveOptions, type InteractiveResult, type IterationResult, type IterationUsage, type LoggingOption, MergeToHeadBranchStrategy, NamedBranchStrategy, type OpenCodeOptions, Output, type OutputDefinition, type OutputObjectDefinition, type OutputStringDefinition, type PiOptions, type PrintCommand, type PromptArgs, type RunOptions, type RunResult, type Sandbox, type SandboxHooks, type SandboxInteractiveOptions, type SandboxInteractiveResult, SandboxProvider, type SandboxRunOptions, type SandboxRunResult, StructuredOutputError, type Timeouts, type Worktree, type WorktreeBranchStrategy, type WorktreeCreateSandboxOptions, type WorktreeInteractiveOptions, type WorktreeRunOptions, type WorktreeRunResult, claudeCode, claudeHostSessionPath, claudeSandboxSessionPath, codex, copilot, createSandbox, createWorktree, cursor, encodeProjectPath, findClaudeSessionOnHost, findCodexSessionOnHost, interactive, opencode, pi, run, transferClaudeSession, transferCodexSession };
+export { type AgentCommandOptions, type AgentProvider, type AgentStreamEvent, AnySandboxProvider, BindMountSandboxHandle, BranchStrategy, type ClaudeCodeOptions, type CloseResult, type CodexOptions, type CopilotOptions, type CreateSandboxOptions, type CreateWorktreeOptions, type CursorOptions, CwdError, ExecResult, type HostSessionLookup, type InteractiveOptions, type InteractiveResult, type IterationResult, type IterationUsage, type LoggingOption, MergeToHeadBranchStrategy, NamedBranchStrategy, type OpenCodeOptions, Output, type OutputDefinition, type OutputObjectDefinition, type OutputStringDefinition, type PiOptions, type PrintCommand, type PromptArgs, type ResumeSandboxRunResultOptions, type RunOptions, type RunResult, type Sandbox, type SandboxExecOptions, type SandboxHooks, type SandboxInteractiveOptions, type SandboxInteractiveResult, SandboxProvider, type SandboxRunOptions, type SandboxRunResult, StructuredOutputError, type Timeouts, type Worktree, type WorktreeBranchStrategy, type WorktreeCreateSandboxOptions, type WorktreeInteractiveOptions, type WorktreeRunOptions, type WorktreeRunResult, claudeCode, claudeHostSessionPath, claudeSandboxSessionPath, codex, copilot, createSandbox, createWorktree, cursor, encodeProjectPath, findClaudeSessionOnHost, findCodexSessionOnHost, interactive, opencode, pi, run, transferClaudeSession, transferCodexSession };