zidane 5.10.13 → 5.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +31 -5
  2. package/dist/{agent-BHkvYIH9.d.ts → agent-D0W9yClt.d.ts} +114 -27
  3. package/dist/agent-D0W9yClt.d.ts.map +1 -0
  4. package/dist/chat/pure.d.ts +3 -3
  5. package/dist/chat.d.ts +7 -7
  6. package/dist/chat.js +2 -2
  7. package/dist/contexts/docker.d.ts +1 -1
  8. package/dist/contexts/docker.d.ts.map +1 -1
  9. package/dist/contexts/docker.js +53 -14
  10. package/dist/contexts/docker.js.map +1 -1
  11. package/dist/contexts/e2b.d.ts +168 -0
  12. package/dist/contexts/e2b.d.ts.map +1 -0
  13. package/dist/contexts/e2b.js +261 -0
  14. package/dist/contexts/e2b.js.map +1 -0
  15. package/dist/{contexts-BJVgG0LY.js → contexts-DglWSzmR.js} +59 -9
  16. package/dist/contexts-DglWSzmR.js.map +1 -0
  17. package/dist/contexts.d.ts +3 -3
  18. package/dist/contexts.js +1 -1
  19. package/dist/eval.d.ts +1 -1
  20. package/dist/eval.js +5 -5
  21. package/dist/eval.js.map +1 -1
  22. package/dist/{headless-CPaunZsU.js → headless-Bb5gU8AR.js} +6 -6
  23. package/dist/{headless-CPaunZsU.js.map → headless-Bb5gU8AR.js.map} +1 -1
  24. package/dist/headless.d.ts +1 -1
  25. package/dist/headless.js +1 -1
  26. package/dist/{index-C_t8tW_X.d.ts → index-CrMb8jCE.d.ts} +2 -2
  27. package/dist/{index-C_t8tW_X.d.ts.map → index-CrMb8jCE.d.ts.map} +1 -1
  28. package/dist/{index-BIo67xLV.d.ts → index-D60tX5XC.d.ts} +10 -3
  29. package/dist/index-D60tX5XC.d.ts.map +1 -0
  30. package/dist/{index-C4aT2kO_.d.ts → index-DZR99FD4.d.ts} +30 -111
  31. package/dist/index-DZR99FD4.d.ts.map +1 -0
  32. package/dist/index.d.ts +7 -6
  33. package/dist/index.js +11 -10
  34. package/dist/index.js.map +1 -1
  35. package/dist/{interpolate-Dy7Lunvg.js → interpolate-CTfr0GdR.js} +19 -1
  36. package/dist/{interpolate-Dy7Lunvg.js.map → interpolate-CTfr0GdR.js.map} +1 -1
  37. package/dist/logger-Ktm-lj1s.js +300 -0
  38. package/dist/logger-Ktm-lj1s.js.map +1 -0
  39. package/dist/logger-n4LsLISE.d.ts +102 -0
  40. package/dist/logger-n4LsLISE.d.ts.map +1 -0
  41. package/dist/{login-0jP1pnSJ.js → login-BHhOdTp9.js} +4 -301
  42. package/dist/login-BHhOdTp9.js.map +1 -0
  43. package/dist/{mcp-tevNihk_.js → mcp-Cy9mgCcr.js} +22 -9
  44. package/dist/mcp-Cy9mgCcr.js.map +1 -0
  45. package/dist/mcp.d.ts +1 -1
  46. package/dist/mcp.js +1 -1
  47. package/dist/{messages-C_1AmSpk.js → messages-RPKrEPvH.js} +6 -2
  48. package/dist/messages-RPKrEPvH.js.map +1 -0
  49. package/dist/output/stream-json.d.ts +2 -2
  50. package/dist/output/stream-json.js +1 -1
  51. package/dist/output/terminal.d.ts +2 -2
  52. package/dist/output/terminal.js +1 -0
  53. package/dist/output/terminal.js.map +1 -1
  54. package/dist/{presets-Cm2BPJaU.js → presets-D5ibZTml.js} +2 -2
  55. package/dist/{presets-Cm2BPJaU.js.map → presets-D5ibZTml.js.map} +1 -1
  56. package/dist/presets.d.ts +2 -2
  57. package/dist/presets.js +1 -1
  58. package/dist/{providers-BGBB18zz.js → providers-C2cxujp_.js} +85 -20
  59. package/dist/providers-C2cxujp_.js.map +1 -0
  60. package/dist/providers.d.ts +1 -1
  61. package/dist/providers.js +2 -2
  62. package/dist/restate.d.ts +2 -2
  63. package/dist/restate.js +4 -1
  64. package/dist/restate.js.map +1 -1
  65. package/dist/session/sqlite.d.ts +1 -1
  66. package/dist/session/sqlite.d.ts.map +1 -1
  67. package/dist/session/sqlite.js +36 -4
  68. package/dist/session/sqlite.js.map +1 -1
  69. package/dist/{session-CtAWwwkn.js → session-Do_TQV7c.js} +70 -22
  70. package/dist/session-Do_TQV7c.js.map +1 -0
  71. package/dist/session.d.ts +2 -2
  72. package/dist/session.js +3 -3
  73. package/dist/shell-quote-BmnhZmdM.js +33 -0
  74. package/dist/shell-quote-BmnhZmdM.js.map +1 -0
  75. package/dist/skills.d.ts +3 -3
  76. package/dist/skills.js +1 -1
  77. package/dist/skills.js.map +1 -1
  78. package/dist/{tool-formatters-D_fX6FGl.d.ts → tool-formatters-RT5-gyE2.d.ts} +2 -2
  79. package/dist/{tool-formatters-D_fX6FGl.d.ts.map → tool-formatters-RT5-gyE2.d.ts.map} +1 -1
  80. package/dist/tools/fetch-url.d.ts +1 -1
  81. package/dist/tools/web-search.d.ts +1 -1
  82. package/dist/{tools-NxnEmzYg.js → tools-ZHKOh44k.js} +342 -123
  83. package/dist/tools-ZHKOh44k.js.map +1 -0
  84. package/dist/tools.d.ts +2 -2
  85. package/dist/tools.js +1 -1
  86. package/dist/{transcript-anchors-DA6XawEU.d.ts → transcript-anchors-B4FxkG-8.d.ts} +10 -4
  87. package/dist/transcript-anchors-B4FxkG-8.d.ts.map +1 -0
  88. package/dist/{transcript-anchors-B_c7gWot.js → transcript-anchors-CS46ul6X.js} +10 -10
  89. package/dist/transcript-anchors-CS46ul6X.js.map +1 -0
  90. package/dist/tui.d.ts +3 -3
  91. package/dist/tui.d.ts.map +1 -1
  92. package/dist/tui.js +167 -41
  93. package/dist/tui.js.map +1 -1
  94. package/dist/{turn-operations-CCl7rpbT.d.ts → turn-operations-CoRj3mYZ.d.ts} +3 -3
  95. package/dist/{turn-operations-CCl7rpbT.d.ts.map → turn-operations-CoRj3mYZ.d.ts.map} +1 -1
  96. package/dist/{types-BibzMDjX.d.ts → types-B39tBba1.d.ts} +69 -2
  97. package/dist/types-B39tBba1.d.ts.map +1 -0
  98. package/dist/types-BiobHM1D.js.map +1 -1
  99. package/dist/types.d.ts +5 -5
  100. package/docs/ARCHITECTURE.md +1 -1
  101. package/docs/CHAT.md +3 -3
  102. package/docs/EXECUTION_CONTEXT.md +257 -0
  103. package/docs/RUN_IN_BACKGROUND.md +8 -0
  104. package/docs/SKILL.md +3 -3
  105. package/package.json +57 -24
  106. package/dist/agent-BHkvYIH9.d.ts.map +0 -1
  107. package/dist/contexts-BJVgG0LY.js.map +0 -1
  108. package/dist/index-BIo67xLV.d.ts.map +0 -1
  109. package/dist/index-C4aT2kO_.d.ts.map +0 -1
  110. package/dist/login-0jP1pnSJ.js.map +0 -1
  111. package/dist/mcp-tevNihk_.js.map +0 -1
  112. package/dist/messages-C_1AmSpk.js.map +0 -1
  113. package/dist/providers-BGBB18zz.js.map +0 -1
  114. package/dist/session-CtAWwwkn.js.map +0 -1
  115. package/dist/tools-NxnEmzYg.js.map +0 -1
  116. package/dist/transcript-anchors-B_c7gWot.js.map +0 -1
  117. package/dist/transcript-anchors-DA6XawEU.d.ts.map +0 -1
  118. package/dist/types-BibzMDjX.d.ts.map +0 -1
@@ -0,0 +1 @@
1
+ {"version":3,"file":"e2b.js","names":[],"sources":["../../src/contexts/e2b.ts"],"sourcesContent":["/**\n * E2B sandbox provider.\n *\n * Implements {@link SandboxProvider} on top of E2B's firecracker-backed\n * micro-VMs. Pair it with {@link createSandboxContext} to run an agent's\n * shell + filesystem tools inside an ephemeral E2B sandbox:\n *\n * ```ts\n * import { createSandboxContext } from 'zidane/contexts'\n * import { createE2BProvider } from 'zidane/contexts/e2b'\n *\n * const execution = createSandboxContext(createE2BProvider())\n * ```\n *\n * ## Pointing at your own E2B\n *\n * The two knobs that select *which* E2B you talk to are `apiKey` and\n * `domain`. Self-hosted / on-prem E2B clusters expose a custom domain; pass\n * it here (or via the `E2B_DOMAIN` env var) and the SDK routes every call to\n * your cluster instead of E2B's hosted API. Precedence, highest first:\n *\n * 1. Per-spawn `SpawnConfig.sandbox` fields (`apiKey`, `domain`, `template`)\n * 2. {@link E2BProviderOptions} passed to this factory\n * 3. `E2B_API_KEY` / `E2B_DOMAIN` env vars (read by the SDK itself), and\n * `E2B_TEMPLATE` (read by this provider — the SDK does not read it)\n *\n * The default `base` template does not exist on self-hosted clusters, so\n * picking the right template matters: an invalid template still returns a\n * sandbox id from the control plane, but its envd never boots and every\n * `exec`/file op then hangs until it times out. Set `E2B_TEMPLATE` (or pass\n * `template`) to a template that actually exists on your cluster.\n *\n * Requires `e2b` as an optional peer dependency:\n * `bun add e2b`.\n */\n\nimport type { Logger } from '../logger'\nimport type { SandboxProvider } from './sandbox'\nimport type { ExecResult, SpawnConfig } from './types'\nimport { consoleSink, createLogger } from '../logger'\n\n// ---------------------------------------------------------------------------\n// Minimal structural views of the `e2b` SDK surface we touch. Declared locally\n// (rather than imported) so the package typechecks without the optional peer\n// dependency installed — mirroring how `docker.ts` keeps `dockerode` at arm's\n// length. The real SDK satisfies these shapes. Every member used here\n// (`Sandbox.create`/`connect`/`kill`/`sandboxId`, `commands.run` with\n// `cwd`/`envs`/`timeoutMs`, `files.read`/`write`/`list`) exists as of\n// `e2b@1.0.0`, which is why the peer dependency floor is `>=1.0.0`.\n// ---------------------------------------------------------------------------\n\ninterface E2BCommandResult {\n stdout: string\n stderr: string\n exitCode: number\n}\n\ninterface E2BEntry {\n name: string\n}\n\ninterface E2BSandbox {\n sandboxId: string\n commands: {\n run: (cmd: string, opts?: { cwd?: string, envs?: Record<string, string>, timeoutMs?: number }) => Promise<E2BCommandResult>\n }\n files: {\n read: (path: string) => Promise<string>\n write: (path: string, data: string) => Promise<unknown>\n list: (path: string) => Promise<E2BEntry[]>\n }\n kill: () => Promise<unknown>\n}\n\ninterface E2BCreateOptions {\n apiKey?: string\n domain?: string\n timeoutMs?: number\n envs?: Record<string, string>\n metadata?: Record<string, string>\n}\n\ninterface E2BConnectOptions {\n apiKey?: string\n domain?: string\n timeoutMs?: number\n}\n\ninterface E2BSandboxStatic {\n create: (templateOrOpts?: string | E2BCreateOptions, opts?: E2BCreateOptions) => Promise<E2BSandbox>\n connect: (sandboxId: string, opts?: E2BConnectOptions) => Promise<E2BSandbox>\n}\n\n// ---------------------------------------------------------------------------\n// Provider options\n// ---------------------------------------------------------------------------\n\nexport interface E2BProviderOptions {\n /** E2B API key. Falls back to the `E2B_API_KEY` env var when omitted. */\n apiKey?: string\n /**\n * E2B API domain. Set this to point at a self-hosted / on-prem cluster\n * (e.g. `e2b.my-company.internal`). Falls back to the `E2B_DOMAIN` env var,\n * then to E2B's hosted default.\n */\n domain?: string\n /**\n * Sandbox template id / name to launch. Falls back to the `E2B_TEMPLATE`\n * env var, then to E2B's hosted `base` template. Note that `base` does not\n * exist on self-hosted clusters — supply a template that does.\n */\n template?: string\n /**\n * Connect to a pre-existing sandbox by id instead of creating a fresh one\n * (E2B's `Sandbox.connect`). When set, `template` is ignored — you're\n * attaching to a sandbox that already exists. The provider treats such a\n * sandbox as externally owned: it is NOT killed on `destroy`, nor torn down\n * if the readiness probe / pregame fails, since the caller manages its\n * lifecycle. Per-spawn `SpawnConfig.sandbox.sandboxId` overrides this.\n */\n sandboxId?: string\n /**\n * Default working directory, applied when a spawn doesn't set its own\n * `SpawnConfig.cwd`. Created on spawn. Defaults to E2B's `/home/user`.\n */\n cwd?: string\n /**\n * Sandbox lifetime in seconds before E2B auto-kills it. Defaults to the\n * SDK default (300s). Per-spawn `SpawnConfig.limits.timeout` overrides this.\n */\n timeoutSeconds?: number\n /**\n * Environment variables baked into every sandbox at create time (passed to\n * E2B's `Sandbox.create({ envs })`), so every command run in the sandbox\n * sees them. Per-spawn `SpawnConfig.env` is merged over these.\n */\n env?: Record<string, string>\n /**\n * Logger for provider-level lifecycle lines (notably the readiness wait).\n * Defaults to a {@link consoleSink}-backed logger so the messages reach\n * stderr without the caller wiring one up.\n */\n logger?: Logger\n /**\n * Total deadline, in seconds, for the post-create readiness probe (see\n * {@link waitForE2BReady}). A freshly-created sandbox can return its id\n * before `envd` accepts commands; `spawn` blocks until a trivial probe\n * succeeds so the first real command never absorbs the cold start. Defaults\n * to `60`. Set to `0` to disable the gate.\n */\n readinessTimeoutSeconds?: number\n /**\n * A setup script to upload and run once the sandbox is ready, before any\n * prompting begins (see {@link runE2BPregame}). `name` is the basename used\n * for the uploaded file; `content` is the script body. The script is run\n * directly so its shebang selects the interpreter. A non-zero exit aborts\n * `spawn` (and tears the sandbox down) — a broken setup must not silently\n * hand back a half-provisioned sandbox.\n */\n pregame?: { name: string, content: string }\n}\n\n/** Per-probe timeout for the readiness `true` command. */\nconst READINESS_PROBE_TIMEOUT_MS = 5000\n/** Pause between readiness probes while the sandbox is still warming up. */\nconst READINESS_BACKOFF_MS = 250\n/**\n * Command timeout for the pregame setup script. E2B's `commands.run` defaults\n * to 60s and offers no true \"unlimited\" — a `timeoutMs` of 0 is a 0ms deadline\n * (instant timeout), not \"off\". Setup scripts (`apt`/`pip` installs, builds)\n * routinely exceed 60s, so we pass a deliberately generous 30-minute ceiling.\n * The sandbox's own lifetime independently bounds anything longer.\n */\nconst PREGAME_TIMEOUT_MS = 30 * 60 * 1000\n\n/**\n * Upload a setup (\"pregame\") script into a ready sandbox and execute it.\n *\n * Writes the script to `/tmp/<name>`, then `chmod +x`'s and runs it directly\n * (so a `#!/bin/bash` / `#!/usr/bin/env python3` shebang chooses the\n * interpreter) in `cwd` with `envs` applied — the same env the sandbox was\n * created with, so the script sees the run's `--env` / `--pass-env`. A\n * generous {@link PREGAME_TIMEOUT_MS} ceiling is used so long `apt`/`pip`\n * installs aren't killed by E2B's 60s command default. A non-zero exit\n * (whether E2B returns it or raises on it) throws, so the caller can tear the\n * sandbox down rather than prompt against a broken environment.\n */\nexport async function runE2BPregame(\n sandbox: E2BSandbox,\n logger: Logger,\n opts: { name: string, content: string, cwd?: string, envs?: Record<string, string> },\n): Promise<void> {\n const path = `/tmp/${opts.name}`\n logger.info('running E2B pregame script', { sandboxId: sandbox.sandboxId, path })\n await sandbox.files.write(path, opts.content)\n\n const quoted = JSON.stringify(path)\n let result: E2BCommandResult\n try {\n result = await sandbox.commands.run(`chmod +x ${quoted} && ${quoted}`, {\n cwd: opts.cwd,\n envs: opts.envs,\n timeoutMs: PREGAME_TIMEOUT_MS,\n })\n }\n catch (err: any) {\n // E2B raises CommandExitError (carrying the captured streams) on a\n // non-zero exit. Surface its output, then fail the spawn.\n const exitCode = typeof err?.exitCode === 'number' ? err.exitCode : 124\n logger.error('E2B pregame script failed', { sandboxId: sandbox.sandboxId, exitCode, stdout: err?.stdout ?? '', stderr: err?.stderr ?? err?.message ?? '' })\n throw new Error(`E2B pregame script ${opts.name} failed with exit code ${exitCode}`)\n }\n\n if (result.exitCode !== 0) {\n logger.error('E2B pregame script failed', { sandboxId: sandbox.sandboxId, exitCode: result.exitCode, stdout: result.stdout, stderr: result.stderr })\n throw new Error(`E2B pregame script ${opts.name} failed with exit code ${result.exitCode}`)\n }\n\n logger.info('E2B pregame ready', { sandboxId: sandbox.sandboxId, stdout: result.stdout, stderr: result.stderr })\n}\n\nexport interface E2BReadinessOptions {\n /** Total deadline in seconds. `<= 0` disables the probe entirely. */\n timeoutSeconds: number\n /**\n * Whether to kill the sandbox if it never becomes ready. Defaults to `true`\n * (we created it, so a dead sandbox is ours to clean up). Pass `false` when\n * attaching to an externally-owned sandbox — failing the probe shouldn't tear\n * down a sandbox the caller is managing.\n */\n killOnTimeout?: boolean\n /** Clock source — injectable so tests can drive the deadline deterministically. */\n now?: () => number\n /** Sleep between probes — injectable so tests can advance the clock without waiting. */\n sleep?: (ms: number) => Promise<void>\n}\n\n/**\n * Block until an E2B sandbox is ready to run commands, or throw if it never\n * becomes ready within the deadline.\n *\n * The E2B control plane can hand back a sandbox id before the sandbox's `envd`\n * is accepting commands; the first real command then absorbs that cold start\n * and may time out. This polls a trivial `true` command on a short backoff,\n * logging a `waiting…` line up front and a `ready` line (with `elapsedMs`) on\n * success. On deadline exhaustion it best-effort kills the sandbox and throws.\n *\n * `now`/`sleep` are injectable purely for deterministic tests; in production\n * they default to `Date.now` and a real timer.\n */\nexport async function waitForE2BReady(\n sandbox: E2BSandbox,\n logger: Logger,\n opts: E2BReadinessOptions,\n): Promise<void> {\n if (opts.timeoutSeconds <= 0)\n return\n\n const now = opts.now ?? Date.now\n const sleep = opts.sleep ?? (ms => new Promise<void>(resolve => setTimeout(resolve, ms)))\n const start = now()\n const deadline = start + opts.timeoutSeconds * 1000\n\n logger.info('waiting for E2B sandbox to initialize', { sandboxId: sandbox.sandboxId })\n\n while (true) {\n try {\n await sandbox.commands.run('true', { timeoutMs: READINESS_PROBE_TIMEOUT_MS })\n logger.info('E2B sandbox ready', { sandboxId: sandbox.sandboxId, elapsedMs: now() - start })\n return\n }\n catch {\n if (now() >= deadline) {\n if (opts.killOnTimeout !== false)\n await sandbox.kill().catch(() => {})\n throw new Error(`E2B sandbox ${sandbox.sandboxId} did not become ready within ${opts.timeoutSeconds}s`)\n }\n await sleep(READINESS_BACKOFF_MS)\n }\n }\n}\n\n/**\n * Resolve the effective E2B template, highest precedence first: the per-spawn\n * `SpawnConfig.sandbox.template`, then the factory `template` option, then the\n * `E2B_TEMPLATE` env var. Returns `undefined` (→ SDK's `base` default) when\n * none is set. The E2B SDK reads `E2B_API_KEY`/`E2B_DOMAIN` from the env but\n * NOT `E2B_TEMPLATE`, so this provider has to honor it itself.\n */\nexport function resolveE2BTemplate(\n perSpawnTemplate: string | undefined,\n optionTemplate: string | undefined,\n env: Record<string, string | undefined> = process.env,\n): string | undefined {\n return perSpawnTemplate || optionTemplate || env.E2B_TEMPLATE || undefined\n}\n\n/**\n * Merge the factory-level {@link E2BProviderOptions.env} with the per-spawn\n * `SpawnConfig.env`, per-spawn winning on key conflicts. Returns `undefined`\n * when the merge is empty so callers can skip setting `envs` on the create\n * options entirely (rather than handing the SDK an empty object).\n */\nexport function resolveE2BEnv(\n optionEnv: Record<string, string> | undefined,\n perSpawnEnv: Record<string, string> | undefined,\n): Record<string, string> | undefined {\n const merged = { ...optionEnv, ...perSpawnEnv }\n return Object.keys(merged).length > 0 ? merged : undefined\n}\n\n// ---------------------------------------------------------------------------\n// Provider\n// ---------------------------------------------------------------------------\n\n/**\n * Build a {@link SandboxProvider} backed by E2B.\n *\n * The provider owns a registry of live {@link E2BSandbox} instances keyed by\n * sandbox id, so the per-call `exec`/`readFile`/… hooks reuse the same warm\n * connection rather than reconnecting each time.\n */\nexport function createE2BProvider(options: E2BProviderOptions = {}): SandboxProvider {\n // Track the live sandbox, the env it was created with, and whether WE own it.\n // We re-send that env on every `commands.run` rather than relying on\n // `Sandbox.create`'s `envs` alone — a warm / pre-provisioned sandbox handed\n // back by the control plane may never have run our create-time env injection.\n // `owned` is false for sandboxes we attached to via `Sandbox.connect`: those\n // are never killed on teardown/failure because the caller manages them.\n const live = new Map<string, { sandbox: E2BSandbox, env?: Record<string, string>, owned: boolean }>()\n\n // Resolve the readiness logger once: caller-supplied, else a console-backed\n // default so the \"waiting for sandbox\" line still reaches stderr.\n const logger = options.logger ?? createLogger(consoleSink())\n const readinessTimeoutSeconds = options.readinessTimeoutSeconds ?? 60\n\n async function loadSdk(): Promise<E2BSandboxStatic> {\n try {\n const mod = await import('e2b') as { Sandbox: E2BSandboxStatic }\n return mod.Sandbox\n }\n catch {\n throw new Error('e2b is required for the E2B sandbox provider. Install it with: bun add e2b')\n }\n }\n\n function get(sandboxId: string): { sandbox: E2BSandbox, env?: Record<string, string>, owned: boolean } {\n const entry = live.get(sandboxId)\n if (!entry)\n throw new Error(`E2B sandbox ${sandboxId} is not tracked by this provider`)\n return entry\n }\n\n return {\n name: 'e2b',\n\n async spawn(config: SpawnConfig): Promise<{ id: string, cwd: string }> {\n const Sandbox = await loadSdk()\n const sb: Record<string, unknown> = config.sandbox ?? {}\n\n // Per-spawn overrides win over factory options win over SDK env defaults.\n const apiKey = (sb.apiKey as string | undefined) ?? options.apiKey\n const domain = (sb.domain as string | undefined) ?? options.domain\n const template = resolveE2BTemplate(sb.template as string | undefined, options.template)\n const timeoutSeconds = config.limits?.timeout ?? options.timeoutSeconds\n const connectId = (sb.sandboxId as string | undefined) ?? options.sandboxId\n\n const envs = resolveE2BEnv(options.env, config.env)\n\n // Attach to an existing sandbox when a sandbox id was supplied; otherwise\n // create a fresh one. A connected sandbox is externally owned (`owned`\n // false) so we never kill it on failure or teardown. The create-time\n // `envs`/`template` don't apply to an attach — env is still re-sent on\n // every command, so a connected sandbox isn't left without it.\n let sandbox: E2BSandbox\n const owned = connectId === undefined\n if (connectId !== undefined) {\n const connectOpts: E2BConnectOptions = {}\n if (apiKey !== undefined)\n connectOpts.apiKey = apiKey\n if (domain !== undefined)\n connectOpts.domain = domain\n if (timeoutSeconds !== undefined)\n connectOpts.timeoutMs = timeoutSeconds * 1000\n sandbox = await Sandbox.connect(connectId, connectOpts)\n }\n else {\n const createOpts: E2BCreateOptions = {}\n if (apiKey !== undefined)\n createOpts.apiKey = apiKey\n if (domain !== undefined)\n createOpts.domain = domain\n if (envs)\n createOpts.envs = envs\n if (timeoutSeconds !== undefined)\n createOpts.timeoutMs = timeoutSeconds * 1000\n sandbox = template !== undefined\n ? await Sandbox.create(template, createOpts)\n : await Sandbox.create(createOpts)\n }\n\n live.set(sandbox.sandboxId, { sandbox, env: envs, owned })\n\n // Wait for envd to actually accept commands before handing back the\n // handle — otherwise the cwd mkdir below (and the agent's first turn)\n // races the sandbox's cold start. On failure a sandbox we created is\n // already killed; a connected one is left alone (`killOnTimeout`). Either\n // way, stop tracking it and propagate.\n try {\n await waitForE2BReady(sandbox, logger, { timeoutSeconds: readinessTimeoutSeconds, killOnTimeout: owned })\n }\n catch (err) {\n live.delete(sandbox.sandboxId)\n throw err\n }\n\n // Resolve the working directory. An explicit cwd (per-spawn, else the\n // provider default) is the caller's responsibility — create it up front\n // so relative ops have somewhere to land. With no explicit cwd, DON'T\n // assume E2B's hosted `/home/user`: self-hosted templates may run as a\n // different user whose home lives elsewhere and which we lack permission\n // to create under `/home`. Handing envd a non-existent cwd makes it fail\n // process spawn with ENOENT (\"No such file or directory\"), breaking the\n // pregame script and the agent's first shell turn alike. Instead, ask the\n // sandbox where a login shell actually lands (a cwd-less `pwd`, which\n // can't itself hit the ENOENT) and use that.\n const requestedCwd = config.cwd ?? options.cwd\n let cwd: string\n if (requestedCwd) {\n cwd = requestedCwd\n // Best-effort: the cwd usually already exists (or is creatable), and a\n // genuine failure surfaces as ENOENT on the very next command anyway.\n await sandbox.commands.run(`mkdir -p ${JSON.stringify(cwd)}`).catch(() => {})\n }\n else {\n const discovered = await sandbox.commands.run('pwd')\n .then(r => r.stdout.trim())\n .catch(() => '')\n cwd = discovered || '/home/user'\n }\n\n // Pre-provision the environment before any prompting: upload + run the\n // setup script in the now-ready sandbox. A failure tears the sandbox\n // down (mirroring the readiness gate) rather than handing back a\n // half-provisioned environment.\n if (options.pregame) {\n try {\n await runE2BPregame(sandbox, logger, { name: options.pregame.name, content: options.pregame.content, cwd, envs })\n }\n catch (err) {\n // Only tear down a sandbox we created; a connected one is the\n // caller's to manage even when our setup script fails.\n if (owned)\n await sandbox.kill().catch(() => {})\n live.delete(sandbox.sandboxId)\n throw err\n }\n }\n\n return { id: sandbox.sandboxId, cwd }\n },\n\n async exec(sandboxId, command, opts): Promise<ExecResult> {\n const { sandbox, env } = get(sandboxId)\n // Match the process/docker contexts: `timeout` is in seconds, default 30.\n const timeoutMs = (opts?.timeout ?? 30) * 1000\n try {\n const result = await sandbox.commands.run(command, {\n cwd: opts?.cwd,\n // Re-send the sandbox's configured env on every command (warm\n // sandboxes may lack the create-time injection), with the per-call\n // env layered on top.\n envs: resolveE2BEnv(env, opts?.env),\n timeoutMs: timeoutMs > 0 ? timeoutMs : undefined,\n })\n return { stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode }\n }\n catch (err: any) {\n // E2B throws on non-zero exit (CommandExitError carries the captured\n // streams) and on timeout. Normalize both into an ExecResult so the\n // shell tool sees a clean exit code rather than a thrown error.\n if (typeof err?.exitCode === 'number') {\n return {\n stdout: err.stdout ?? '',\n stderr: err.stderr ?? err.message ?? '',\n exitCode: err.exitCode,\n }\n }\n return { stdout: '', stderr: err?.message ?? String(err), exitCode: 124 }\n }\n },\n\n async readFile(sandboxId, path): Promise<string> {\n return get(sandboxId).sandbox.files.read(path)\n },\n\n async writeFile(sandboxId, path, content): Promise<void> {\n await get(sandboxId).sandbox.files.write(path, content)\n },\n\n async listFiles(sandboxId, path): Promise<string[]> {\n const entries = await get(sandboxId).sandbox.files.list(path)\n return entries.map(e => e.name)\n },\n\n async destroy(sandboxId): Promise<void> {\n const entry = live.get(sandboxId)\n if (!entry)\n return\n // Never kill a sandbox we only connected to — the caller owns its\n // lifecycle. Just drop our handle to it.\n if (entry.owned) {\n try {\n await entry.sandbox.kill()\n }\n catch {\n // Sandbox may already be gone (timed out, killed out-of-band).\n }\n }\n live.delete(sandboxId)\n },\n }\n}\n"],"mappings":";;;AAmKA,MAAM,6BAA6B;;AAEnC,MAAM,uBAAuB;;;;;;;;AAQ7B,MAAM,qBAAqB,OAAU;;;;;;;;;;;;;AAcrC,eAAsB,cACpB,SACA,QACA,MACe;CACf,MAAM,OAAO,QAAQ,KAAK;CAC1B,OAAO,KAAK,8BAA8B;EAAE,WAAW,QAAQ;EAAW;CAAK,CAAC;CAChF,MAAM,QAAQ,MAAM,MAAM,MAAM,KAAK,OAAO;CAE5C,MAAM,SAAS,KAAK,UAAU,IAAI;CAClC,IAAI;CACJ,IAAI;EACF,SAAS,MAAM,QAAQ,SAAS,IAAI,YAAY,OAAO,MAAM,UAAU;GACrE,KAAK,KAAK;GACV,MAAM,KAAK;GACX,WAAW;EACb,CAAC;CACH,SACO,KAAU;EAGf,MAAM,WAAW,OAAO,KAAK,aAAa,WAAW,IAAI,WAAW;EACpE,OAAO,MAAM,6BAA6B;GAAE,WAAW,QAAQ;GAAW;GAAU,QAAQ,KAAK,UAAU;GAAI,QAAQ,KAAK,UAAU,KAAK,WAAW;EAAG,CAAC;EAC1J,MAAM,IAAI,MAAM,sBAAsB,KAAK,KAAK,yBAAyB,UAAU;CACrF;CAEA,IAAI,OAAO,aAAa,GAAG;EACzB,OAAO,MAAM,6BAA6B;GAAE,WAAW,QAAQ;GAAW,UAAU,OAAO;GAAU,QAAQ,OAAO;GAAQ,QAAQ,OAAO;EAAO,CAAC;EACnJ,MAAM,IAAI,MAAM,sBAAsB,KAAK,KAAK,yBAAyB,OAAO,UAAU;CAC5F;CAEA,OAAO,KAAK,qBAAqB;EAAE,WAAW,QAAQ;EAAW,QAAQ,OAAO;EAAQ,QAAQ,OAAO;CAAO,CAAC;AACjH;;;;;;;;;;;;;;AA+BA,eAAsB,gBACpB,SACA,QACA,MACe;CACf,IAAI,KAAK,kBAAkB,GACzB;CAEF,MAAM,MAAM,KAAK,OAAO,KAAK;CAC7B,MAAM,QAAQ,KAAK,WAAU,OAAM,IAAI,SAAc,YAAW,WAAW,SAAS,EAAE,CAAC;CACvF,MAAM,QAAQ,IAAI;CAClB,MAAM,WAAW,QAAQ,KAAK,iBAAiB;CAE/C,OAAO,KAAK,yCAAyC,EAAE,WAAW,QAAQ,UAAU,CAAC;CAErF,OAAO,MACL,IAAI;EACF,MAAM,QAAQ,SAAS,IAAI,QAAQ,EAAE,WAAW,2BAA2B,CAAC;EAC5E,OAAO,KAAK,qBAAqB;GAAE,WAAW,QAAQ;GAAW,WAAW,IAAI,IAAI;EAAM,CAAC;EAC3F;CACF,QACM;EACJ,IAAI,IAAI,KAAK,UAAU;GACrB,IAAI,KAAK,kBAAkB,OACzB,MAAM,QAAQ,KAAK,EAAE,YAAY,CAAC,CAAC;GACrC,MAAM,IAAI,MAAM,eAAe,QAAQ,UAAU,+BAA+B,KAAK,eAAe,EAAE;EACxG;EACA,MAAM,MAAM,oBAAoB;CAClC;AAEJ;;;;;;;;AASA,SAAgB,mBACd,kBACA,gBACA,MAA0C,QAAQ,KAC9B;CACpB,OAAO,oBAAoB,kBAAkB,IAAI,gBAAgB,KAAA;AACnE;;;;;;;AAQA,SAAgB,cACd,WACA,aACoC;CACpC,MAAM,SAAS;EAAE,GAAG;EAAW,GAAG;CAAY;CAC9C,OAAO,OAAO,KAAK,MAAM,EAAE,SAAS,IAAI,SAAS,KAAA;AACnD;;;;;;;;AAaA,SAAgB,kBAAkB,UAA8B,CAAC,GAAoB;CAOnF,MAAM,uBAAO,IAAI,IAAmF;CAIpG,MAAM,SAAS,QAAQ,UAAU,aAAa,YAAY,CAAC;CAC3D,MAAM,0BAA0B,QAAQ,2BAA2B;CAEnE,eAAe,UAAqC;EAClD,IAAI;GAEF,QAAO,MADW,OAAO,QACd;EACb,QACM;GACJ,MAAM,IAAI,MAAM,4EAA4E;EAC9F;CACF;CAEA,SAAS,IAAI,WAA0F;EACrG,MAAM,QAAQ,KAAK,IAAI,SAAS;EAChC,IAAI,CAAC,OACH,MAAM,IAAI,MAAM,eAAe,UAAU,iCAAiC;EAC5E,OAAO;CACT;CAEA,OAAO;EACL,MAAM;EAEN,MAAM,MAAM,QAA2D;GACrE,MAAM,UAAU,MAAM,QAAQ;GAC9B,MAAM,KAA8B,OAAO,WAAW,CAAC;GAGvD,MAAM,SAAU,GAAG,UAAiC,QAAQ;GAC5D,MAAM,SAAU,GAAG,UAAiC,QAAQ;GAC5D,MAAM,WAAW,mBAAmB,GAAG,UAAgC,QAAQ,QAAQ;GACvF,MAAM,iBAAiB,OAAO,QAAQ,WAAW,QAAQ;GACzD,MAAM,YAAa,GAAG,aAAoC,QAAQ;GAElE,MAAM,OAAO,cAAc,QAAQ,KAAK,OAAO,GAAG;GAOlD,IAAI;GACJ,MAAM,QAAQ,cAAc,KAAA;GAC5B,IAAI,cAAc,KAAA,GAAW;IAC3B,MAAM,cAAiC,CAAC;IACxC,IAAI,WAAW,KAAA,GACb,YAAY,SAAS;IACvB,IAAI,WAAW,KAAA,GACb,YAAY,SAAS;IACvB,IAAI,mBAAmB,KAAA,GACrB,YAAY,YAAY,iBAAiB;IAC3C,UAAU,MAAM,QAAQ,QAAQ,WAAW,WAAW;GACxD,OACK;IACH,MAAM,aAA+B,CAAC;IACtC,IAAI,WAAW,KAAA,GACb,WAAW,SAAS;IACtB,IAAI,WAAW,KAAA,GACb,WAAW,SAAS;IACtB,IAAI,MACF,WAAW,OAAO;IACpB,IAAI,mBAAmB,KAAA,GACrB,WAAW,YAAY,iBAAiB;IAC1C,UAAU,aAAa,KAAA,IACnB,MAAM,QAAQ,OAAO,UAAU,UAAU,IACzC,MAAM,QAAQ,OAAO,UAAU;GACrC;GAEA,KAAK,IAAI,QAAQ,WAAW;IAAE;IAAS,KAAK;IAAM;GAAM,CAAC;GAOzD,IAAI;IACF,MAAM,gBAAgB,SAAS,QAAQ;KAAE,gBAAgB;KAAyB,eAAe;IAAM,CAAC;GAC1G,SACO,KAAK;IACV,KAAK,OAAO,QAAQ,SAAS;IAC7B,MAAM;GACR;GAYA,MAAM,eAAe,OAAO,OAAO,QAAQ;GAC3C,IAAI;GACJ,IAAI,cAAc;IAChB,MAAM;IAGN,MAAM,QAAQ,SAAS,IAAI,YAAY,KAAK,UAAU,GAAG,GAAG,EAAE,YAAY,CAAC,CAAC;GAC9E,OAKE,MAAM,MAHmB,QAAQ,SAAS,IAAI,KAAK,EAChD,MAAK,MAAK,EAAE,OAAO,KAAK,CAAC,EACzB,YAAY,EAAE,KACG;GAOtB,IAAI,QAAQ,SACV,IAAI;IACF,MAAM,cAAc,SAAS,QAAQ;KAAE,MAAM,QAAQ,QAAQ;KAAM,SAAS,QAAQ,QAAQ;KAAS;KAAK;IAAK,CAAC;GAClH,SACO,KAAK;IAGV,IAAI,OACF,MAAM,QAAQ,KAAK,EAAE,YAAY,CAAC,CAAC;IACrC,KAAK,OAAO,QAAQ,SAAS;IAC7B,MAAM;GACR;GAGF,OAAO;IAAE,IAAI,QAAQ;IAAW;GAAI;EACtC;EAEA,MAAM,KAAK,WAAW,SAAS,MAA2B;GACxD,MAAM,EAAE,SAAS,QAAQ,IAAI,SAAS;GAEtC,MAAM,aAAa,MAAM,WAAW,MAAM;GAC1C,IAAI;IACF,MAAM,SAAS,MAAM,QAAQ,SAAS,IAAI,SAAS;KACjD,KAAK,MAAM;KAIX,MAAM,cAAc,KAAK,MAAM,GAAG;KAClC,WAAW,YAAY,IAAI,YAAY,KAAA;IACzC,CAAC;IACD,OAAO;KAAE,QAAQ,OAAO;KAAQ,QAAQ,OAAO;KAAQ,UAAU,OAAO;IAAS;GACnF,SACO,KAAU;IAIf,IAAI,OAAO,KAAK,aAAa,UAC3B,OAAO;KACL,QAAQ,IAAI,UAAU;KACtB,QAAQ,IAAI,UAAU,IAAI,WAAW;KACrC,UAAU,IAAI;IAChB;IAEF,OAAO;KAAE,QAAQ;KAAI,QAAQ,KAAK,WAAW,OAAO,GAAG;KAAG,UAAU;IAAI;GAC1E;EACF;EAEA,MAAM,SAAS,WAAW,MAAuB;GAC/C,OAAO,IAAI,SAAS,EAAE,QAAQ,MAAM,KAAK,IAAI;EAC/C;EAEA,MAAM,UAAU,WAAW,MAAM,SAAwB;GACvD,MAAM,IAAI,SAAS,EAAE,QAAQ,MAAM,MAAM,MAAM,OAAO;EACxD;EAEA,MAAM,UAAU,WAAW,MAAyB;GAElD,QAAO,MADe,IAAI,SAAS,EAAE,QAAQ,MAAM,KAAK,IAAI,GAC7C,KAAI,MAAK,EAAE,IAAI;EAChC;EAEA,MAAM,QAAQ,WAA0B;GACtC,MAAM,QAAQ,KAAK,IAAI,SAAS;GAChC,IAAI,CAAC,OACH;GAGF,IAAI,MAAM,OACR,IAAI;IACF,MAAM,MAAM,QAAQ,KAAK;GAC3B,QACM,CAEN;GAEF,KAAK,OAAO,SAAS;EACvB;CACF;AACF"}
@@ -1,7 +1,7 @@
1
- import { dirname, resolve } from "node:path";
1
+ import { dirname, isAbsolute, relative, resolve } from "node:path";
2
2
  import { createWriteStream } from "node:fs";
3
+ import { mkdir, readFile, readdir, realpath, writeFile } from "node:fs/promises";
3
4
  import { spawn } from "node:child_process";
4
- import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
5
5
  //#region src/contexts/process.ts
6
6
  /**
7
7
  * Whether the host supports POSIX process groups (the `detached: true` +
@@ -65,12 +65,62 @@ function formatContextTimestamp(date) {
65
65
  const pad3 = (n) => n.toString().padStart(3, "0");
66
66
  return `${date.getUTCFullYear()}${pad2(date.getUTCMonth() + 1)}${pad2(date.getUTCDate())}-${pad2(date.getUTCHours())}${pad2(date.getUTCMinutes())}${pad2(date.getUTCSeconds())}-${pad3(date.getUTCMilliseconds())}`;
67
67
  }
68
+ /** Copy only the named keys (when present) from an env object. */
69
+ function pickEnv(env, keys) {
70
+ const out = {};
71
+ for (const key of keys) if (env[key] !== void 0) out[key] = env[key];
72
+ return out;
73
+ }
68
74
  function createProcessContext(config) {
69
75
  let counter = 0;
70
76
  const handles = /* @__PURE__ */ new Map();
71
77
  const defaultCwd = config?.cwd ?? process.cwd();
72
78
  const defaultEnv = config?.env;
73
79
  const destroyGraceMs = config?.destroyGraceMs ?? DESTROY_SIGTERM_GRACE_MS;
80
+ const baseEnv = config?.inheritEnv === false ? pickEnv(process.env, [
81
+ "PATH",
82
+ "HOME",
83
+ "SHELL",
84
+ "LANG",
85
+ "LC_ALL",
86
+ "USER",
87
+ "TERM",
88
+ "TMPDIR"
89
+ ]) : process.env;
90
+ const workspaceRoot = config?.workspaceRoot !== void 0 ? resolve(defaultCwd, config.workspaceRoot) : void 0;
91
+ /** True when `target` is the root itself or lives under it. */
92
+ function isWithinRoot(root, target) {
93
+ const rel = relative(root, target);
94
+ return rel === "" || !rel.startsWith("..") && !isAbsolute(rel);
95
+ }
96
+ /**
97
+ * Resolve `path` against the handle cwd and, when a `workspaceRoot` is
98
+ * configured, reject anything that escapes it. Uses `realpath` on the
99
+ * nearest existing ancestor so symlink escapes (and `..`) are caught for
100
+ * both existing and not-yet-created targets.
101
+ */
102
+ async function resolveContained(handle, path) {
103
+ const full = resolve(handle.cwd, path);
104
+ if (workspaceRoot === void 0) return full;
105
+ let existing = full;
106
+ const tail = [];
107
+ while (true) try {
108
+ existing = await realpath(existing);
109
+ break;
110
+ } catch {
111
+ const parent = dirname(existing);
112
+ if (parent === existing) {
113
+ existing = parent;
114
+ break;
115
+ }
116
+ tail.unshift(relative(parent, existing));
117
+ existing = parent;
118
+ }
119
+ const canonical = tail.length ? resolve(existing, ...tail) : existing;
120
+ const canonicalRoot = await realpath(workspaceRoot).catch(() => workspaceRoot);
121
+ if (!isWithinRoot(canonicalRoot, canonical)) throw new Error(`Path escapes the workspace root: ${path} (resolved outside ${canonicalRoot})`);
122
+ return full;
123
+ }
74
124
  /**
75
125
  * Per-context background-task registry. Entries live for the context's
76
126
  * lifetime — even after the child exits — so the model can read output
@@ -165,7 +215,7 @@ function createProcessContext(config) {
165
215
  const child = spawn("/bin/sh", ["-c", command], {
166
216
  cwd,
167
217
  env: {
168
- ...process.env,
218
+ ...baseEnv,
169
219
  ...defaultEnv,
170
220
  ...options?.env
171
221
  },
@@ -273,19 +323,19 @@ function createProcessContext(config) {
273
323
  });
274
324
  },
275
325
  async readFile(handle, path) {
276
- return readFile(resolve(handle.cwd, path), "utf-8");
326
+ return readFile(await resolveContained(handle, path), "utf-8");
277
327
  },
278
328
  async readFileBinary(handle, path) {
279
- const buf = await readFile(resolve(handle.cwd, path));
329
+ const buf = await readFile(await resolveContained(handle, path));
280
330
  return new Uint8Array(buf);
281
331
  },
282
332
  async writeFile(handle, path, content) {
283
- const fullPath = resolve(handle.cwd, path);
333
+ const fullPath = await resolveContained(handle, path);
284
334
  await mkdir(dirname(fullPath), { recursive: true });
285
335
  await writeFile(fullPath, content, "utf-8");
286
336
  },
287
337
  async listFiles(handle, path) {
288
- return readdir(resolve(handle.cwd, path));
338
+ return readdir(await resolveContained(handle, path));
289
339
  },
290
340
  async execBackground(handle, command, options) {
291
341
  const cwd = options.cwd ? resolve(handle.cwd, options.cwd) : handle.cwd;
@@ -304,7 +354,7 @@ function createProcessContext(config) {
304
354
  const child = spawn("/bin/sh", ["-c", command], {
305
355
  cwd,
306
356
  env: {
307
- ...process.env,
357
+ ...baseEnv,
308
358
  ...defaultEnv,
309
359
  ...options.env
310
360
  },
@@ -643,4 +693,4 @@ function resolveDetachedTasksCapability(context) {
643
693
  //#endregion
644
694
  export { createSandboxContext as n, createProcessContext as r, resolveDetachedTasksCapability as t };
645
695
 
646
- //# sourceMappingURL=contexts-BJVgG0LY.js.map
696
+ //# sourceMappingURL=contexts-DglWSzmR.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"contexts-DglWSzmR.js","names":["spawnChild"],"sources":["../src/contexts/process.ts","../src/contexts/sandbox.ts","../src/contexts/types.ts"],"sourcesContent":["/**\n * In-process execution context.\n *\n * Runs everything in the current Node/Bun process.\n * No isolation — fastest, used as the default.\n */\n\nimport type { Buffer } from 'node:buffer'\nimport type { ChildProcess } from 'node:child_process'\nimport type { WriteStream } from 'node:fs'\nimport type { ContextCapabilities, ExecResult, ExecutionContext, ExecutionHandle, SpawnConfig, TaskEntry, TaskExitInfo, TaskHandle, TaskStallInfo } from './types'\nimport { spawn as spawnChild } from 'node:child_process'\nimport { createWriteStream } from 'node:fs'\nimport { mkdir, readdir, readFile, realpath, writeFile } from 'node:fs/promises'\nimport { dirname, isAbsolute, relative, resolve } from 'node:path'\n\n/**\n * Whether the host supports POSIX process groups (the `detached: true` +\n * `process.kill(-pid)` combination). Windows doesn't — its job-object\n * model is shaped differently — so on win32 we fall back to killing the\n * shell wrapper alone (matches pre-fix behavior; better than nothing).\n */\nconst SUPPORTS_PROCESS_GROUPS = process.platform !== 'win32'\n\n/**\n * Default cap on captured stdout / stderr per child. Matches the\n * pre-fix `child_process.exec` setting so existing callers see the\n * same buffer envelope. Output beyond this is truncated and a\n * marker is appended to stderr.\n */\nconst DEFAULT_MAX_BUFFER = 10 * 1024 * 1024\n\n/**\n * How long `destroy()` waits for a SIGTERM'd background task to settle\n * before escalating to SIGKILL and abandoning the wait. Matches the MCP\n * connection's default `closeTimeout` so `agent.destroy()` stays bounded\n * by a single grace period regardless of which leg is slowest.\n */\nconst DESTROY_SIGTERM_GRACE_MS = 5_000\n\n/**\n * How long after the child's `exit` event we keep waiting for `close`\n * before settling with the output collected so far.\n *\n * `close` fires only once every stdio pipe has closed — and a\n * daemonized grandchild that inherited stdout/stderr (a dev server\n * started with `&`, `nohup` without redirection, simulator helpers, …)\n * keeps those pipes open indefinitely. Pre-fix, a foreground `exec`\n * whose command finished in milliseconds would hang until its timeout,\n * then `killProcessGroup` took the intentionally-backgrounded daemon\n * down with it. The process itself is done at `exit`; this grace only\n * exists to let normally-closing pipes flush their tail bytes.\n */\nconst EXIT_PIPE_DRAIN_GRACE_MS = 1_500\n\n/**\n * Sanitize a task id before it's joined into a filesystem path.\n *\n * We mint `bash_<n>` ids ourselves (no user input flows into the path\n * for `ProcessContext`), so this is defensive — but third-party contexts\n * MAY accept caller-provided ids, so the helper exists for them too.\n * Anything that doesn't match the expected shape is rejected — never\n * coerced — so the call site sees a clear error rather than a\n * traversal-shaped path.\n */\nconst TASK_ID_RE = /^[a-z][\\w-]*$/i\n\nfunction assertSafeTaskId(taskId: string): void {\n if (!TASK_ID_RE.test(taskId))\n throw new Error(`Invalid task id \"${taskId}\" — must match ${TASK_ID_RE}.`)\n}\n\n/**\n * Format `date` as `YYYYMMDD-HHMMSS-mmm` in UTC.\n *\n * Pinned to UTC so the lexical sort of two timestamps always matches\n * their chronological order (local time + DST does not). Used as the\n * per-context suffix on background-task log filenames; see the field\n * doc on `contextTimestamp` for the why.\n */\nexport function formatContextTimestamp(date: Date): string {\n const pad2 = (n: number): string => n.toString().padStart(2, '0')\n const pad3 = (n: number): string => n.toString().padStart(3, '0')\n const y = date.getUTCFullYear()\n const M = pad2(date.getUTCMonth() + 1)\n const d = pad2(date.getUTCDate())\n const h = pad2(date.getUTCHours())\n const m = pad2(date.getUTCMinutes())\n const s = pad2(date.getUTCSeconds())\n const ms = pad3(date.getUTCMilliseconds())\n return `${y}${M}${d}-${h}${m}${s}-${ms}`\n}\n\n/** Pattern of a background-task log filename. Used by tests + tooling. */\nexport const TASK_LOG_FILENAME_RE = /^(bash_\\d+)\\.(\\d{8}-\\d{6}-\\d{3})\\.log$/\n\n/** Copy only the named keys (when present) from an env object. */\nfunction pickEnv(env: NodeJS.ProcessEnv, keys: string[]): NodeJS.ProcessEnv {\n const out: NodeJS.ProcessEnv = {}\n for (const key of keys) {\n if (env[key] !== undefined)\n out[key] = env[key]\n }\n return out\n}\n\nexport function createProcessContext(config?: SpawnConfig): ExecutionContext {\n let counter = 0\n const handles = new Map<string, ExecutionHandle>()\n const defaultCwd = config?.cwd ?? process.cwd()\n const defaultEnv = config?.env\n const destroyGraceMs = config?.destroyGraceMs ?? DESTROY_SIGTERM_GRACE_MS\n\n // Base environment children inherit. Defaults to the full parent env\n // (back-compat); when `inheritEnv: false`, only a minimal allow-list is\n // forwarded so parent secrets (e.g. API keys Bun loaded from `.env`) aren't\n // exposed to tool commands. Explicit `env` / per-call env always apply on top.\n const baseEnv: NodeJS.ProcessEnv = config?.inheritEnv === false\n ? pickEnv(process.env, ['PATH', 'HOME', 'SHELL', 'LANG', 'LC_ALL', 'USER', 'TERM', 'TMPDIR'])\n : process.env\n\n // Optional workspace containment for the file methods. Unset = no checks.\n const workspaceRoot = config?.workspaceRoot !== undefined\n ? resolve(defaultCwd, config.workspaceRoot)\n : undefined\n\n /** True when `target` is the root itself or lives under it. */\n function isWithinRoot(root: string, target: string): boolean {\n const rel = relative(root, target)\n return rel === '' || (!rel.startsWith('..') && !isAbsolute(rel))\n }\n\n /**\n * Resolve `path` against the handle cwd and, when a `workspaceRoot` is\n * configured, reject anything that escapes it. Uses `realpath` on the\n * nearest existing ancestor so symlink escapes (and `..`) are caught for\n * both existing and not-yet-created targets.\n */\n async function resolveContained(handle: ExecutionHandle, path: string): Promise<string> {\n const full = resolve(handle.cwd, path)\n if (workspaceRoot === undefined)\n return full\n\n // Canonicalize the deepest existing ancestor to defeat symlink escapes;\n // the non-existent tail (for writes) is appended back verbatim.\n let existing = full\n const tail: string[] = []\n while (true) {\n try {\n existing = await realpath(existing)\n break\n }\n catch {\n const parent = dirname(existing)\n if (parent === existing) {\n // Reached the filesystem root without an existing ancestor.\n existing = parent\n break\n }\n // `relative` (not slice) so the segment is correct even on Windows\n // drive roots where `dirname('C:\\\\foo')` is `C:\\\\` (no 1-char sep).\n tail.unshift(relative(parent, existing))\n existing = parent\n }\n }\n const canonical = tail.length ? resolve(existing, ...tail) : existing\n const canonicalRoot = await realpath(workspaceRoot).catch(() => workspaceRoot)\n if (!isWithinRoot(canonicalRoot, canonical)) {\n throw new Error(\n `Path escapes the workspace root: ${path} (resolved outside ${canonicalRoot})`,\n )\n }\n return full\n }\n\n /**\n * Per-context background-task registry. Entries live for the context's\n * lifetime — even after the child exits — so the model can read output\n * of completed tasks until `destroy()` tears everything down. Kept as\n * a plain Map (not a class) per code-quality checklist #7: no premature\n * abstraction. The state and the operations on it live inline.\n */\n const tasks = new Map<string, TaskState>()\n let taskCounter = 0\n\n /**\n * Per-context UTC timestamp segment baked into every background task's\n * log filename. Same `taskCounter` value across two contexts (e.g. a\n * TUI restart) would otherwise re-open the SAME `bash_<n>.log` file —\n * we open with `flags: 'a'` so the new task would APPEND into the old\n * log, producing scrambled output. The timestamp guarantees each\n * context owns a distinct log filename without forcing the\n * model-facing task id (`bash_<n>`) to grow longer.\n *\n * Format: `YYYYMMDD-HHMMSS-mmm` in UTC.\n * - Sortable (lexical sort = chronological sort).\n * - Unambiguous (UTC sidesteps DST / locale shifts).\n * - Filesystem-safe (digits + hyphens only).\n * - Millisecond precision avoids same-second-restart collisions.\n *\n * The timestamp is computed once per context, NOT per task — all of a\n * context's tasks share the same suffix so a directory listing groups\n * cleanly by \"which run produced these\".\n */\n const contextTimestamp = formatContextTimestamp(new Date())\n\n /**\n * Last-resort orphan reaper. With `detached: true`, background tasks\n * are in their OWN process group — when the parent (zidane TUI) dies,\n * the OS does NOT send them SIGHUP and they keep running indefinitely.\n * The user's \"Ctrl+C the TUI\" intent is \"stop everything I started\",\n * not \"leak a `sleep 60` into the background\".\n *\n * `process.on('exit')` fires SYNCHRONOUSLY on `process.exit()` AND on\n * natural shutdown — exactly the seam we need. The handler can only\n * do synchronous work (Node ignores async), but `process.kill` IS\n * synchronous, so a SIGTERM-the-group sweep lands cleanly. The kill\n * is best-effort: already-dead children throw ESRCH (swallowed).\n *\n * Registered lazily on first `execBackground` so contexts that never\n * background a task don't pay the listener cost. Deregistered in\n * `destroy()` so reconstructed contexts don't accumulate listeners\n * (Node warns past 10 — a long-running session that switches sessions\n * frequently would otherwise hit that).\n */\n let exitHandlerRegistered = false\n const exitHandler = (): void => {\n for (const task of tasks.values()) {\n if (task.status !== 'running')\n continue\n const pid = task.child.pid\n if (pid === undefined)\n continue\n try {\n if (SUPPORTS_PROCESS_GROUPS)\n process.kill(-pid, 'SIGTERM')\n else\n process.kill(pid, 'SIGTERM')\n }\n catch {\n // ESRCH (already dead) / EPERM (lost ownership). Swallow —\n // the process either won't kill cleanly OR is already gone,\n // both acceptable at shutdown.\n }\n }\n }\n\n return {\n type: 'process',\n\n capabilities: {\n shell: true,\n filesystem: true,\n network: true,\n gpu: false,\n // Background tasks are OS children of this process's machine —\n // they survive `agent.destroy()` reassignment games but NOT a\n // host machine reboot, and the orphan reaper kills them on\n // process exit. That's the 'process-lifetime' tier.\n detachedTasks: 'process-lifetime',\n } satisfies ContextCapabilities,\n\n async spawn(overrides?: SpawnConfig): Promise<ExecutionHandle> {\n const id = `process-${++counter}`\n const cwd = overrides?.cwd ?? defaultCwd\n\n await mkdir(cwd, { recursive: true })\n\n const handle: ExecutionHandle = { id, type: 'process', cwd }\n handles.set(id, handle)\n return handle\n },\n\n async exec(\n handle: ExecutionHandle,\n command: string,\n options?: { cwd?: string, env?: Record<string, string>, timeout?: number, signal?: AbortSignal },\n ): Promise<ExecResult> {\n const cwd = options?.cwd ? resolve(handle.cwd, options.cwd) : handle.cwd\n\n // Pre-aborted fast path: skip the spawn entirely and synthesize a\n // killed-by-signal result. Saves a spawn round-trip + dodges Node\n // emitting an immediate `AbortError`.\n if (options?.signal?.aborted) {\n return { stdout: '', stderr: 'aborted by signal before spawn', exitCode: 143 }\n }\n\n const timeoutMs = (options?.timeout ?? config?.limits?.timeout ?? 30) * 1000\n const maxBuffer = DEFAULT_MAX_BUFFER\n\n return new Promise<ExecResult>((resolveP) => {\n // Spawn as a NEW process group leader so we can kill the whole\n // subtree on abort. Without `detached: true`, sending SIGTERM to\n // the shell's pid only kills the shell wrapper — its child\n // processes (the actual `sleep`, `npm`, `python`, …) get\n // reparented to init and keep running. `process.kill(-pid, …)`\n // with a NEGATIVE pid targets the whole process group; that's\n // the POSIX idiom for \"shut down everything I started\".\n //\n // On Windows there are no process groups in the POSIX sense, so\n // we leave `detached` off and accept the shell-only kill — the\n // platform's job-object machinery is the path forward there if\n // we ever need it, but it's not the bug zidane's users are\n // hitting today.\n const child = spawnChild('/bin/sh', ['-c', command], {\n cwd,\n env: { ...baseEnv, ...defaultEnv, ...options?.env },\n stdio: ['ignore', 'pipe', 'pipe'],\n detached: SUPPORTS_PROCESS_GROUPS,\n })\n\n let stdout = ''\n let stderr = ''\n // Byte accounting per slot. `maxBuffer` is a BYTE budget — comparing\n // it against the accumulated string's UTF-16 `.length` undercounts\n // multi-byte UTF-8 (overshooting the budget) and slicing the Buffer\n // at a char-derived offset can split a codepoint mid-sequence.\n let stdoutBytes = 0\n let stderrBytes = 0\n let bufferTruncated = false\n let timedOut = false\n let killedByAbort = false\n let settled = false\n\n const appendCapped = (slot: 'stdout' | 'stderr', chunk: Buffer): void => {\n const used = slot === 'stdout' ? stdoutBytes : stderrBytes\n if (used >= maxBuffer) {\n // Buffer filled exactly on a prior chunk boundary — this chunk\n // is data we're dropping, so it still counts as truncation.\n if (!bufferTruncated) {\n bufferTruncated = true\n killProcessGroup(child, 'SIGTERM')\n }\n return\n }\n const room = maxBuffer - used\n let piece = chunk\n if (chunk.length > room) {\n // Cut on a UTF-8 codepoint boundary: if the byte right after the\n // cut is a continuation byte (0b10xxxxxx), the cut would land\n // mid-codepoint — back off until it doesn't.\n let end = room\n while (end > 0 && (chunk[end] & 0xC0) === 0x80)\n end--\n piece = chunk.subarray(0, end)\n }\n if (slot === 'stdout') {\n stdout += piece.toString('utf8')\n stdoutBytes += piece.length\n }\n else {\n stderr += piece.toString('utf8')\n stderrBytes += piece.length\n }\n if (chunk.length > room) {\n bufferTruncated = true\n // Kill on overflow — matches `execAsync`'s `maxBuffer`\n // behavior (which kills the child and surfaces an error).\n killProcessGroup(child, 'SIGTERM')\n }\n }\n\n child.stdout?.on('data', chunk => appendCapped('stdout', chunk as Buffer))\n child.stderr?.on('data', chunk => appendCapped('stderr', chunk as Buffer))\n\n const timeoutTimer = timeoutMs > 0\n ? setTimeout(() => {\n timedOut = true\n killProcessGroup(child, 'SIGTERM')\n }, timeoutMs)\n : undefined\n\n const onAbort = (): void => {\n killedByAbort = true\n killProcessGroup(child, 'SIGTERM')\n }\n const userSignal = options?.signal\n if (userSignal)\n userSignal.addEventListener('abort', onAbort, { once: true })\n\n let exitGraceTimer: NodeJS.Timeout | undefined\n\n const settle = (exitCode: number, extraStderr?: string): void => {\n if (settled)\n return\n settled = true\n if (timeoutTimer)\n clearTimeout(timeoutTimer)\n if (exitGraceTimer)\n clearTimeout(exitGraceTimer)\n if (userSignal)\n userSignal.removeEventListener('abort', onAbort)\n const finalStderr = extraStderr\n ? (stderr ? `${stderr}\\n${extraStderr}` : extraStderr)\n : stderr\n resolveP({ stdout, stderr: finalStderr, exitCode })\n }\n\n // Shared classification ladder for `close` (fast path: pipes\n // drained normally) and the post-`exit` grace (pipes held open\n // by a daemonized grandchild). Order matters: killed-by-our-\n // abort wins over timeout wins over natural exit, because the\n // abort listener fires first when the user cancels mid-\n // timeout-window.\n const settleFromStatus = (code: number | null, signal: NodeJS.Signals | null): void => {\n if (killedByAbort) {\n settle(143, 'aborted by signal')\n return\n }\n if (timedOut) {\n settle(124, `command timed out after ${timeoutMs}ms`)\n return\n }\n if (bufferTruncated) {\n settle(143, `output exceeded ${maxBuffer}-byte buffer; process killed`)\n return\n }\n if (signal) {\n // Killed by some other signal we didn't issue. Treat as\n // signal-killed for consumer compat.\n settle(128 + 15, `terminated by signal ${signal}`)\n return\n }\n settle(typeof code === 'number' ? code : 1)\n }\n\n child.on('error', (err) => {\n // Spawn failure (ENOENT on `/bin/sh`, EACCES, …). Mirror\n // `execAsync`'s \"rejects with an Error\" shape by surfacing\n // the message on stderr and a non-zero exit.\n settle(1, err.message)\n })\n\n child.on('close', (code, signal) => settleFromStatus(code, signal))\n\n child.on('exit', (code, signal) => {\n // The command is DONE here — `close` only adds \"every stdio\n // pipe closed\", which a daemonized grandchild can block\n // forever (see EXIT_PIPE_DRAIN_GRACE_MS). Give the pipes a\n // short drain grace, then settle with what we have and\n // release them. The daemon keeps running — that was the\n // command's intent; killing the group here would be wrong.\n exitGraceTimer = setTimeout(() => {\n if (settled)\n return\n child.stdout?.destroy()\n child.stderr?.destroy()\n settleFromStatus(code, signal)\n }, EXIT_PIPE_DRAIN_GRACE_MS)\n exitGraceTimer.unref?.()\n })\n })\n },\n\n async readFile(handle: ExecutionHandle, path: string): Promise<string> {\n return readFile(await resolveContained(handle, path), 'utf-8')\n },\n\n async readFileBinary(handle: ExecutionHandle, path: string): Promise<Uint8Array> {\n // No encoding → returns a Buffer (which is a Uint8Array). Used by\n // read_file to ferry image / binary content into the multimodal route.\n const buf = await readFile(await resolveContained(handle, path))\n return new Uint8Array(buf)\n },\n\n async writeFile(handle: ExecutionHandle, path: string, content: string): Promise<void> {\n const fullPath = await resolveContained(handle, path)\n await mkdir(dirname(fullPath), { recursive: true })\n await writeFile(fullPath, content, 'utf-8')\n },\n\n async listFiles(handle: ExecutionHandle, path: string): Promise<string[]> {\n return readdir(await resolveContained(handle, path))\n },\n\n async execBackground(\n handle: ExecutionHandle,\n command: string,\n options: {\n cwd?: string\n env?: Record<string, string>\n outputDir: string\n onExit?: (info: TaskExitInfo) => void\n maxOutputBytes?: number\n stallTimeoutMs?: number\n onStall?: (info: TaskStallInfo) => void\n },\n ): Promise<TaskHandle> {\n const cwd = options.cwd ? resolve(handle.cwd, options.cwd) : handle.cwd\n\n await mkdir(options.outputDir, { recursive: true })\n\n // Mint id + path. The id is sequential per context (model-facing,\n // short, ergonomic for `shell_kill`). The log FILENAME embeds the\n // context's start timestamp so two contexts sharing an `outputDir`\n // (TUI restart on the same session, concurrent zidane instances,\n // …) never resolve to the same file — we open with `flags: 'a'`\n // and a name collision would interleave their output. Path\n // validation is defensive — we mint our own ids so it never trips\n // today, but it pins the invariant for forks / third parties.\n const taskId = `bash_${++taskCounter}`\n assertSafeTaskId(taskId)\n const outputPath = resolve(options.outputDir, `${taskId}.${contextTimestamp}.log`)\n\n // Install the orphan reaper on first task. See `exitHandler`'s\n // JSDoc for the kill-on-shutdown rationale.\n if (!exitHandlerRegistered) {\n process.on('exit', exitHandler)\n exitHandlerRegistered = true\n }\n\n // Open the output file. The timestamped path is unique per context\n // so a brand-new file is the expected outcome; `flags: 'a'` is kept\n // as the safe default (preserves bytes if the path collides for any\n // reason — same-millisecond context creation, manual pre-population,\n // etc.) rather than blindly truncating. Streams are opened BEFORE\n // the spawn to avoid a race where the child writes before the stream\n // is ready — `child_process` buffers stdio until the consumer\n // attaches, but the FS handle has to exist either way for our pipe.\n const outputStream: WriteStream = createWriteStream(outputPath, { flags: 'a' })\n // Surface FS errors (ENOSPC, EACCES on a remounted FS, etc.)\n // under ZIDANE_DEBUG instead of crashing the host via an\n // unhandled 'error' event. Without a listener Node escalates\n // any stream-level error to an uncaughtException and the whole\n // process exits — the model and the user would lose every\n // unrelated in-flight piece of work to one bad task's disk\n // hiccup. Swallow + log is the safer default for a fire-and-\n // forget log writer; the task's exit code still reports.\n outputStream.on('error', (err) => {\n if (process.env.ZIDANE_DEBUG)\n process.stderr.write(`[zidane/contexts] task ${taskId} log stream error: ${err.message}\\n`)\n })\n\n // Spawn as a NEW process group leader so we can kill the whole\n // subtree on demand. Same primitive `exec` uses for foreground\n // shells — see the long comment in that method for the\n // process-group rationale.\n const child = spawnChild('/bin/sh', ['-c', command], {\n cwd,\n env: { ...baseEnv, ...defaultEnv, ...options.env },\n stdio: ['ignore', 'pipe', 'pipe'],\n detached: SUPPORTS_PROCESS_GROUPS,\n })\n\n const state: TaskState = {\n taskId,\n handleId: handle.id,\n pid: child.pid ?? -1,\n command,\n cwd,\n startedAt: Date.now(),\n outputPath,\n outputStream,\n child,\n status: 'running',\n bytesWritten: 0,\n settled: false,\n onExit: options.onExit,\n }\n tasks.set(taskId, state)\n\n // Output cap (optional). Beyond the cap the process keeps\n // running — head-priority retention: bytes already on disk stay,\n // subsequent bytes are counted but dropped, and a structured\n // truncation marker is appended at settle so the model can\n // pattern-match the loss. Killing on overflow (what the\n // foreground `exec` does) would be wrong here: long-running\n // servers legitimately log forever.\n const maxOutputBytes = typeof options.maxOutputBytes === 'number' && options.maxOutputBytes > 0\n ? options.maxOutputBytes\n : undefined\n let droppedBytes = 0\n // Latched on the first overflowing chunk. Without it, a codepoint-\n // boundary backoff (below) leaves a few bytes of \"room\" that later\n // chunks would dribble into — appending disjoint fragments from\n // much-later output right after the cut point.\n let capExhausted = false\n\n // Stall watchdog (optional). One-shot per quiet period: fires\n // `onStall` after `stallTimeoutMs` of no output, then stays quiet\n // until fresh output re-arms it. `unref()` so a pending timer\n // never holds the host process open.\n const stallTimeoutMs = typeof options.stallTimeoutMs === 'number' && options.stallTimeoutMs > 0\n ? options.stallTimeoutMs\n : undefined\n const onStall = options.onStall\n let stallTimer: NodeJS.Timeout | undefined\n let lastOutputAt = Date.now()\n const armStallTimer = (): void => {\n if (!stallTimeoutMs || !onStall)\n return\n if (stallTimer)\n clearTimeout(stallTimer)\n stallTimer = setTimeout(() => {\n stallTimer = undefined\n if (state.settled)\n return\n try {\n onStall({\n taskId,\n command,\n outputPath,\n stalledForMs: Date.now() - lastOutputAt,\n bytesWritten: state.bytesWritten,\n })\n }\n catch (err) {\n if (process.env.ZIDANE_DEBUG)\n process.stderr.write(`[zidane/contexts] task ${taskId} onStall threw: ${err instanceof Error ? err.message : String(err)}\\n`)\n }\n }, stallTimeoutMs)\n stallTimer.unref?.()\n }\n const clearStallTimer = (): void => {\n if (stallTimer) {\n clearTimeout(stallTimer)\n stallTimer = undefined\n }\n }\n armStallTimer()\n\n // Pipe both streams into the same file. Order between stdout and\n // stderr is preserved per-stream; cross-stream ordering depends on\n // Node's event loop — acceptable interleaving for log-shaped\n // output. Tracked-bytes is updated on every chunk for the\n // listBackground UX (counted even past the cap, so the UX shows\n // the task's TRUE output volume).\n const appendChunk = (chunk: Buffer): void => {\n state.bytesWritten += chunk.length\n lastOutputAt = Date.now()\n armStallTimer()\n if (maxOutputBytes !== undefined) {\n const onDisk = state.bytesWritten - droppedBytes - chunk.length\n if (capExhausted || onDisk >= maxOutputBytes) {\n capExhausted = true\n droppedBytes += chunk.length\n return\n }\n const room = maxOutputBytes - onDisk\n if (chunk.length > room) {\n capExhausted = true\n // Cut on a UTF-8 codepoint boundary, same as the foreground\n // exec cap: back off while the byte after the cut is a\n // continuation byte (0b10xxxxxx) so the log never ends in a\n // mangled half-codepoint right before the truncation marker.\n let end = room\n while (end > 0 && (chunk[end] & 0xC0) === 0x80)\n end--\n droppedBytes += chunk.length - end\n if (end > 0)\n outputStream.write(chunk.subarray(0, end))\n return\n }\n }\n outputStream.write(chunk)\n }\n child.stdout?.on('data', chunk => appendChunk(chunk as Buffer))\n child.stderr?.on('data', chunk => appendChunk(chunk as Buffer))\n\n // Settle path — at-most-once via `settled` flag (checklist #14).\n // Three trigger sources: `close` (natural OR signal-killed),\n // `error` (spawn failure), explicit `killBackground` (which\n // routes through `close` itself after the SIGTERM lands).\n const settle = (cause: 'close' | 'error', code: number | null, signal: NodeJS.Signals | null, errMessage?: string): void => {\n if (state.settled)\n return\n state.settled = true\n clearStallTimer()\n state.endedAt = Date.now()\n\n // Determine final status from cause + signal.\n const status: TaskExitInfo['status']\n = signal === 'SIGTERM' || state.killRequested\n ? 'killed'\n : 'exited'\n // Signal-killed children report null `code` from Node; map back\n // to the POSIX `128 + signum` convention so consumers can read\n // an integer either way.\n const exitCode = code !== null\n ? code\n : signal === 'SIGTERM'\n ? 143\n : signal\n ? 128\n : 1\n state.status = status\n state.exitCode = exitCode\n if (signal)\n state.signal = signal\n\n // Flush + close the WriteStream BEFORE firing onExit — model\n // may read the file in the same turn it receives the\n // notification, and a still-open stream can hold tail bytes\n // back from disk. `stream.end(callback)` is the documented\n // \"all queued writes are flushed when this fires\" idiom.\n //\n // ORDER MATTERS: any error preamble we want in the log file\n // (spawn failures with no stdout, buffer overflows) MUST be\n // written BEFORE `end()` — once `end()` is called the stream\n // is closed for writing and subsequent `.write()` calls are\n // dropped. Earlier revisions had this reversed and silently\n // lost ENOENT-on-`/bin/sh` messages.\n if (errMessage) {\n try {\n outputStream.write(`\\n${errMessage}\\n`)\n }\n catch {\n // Stream may have errored before this — best-effort only.\n }\n }\n if (droppedBytes > 0) {\n try {\n // Structured + loud — the model can pattern-match the tag\n // rather than misread a truncated log as the full output.\n outputStream.write(`\\n<output-truncated bytes-dropped=\"${droppedBytes}\"/>\\n`)\n }\n catch {\n // Best-effort only, same as the error preamble above.\n }\n }\n outputStream.end(() => {\n // `stateToTaskExitInfo` reads the same fields we just set\n // on `state`, so the snapshot the consumer gets matches the\n // post-settle state exactly. `onExit` is optional — pull-based\n // consumers reconcile via `listBackground` instead.\n try {\n state.onExit?.(stateToTaskExitInfo(state))\n }\n catch (err) {\n // Defensive — a buggy onExit callback shouldn't crash the\n // host. Surface via stderr under ZIDANE_DEBUG; otherwise\n // swallow. Matches the spawn-tool's bubbleError pattern.\n if (process.env.ZIDANE_DEBUG)\n process.stderr.write(`[zidane/contexts] task ${taskId} onExit threw: ${err instanceof Error ? err.message : String(err)}\\n`)\n }\n })\n }\n\n child.on('close', (code, signal) => settle('close', code, signal))\n child.on('error', err => settle('error', null, null, `[spawn error] ${err.message}`))\n\n child.on('exit', (code, signal) => {\n // Same pipe-hostage hazard as foreground `exec`: a grandchild\n // that inherited the pipes blocks `close` forever, leaving the\n // task `running` (and any waiter parked) long after the\n // command exited. Settle after a short drain grace; `settle`'s\n // latch makes this a no-op when `close` already fired.\n const exitGraceTimer = setTimeout(() => {\n if (state.settled)\n return\n child.stdout?.destroy()\n child.stderr?.destroy()\n settle('close', code, signal)\n }, EXIT_PIPE_DRAIN_GRACE_MS)\n exitGraceTimer.unref?.()\n })\n\n return { taskId, pid: state.pid, outputPath }\n },\n\n async killBackground(handle: ExecutionHandle, taskId: string): Promise<TaskExitInfo | null> {\n const state = tasks.get(taskId)\n // Two miss cases collapse into one `null` return: unknown id, AND\n // known-id-but-not-owned-by-this-handle. The second case is the\n // subagent-can't-kill-parent-tasks defense; surfacing it as a\n // distinct error would leak the existence of the parent's task\n // to the subagent's model, which violates the per-handle\n // isolation contract.\n if (!state || state.handleId !== handle.id)\n return null\n // Already exited — return the cached info. We don't keep a\n // separate cached exit; the state itself carries every field\n // `TaskExitInfo` needs and `stateToTaskExitInfo` projects it.\n if (state.status !== 'running')\n return stateToTaskExitInfo(state)\n\n // Mark the intent BEFORE issuing the kill so the close handler\n // classifies the exit as `'killed'` even on platforms where the\n // SIGTERM-via-group lands faster than the close event drains.\n // (Checklist #14: at-most-once settle, plus correct status\n // classification regardless of event ordering.)\n state.killRequested = true\n\n // Wait for the existing close listener to fire — `settle()` does\n // all the flushing + onExit work. We just sit on a one-shot\n // promise tied to the `child.on('close')` we already registered\n // at spawn time.\n const closed = new Promise<void>((resolveP) => {\n if (state.settled) {\n resolveP()\n return\n }\n const originalOnExit = state.onExit\n state.onExit = (info) => {\n originalOnExit?.(info)\n resolveP()\n }\n })\n\n killProcessGroup(state.child, 'SIGTERM')\n await closed\n return stateToTaskExitInfo(state)\n },\n\n async waitBackground(\n handle: ExecutionHandle,\n taskId: string,\n options?: { timeoutMs?: number, signal?: AbortSignal },\n ): Promise<TaskExitInfo | null> {\n const state = tasks.get(taskId)\n // Same two-misses-collapse-to-null contract as `killBackground`:\n // unknown id AND known-but-other-handle both return `null` so a\n // subagent can't observe the parent's tasks through the wait seam.\n if (!state || state.handleId !== handle.id)\n return null\n if (state.settled)\n return stateToTaskExitInfo(state)\n\n return new Promise<TaskExitInfo | null>((resolveP) => {\n let done = false\n let timer: NodeJS.Timeout | undefined\n const signal = options?.signal\n const finish = (value: TaskExitInfo | null): void => {\n if (done)\n return\n done = true\n if (timer)\n clearTimeout(timer)\n signal?.removeEventListener('abort', onAbort)\n resolveP(value)\n }\n function onAbort(): void {\n finish(null)\n }\n\n // Chain onto the live exit callback rather than replacing it —\n // the original (notification enqueue, kill-waiter, a sibling\n // waiter) must keep firing. The `done` latch makes a late exit\n // after timeout a no-op for THIS waiter.\n const originalOnExit = state.onExit\n state.onExit = (info) => {\n originalOnExit?.(info)\n finish(stateToTaskExitInfo(state))\n }\n\n if (signal) {\n if (signal.aborted) {\n finish(null)\n return\n }\n signal.addEventListener('abort', onAbort, { once: true })\n }\n const timeoutMs = options?.timeoutMs\n if (typeof timeoutMs === 'number' && Number.isFinite(timeoutMs) && timeoutMs > 0) {\n timer = setTimeout(finish, timeoutMs, null)\n timer.unref?.()\n }\n })\n },\n\n async reassignBackgroundTasks(\n fromHandle: ExecutionHandle,\n toHandle: ExecutionHandle,\n newOnExit?: (info: TaskExitInfo) => void,\n ): Promise<readonly TaskEntry[]> {\n // No-op when source = destination — keeps the spawn.ts call site\n // unconditional without forcing it to dedupe.\n if (fromHandle.id === toHandle.id)\n return []\n const promoted: TaskEntry[] = []\n for (const state of tasks.values()) {\n if (state.handleId !== fromHandle.id || state.status !== 'running')\n continue\n state.handleId = toHandle.id\n // Replace the natural-exit callback. The original closed over\n // the spawning agent's hook bus, which is about to be destroyed\n // — without rewiring, the task's eventual `background:exit`\n // fires into a torn-down hookable and the parent never learns.\n if (newOnExit)\n state.onExit = newOnExit\n promoted.push(stateToTaskEntry(state))\n }\n return promoted\n },\n\n async listBackground(handle: ExecutionHandle): Promise<readonly TaskEntry[]> {\n // Snapshot — callers must not assume the returned array stays\n // in sync with the live registry. Sorted by startedAt so the\n // model / UI sees consistent ordering across calls. Scoped to\n // the calling handle so subagents don't see the parent's tasks\n // (and vice versa) in their listing.\n return [...tasks.values()]\n .filter(s => s.handleId === handle.id)\n .sort((a, b) => a.startedAt - b.startedAt)\n .map(stateToTaskEntry)\n },\n\n async destroy(handle: ExecutionHandle): Promise<void> {\n // Kill every still-running background task SPAWNED THROUGH THIS\n // HANDLE before tearing the handle down. SIGTERM the groups,\n // await the close + flush, THEN drop the registry entries.\n // Sequential — destroy is one-shot teardown, the few ms of extra\n // latency aren't worth the synchronization complexity.\n //\n // The handle scope matters when the same `ExecutionContext` is\n // shared across a parent agent and its `spawn`-ed subagents (the\n // default — `spawn.ts` passes `execution: ctx.execution`). Each\n // agent mints its own `ExecutionHandle` and registers its\n // background tasks under that handle's id. Without the filter,\n // a child agent's `destroy()` (fired by `spawn.ts`'s `finally`\n // when the subagent finishes / is cancelled) would walk the\n // shared registry and SIGTERM the parent's tasks too. So\n // cancelling a subagent that has its own background shells now\n // correctly kills JUST those subagent shells, leaving the\n // parent's intact.\n const survivors = [...tasks.values()].filter(s => s.handleId === handle.id && !s.settled)\n await Promise.all(survivors.map(async (state) => {\n state.killRequested = true\n await new Promise<void>((resolveP) => {\n let graceTimer: NodeJS.Timeout | undefined\n const originalOnExit = state.onExit\n state.onExit = (info) => {\n originalOnExit?.(info)\n if (graceTimer)\n clearTimeout(graceTimer)\n resolveP()\n }\n killProcessGroup(state.child, 'SIGTERM')\n // Bounded drain: a task that traps/ignores SIGTERM — or whose\n // `close` event is held back by an escaped grandchild (setsid'd\n // out of the process group) keeping the inherited stdio pipe\n // open — must not wedge `agent.destroy()` forever. After the\n // grace period, SIGKILL the group and stop waiting: `close`\n // may STILL never fire (the pipe holder isn't in the group),\n // so resolving here is the only way destroy() stays bounded.\n graceTimer = setTimeout(() => {\n killProcessGroup(state.child, 'SIGKILL')\n // settle() normally flushes + ends the log stream, but it\n // only runs off `close` — which we just gave up on. Tear the\n // stream down so the fd doesn't leak into a long-lived host.\n state.outputStream.destroy()\n resolveP()\n }, destroyGraceMs)\n graceTimer.unref?.()\n })\n }))\n // Drop only this handle's tasks from the registry. Other handles\n // (siblings, parent) keep their entries.\n for (const [taskId, state] of tasks) {\n if (state.handleId === handle.id)\n tasks.delete(taskId)\n }\n handles.delete(handle.id)\n // Drop the orphan reaper ONLY when no handles remain — otherwise\n // a child's `destroy()` would strip the parent's safety net. The\n // reaper protects every still-tracked task in the context, so it\n // sticks around until the LAST handle is gone.\n //\n // Without this guard, the spawn-tool sequence\n // parent.spawn(child) → child.run() → child.destroy() (auto)\n // would deregister the handler mid-parent-lifetime. The parent's\n // own subsequent Ctrl+C orphan-kill safety would silently degrade.\n if (exitHandlerRegistered && handles.size === 0) {\n process.off('exit', exitHandler)\n exitHandlerRegistered = false\n }\n },\n }\n}\n\n/**\n * Per-task state. Lives in the context's `tasks` registry. Fields are\n * mutated in place by the spawn / close / kill / destroy code paths —\n * the registry isn't immutable. Treat the type as a record-of-cells,\n * not a value.\n */\ninterface TaskState {\n taskId: string\n /**\n * `ExecutionHandle.id` of the spawning agent. The registry is\n * context-scoped (one Map shared across all handles a context minted),\n * so a per-task owner tag is what scopes `listBackground` /\n * `killBackground` / `destroy` to the calling handle's slice.\n *\n * Without this, a subagent spawned via `spawn` tool — which inherits\n * the parent's `ExecutionContext` but mints its OWN handle — would\n * see (and accidentally kill on `destroy()`) every task the parent\n * had running. Came up the first time the model spawned a subagent\n * that ran a background task: the subagent's run-end `destroy()`\n * SIGTERMed the parent's `npm run dev` mid-flight.\n */\n handleId: string\n pid: number\n command: string\n cwd: string\n startedAt: number\n outputPath: string\n outputStream: WriteStream\n child: ChildProcess\n status: 'running' | 'exited' | 'killed'\n exitCode?: number\n signal?: NodeJS.Signals\n bytesWritten: number\n /**\n * `at-most-once` settle latch. Multiple trigger sources (`close`,\n * `error`, `kill`) can race; the flag dedupes so `onExit` fires\n * exactly once per task (checklist #14).\n */\n settled: boolean\n /**\n * Set by `killBackground` / `destroy` before issuing SIGTERM so the\n * close handler classifies the exit as `'killed'` even when the\n * platform delivers the close event ahead of our intent record.\n */\n killRequested?: boolean\n /**\n * `Date.now()` at settle time. Lets `durationMs` (and pull-based\n * reconcile consumers reading `TaskEntry.endedAt`) report the task's\n * real lifetime instead of a projection-time delta.\n */\n endedAt?: number\n /**\n * Optional push-style exit callback. Undefined for pull-based\n * consumers (remote / durable hosts reconcile via `listBackground`).\n * Mutated in place by the kill / destroy / wait chains.\n */\n onExit?: (info: TaskExitInfo) => void\n}\n\n/**\n * Send `signal` to the child's whole process group. Falls back to a\n * single-process kill on Windows (no POSIX process groups). Shared\n * across the foreground `exec` path, the background spawn / kill\n * paths, and the shutdown-time orphan reaper — keeping one definition\n * so the kill semantics can't drift between them.\n */\nfunction killProcessGroup(child: ChildProcess, signal: NodeJS.Signals): void {\n const pid = child.pid\n if (pid === undefined)\n return\n try {\n if (SUPPORTS_PROCESS_GROUPS)\n process.kill(-pid, signal)\n else\n process.kill(pid, signal)\n }\n catch {\n // ESRCH / EPERM — process is already gone (race with natural exit)\n // or we lost the right to kill it. Both are safe to swallow.\n }\n}\n\n/**\n * Project a `TaskState` to the `TaskEntry` shape `listBackground` and\n * `reassignBackgroundTasks` return. Single helper keeps the shape\n * consistent across both call sites (and a future addition of fields\n * to `TaskEntry` only needs to land here).\n */\nfunction stateToTaskEntry(state: TaskState): TaskEntry {\n return {\n taskId: state.taskId,\n pid: state.pid,\n command: state.command,\n cwd: state.cwd,\n startedAt: state.startedAt,\n ...(state.endedAt !== undefined ? { endedAt: state.endedAt } : {}),\n outputPath: state.outputPath,\n status: state.status,\n ...(state.exitCode !== undefined ? { exitCode: state.exitCode } : {}),\n ...(state.signal ? { signal: state.signal } : {}),\n bytesWritten: state.bytesWritten,\n }\n}\n\n/**\n * Project a settled `TaskState` to the `TaskExitInfo` shape `settle()`\n * fires from `onExit` and `killBackground` returns on the\n * cached-exit path. Pre-condition: `state.status !== 'running'`\n * (callers gate on this).\n */\nfunction stateToTaskExitInfo(state: TaskState): TaskExitInfo {\n return {\n taskId: state.taskId,\n status: state.status as Exclude<TaskState['status'], 'running'>,\n exitCode: state.exitCode ?? 0,\n ...(state.signal ? { signal: state.signal } : {}),\n outputPath: state.outputPath,\n // Settle-time delta when available (always set since `endedAt` was\n // introduced); the `Date.now()` fallback covers defensive callers\n // projecting an unsettled state.\n durationMs: (state.endedAt ?? Date.now()) - state.startedAt,\n command: state.command,\n }\n}\n","/**\n * Remote sandbox execution context.\n *\n * Offloads execution to a remote sandbox API (e.g. Rivet, E2B).\n * Specific providers implement the SandboxProvider interface.\n */\n\nimport type { ContextCapabilities, ExecResult, ExecutionContext, ExecutionHandle, SpawnConfig } from './types'\n\n// ---------------------------------------------------------------------------\n// Sandbox provider interface\n// ---------------------------------------------------------------------------\n\nexport interface SandboxProvider {\n name: string\n spawn: (config: SpawnConfig) => Promise<{ id: string, cwd: string }>\n exec: (sandboxId: string, command: string, options?: { cwd?: string, env?: Record<string, string>, timeout?: number }) => Promise<ExecResult>\n readFile: (sandboxId: string, path: string) => Promise<string>\n writeFile: (sandboxId: string, path: string, content: string) => Promise<void>\n listFiles: (sandboxId: string, path: string) => Promise<string[]>\n destroy: (sandboxId: string) => Promise<void>\n}\n\n// ---------------------------------------------------------------------------\n// Sandbox execution context\n// ---------------------------------------------------------------------------\n\nexport function createSandboxContext(provider: SandboxProvider): ExecutionContext {\n const sandboxes = new Map<string, string>()\n\n function getSandboxId(handle: ExecutionHandle): string {\n const id = sandboxes.get(handle.id)\n if (!id)\n throw new Error(`Sandbox ${handle.id} not found`)\n return id\n }\n\n return {\n type: 'sandbox',\n\n capabilities: {\n shell: true,\n filesystem: true,\n network: true,\n gpu: false,\n } satisfies ContextCapabilities,\n\n async spawn(config?: SpawnConfig): Promise<ExecutionHandle> {\n const result = await provider.spawn(config ?? {})\n const handle: ExecutionHandle = { id: result.id, type: 'sandbox', cwd: result.cwd }\n sandboxes.set(handle.id, result.id)\n return handle\n },\n\n async exec(handle: ExecutionHandle, command: string, options?): Promise<ExecResult> {\n return provider.exec(getSandboxId(handle), command, options)\n },\n\n async readFile(handle: ExecutionHandle, path: string): Promise<string> {\n return provider.readFile(getSandboxId(handle), path)\n },\n\n async writeFile(handle: ExecutionHandle, path: string, content: string): Promise<void> {\n return provider.writeFile(getSandboxId(handle), path, content)\n },\n\n async listFiles(handle: ExecutionHandle, path: string): Promise<string[]> {\n return provider.listFiles(getSandboxId(handle), path)\n },\n\n async destroy(handle: ExecutionHandle): Promise<void> {\n const id = sandboxes.get(handle.id)\n if (!id)\n return\n await provider.destroy(id)\n sandboxes.delete(handle.id)\n },\n }\n}\n","/**\n * Execution context types.\n *\n * An execution context defines *where* and *how* an agent's tools run.\n * The agent loop and tools interact through this interface without knowing\n * whether they're running in-process, in a Docker container, or in a\n * remote sandbox.\n */\n\n// ---------------------------------------------------------------------------\n// Capabilities\n// ---------------------------------------------------------------------------\n\n/**\n * Lifetime guarantee of background tasks started through\n * {@link ExecutionContext.execBackground}.\n *\n * - `'none'` — the context cannot detach tasks at all.\n * - `'process-lifetime'` — tasks live as long as the HOST process; a\n * crash/restart of the host orphans or kills them (`ProcessContext`).\n * - `'durable'` — tasks live on a remote runner and survive host process\n * death (remote execution contexts driven by durable runtimes).\n *\n * Durable-execution adapters (e.g. `zidane/restate`) consult this to\n * decide whether backgrounding is safe to expose: a `'durable'` context\n * keeps its tasks across worker crashes, so there is no reason to strip\n * the capability from the model.\n */\nexport type DetachedTasksCapability = 'none' | 'process-lifetime' | 'durable'\n\nexport interface ContextCapabilities {\n /** Can execute shell commands */\n shell: boolean\n /** Can read/write files in a workspace */\n filesystem: boolean\n /** Can make outbound network requests */\n network: boolean\n /** Has GPU access */\n gpu: boolean\n /**\n * Background-task lifetime guarantee. Optional for backward\n * compatibility — when absent, callers infer `'process-lifetime'`\n * if the context implements `execBackground`, `'none'` otherwise\n * (see {@link resolveDetachedTasksCapability}).\n */\n detachedTasks?: DetachedTasksCapability\n}\n\n/**\n * Effective {@link DetachedTasksCapability} of a context, with the\n * backward-compatible inference for contexts that predate the field.\n */\nexport function resolveDetachedTasksCapability(context: ExecutionContext): DetachedTasksCapability {\n return context.capabilities.detachedTasks\n ?? (context.execBackground ? 'process-lifetime' : 'none')\n}\n\n// ---------------------------------------------------------------------------\n// Execution handle\n// ---------------------------------------------------------------------------\n\n/** Opaque handle to a running execution context instance */\nexport interface ExecutionHandle {\n id: string\n type: ContextType\n /** Working directory within the context */\n cwd: string\n}\n\n// ---------------------------------------------------------------------------\n// Exec result\n// ---------------------------------------------------------------------------\n\nexport interface ExecResult {\n stdout: string\n stderr: string\n exitCode: number\n}\n\n// ---------------------------------------------------------------------------\n// Spawn config\n// ---------------------------------------------------------------------------\n\nexport interface SpawnConfig {\n /** Working directory (created if it doesn't exist) */\n cwd?: string\n /**\n * Optional workspace-containment root for the file methods (`readFile`,\n * `writeFile`, `listFiles`) of the process context.\n *\n * When set, a resolved path that escapes this root (via absolute path,\n * `..`, or a symlink pointing outside) is rejected before any I/O. Default\n * unset = no containment (back-compat; the agent can read/write anywhere\n * the host process can). Set this for untrusted workloads to confine file\n * tools to a directory. Shell commands are not covered — use a sandbox /\n * docker context for full isolation.\n */\n workspaceRoot?: string\n /** Environment variables */\n env?: Record<string, string>\n /**\n * Whether spawned shells inherit the parent `process.env` (process context).\n *\n * Default `true` (preserves existing behavior — children see the parent\n * environment including anything Bun auto-loaded from `.env`). Set `false`\n * for untrusted workloads so secrets in the parent environment aren't\n * readable by tool commands; only `env` / per-call env plus a minimal base\n * (`PATH`, `HOME`, `SHELL`, `LANG`, `LC_ALL`, `USER`, `TERM`, `TMPDIR`) are\n * passed through.\n */\n inheritEnv?: boolean\n /** Docker image (only for 'docker' context) */\n image?: string\n /** Docker container name prefix (only for 'docker' context) */\n name?: string\n /** Host paths mounted into the context (only for 'docker' context today) */\n mounts?: ContextMount[]\n /** Resource limits */\n limits?: {\n /** Memory limit in MB */\n memory?: number\n /** CPU limit (e.g. '1.0' = 1 core) */\n cpu?: string\n /** Timeout in seconds for the entire context lifetime */\n timeout?: number\n }\n /** Sandbox provider config (only for 'sandbox' context) */\n sandbox?: {\n provider: string\n apiKey?: string\n /**\n * Attach to a pre-existing sandbox by id rather than creating one\n * (provider-specific; E2B maps it to `Sandbox.connect`). The provider\n * leaves a connected sandbox running on teardown.\n */\n sandboxId?: string\n [key: string]: unknown\n }\n\n /**\n * How long `destroy()` waits for a SIGTERM'd background task to settle\n * before escalating to SIGKILL and abandoning the wait (process context\n * only). Bounds `agent.destroy()` against tasks that trap SIGTERM or\n * whose stdio pipes are held open by escaped grandchildren.\n *\n * Default: `5000`.\n */\n destroyGraceMs?: number\n\n /**\n * Publish container ports on the host (docker context only).\n *\n * Each entry maps a container port to either an explicit host port or\n * (when `host` is omitted) a Docker-assigned random port. Retrieve the\n * actual host port at runtime via `getMappedPort(container)` on the\n * docker context.\n */\n ports?: Array<{ container: number, host?: number, proto?: 'tcp' | 'udp' }>\n\n /**\n * UID/GID the container should run as (docker context only).\n *\n * Accepts the same forms Docker's `--user` does: `uid`, `uid:gid`, or a\n * named user that exists in the image. Default is the image's default\n * user (typically root). Setting this to the host user's `uid:gid`\n * avoids the EACCES-on-cleanup problem when sharing a workspace via\n * a `shared` mount.\n */\n user?: string\n\n /**\n * User-defined Docker network to join (docker context only).\n *\n * Defaults to Docker's default bridge. Use a user network when you\n * need multiple sibling containers (e.g. agent + database + dev\n * server) to discover each other by name.\n */\n network?: string\n\n /**\n * Docker labels to attach to the container (docker context only).\n *\n * Useful for ownership tracking — callers can sweep abandoned\n * containers (e.g. from a crashed parent process) by filtering on\n * a label they own: `docker ps -aq --filter label=my-app=true`.\n */\n labels?: Record<string, string>\n\n /**\n * Container hardening options (docker context only).\n *\n * All fields are opt-in and OFF by default to preserve existing behavior\n * (containers run with the image's default user/capabilities). Enable them\n * for untrusted workloads. `dropAllCapabilities` and a non-root `user`\n * (see {@link SpawnConfig.user}) can break images that expect root or\n * specific capabilities, so they are not applied unless requested.\n */\n hardening?: ContextHardening\n}\n\n/**\n * Opt-in container hardening for the docker context. Every field defaults to\n * \"unset\" so an omitted `hardening` (or omitted field) reproduces the prior,\n * unrestricted behavior — this keeps the option purely additive.\n */\nexport interface ContextHardening {\n /**\n * Drop all Linux capabilities (`CapDrop: ['ALL']`). Strong isolation, but\n * breaks images needing capabilities (e.g. binding low ports, `ping`).\n * Default: `false`.\n */\n dropAllCapabilities?: boolean\n /**\n * Set `no-new-privileges` so processes can't gain privileges via setuid\n * binaries. Low blast radius; safe for most workloads. Default: `false`.\n */\n noNewPrivileges?: boolean\n /**\n * Mount the container root filesystem read-only. Pair with writable mounts\n * for scratch space. Default: `false`.\n */\n readonlyRootfs?: boolean\n /**\n * Max number of processes (`PidsLimit`) — caps fork-bombs. Default: unset\n * (no limit). A few hundred is usually plenty for an agent sandbox.\n */\n pidsLimit?: number\n}\n\nexport interface ContextMount {\n /** Absolute host path to mount. */\n source: string\n /** Absolute path inside the execution context. */\n target: string\n /** Mount read-only. Defaults to false for Docker's native bind behavior. */\n readonly?: boolean\n /**\n * Apply the SELinux shared label (`:z`) so the host user and the container\n * user can both read/write the mount (docker context only). No-op on\n * non-SELinux hosts. Combine with `SpawnConfig.user` to avoid root-owned\n * files leaking onto the host. Mutually exclusive with `readonly`.\n */\n shared?: boolean\n}\n\n// ---------------------------------------------------------------------------\n// Execution context interface\n// ---------------------------------------------------------------------------\n\nexport type ContextType = 'process' | 'docker' | 'sandbox'\n\nexport interface ExecutionContext {\n /** Context type identifier */\n readonly type: ContextType\n\n /** What this context supports */\n readonly capabilities: ContextCapabilities\n\n /** Spawn a new execution environment */\n spawn: (config?: SpawnConfig) => Promise<ExecutionHandle>\n\n /**\n * Execute a shell command in the context.\n *\n * `signal` propagates abort all the way down to the underlying child\n * process — the implementation is expected to wire it into whatever\n * spawn primitive it uses so the OS receives a SIGTERM (or equivalent)\n * when the caller aborts. Without it, a cancelled tool returns its\n * cancellation marker to the model but the underlying process keeps\n * running in the background, orphaning compute / locks / IO. Pass\n * `ctx.signal` from a tool body to inherit the per-call + run-level\n * abort union; pass a freshly-built one for a host-driven kill.\n *\n * Implementations are free to ignore `signal` (the contract degrades\n * gracefully — the process simply won't be killed), but the in-process\n * default DOES honor it via `child_process.exec`'s native `signal`\n * option.\n */\n exec: (handle: ExecutionHandle, command: string, options?: { cwd?: string, env?: Record<string, string>, timeout?: number, signal?: AbortSignal }) => Promise<ExecResult>\n\n /**\n * Start a process in the background. Settles as soon as `spawn` returns\n * — does NOT wait for the child to exit. Stdout + stderr stream\n * interleaved to the file at the returned `outputPath`. The caller\n * (typically the agent) reads incremental output via the regular\n * {@link ExecutionContext.readFile} seam.\n *\n * Optional — contexts without background support (some remote sandboxes)\n * just don't implement it. The shell tool surfaces a clean\n * \"background mode is not supported in this execution context\" error\n * when this is undefined.\n *\n * `onExit` is called once when the child terminates (natural, killed,\n * or error). The same instance that called `execBackground` is the\n * exclusive owner of the callback — it's not a multi-cast bus. Hosts\n * wire this to the agent's pending-notification queue so the model\n * gets a `<task-notification>` on its next turn.\n *\n * `onExit` is OPTIONAL: remote / durable contexts may have no legal\n * way to push a callback from a timer back into the host (a Restate\n * journal, for instance, forbids out-of-band writes). Such contexts\n * simply record the exit in their registry; the agent loop reconciles\n * by polling {@link ExecutionContext.listBackground} at run\n * boundaries, where a host journal wrapper is legal (the\n * `background:reconcile` hook is the journalable seam).\n *\n * See `docs/RUN_IN_BACKGROUND.md` for the broader design contract\n * (file location, replay semantics, suppression rules).\n */\n execBackground?: (\n handle: ExecutionHandle,\n command: string,\n options: {\n cwd?: string\n env?: Record<string, string>\n /**\n * Absolute directory the context appends `<task-id>.log` to. The\n * agent owns this path because it carries session-shaped knowledge\n * (`<userDir>/<sessionId>/tasks/`) the context can't synthesize.\n * Must already exist OR be creatable by the context — the\n * implementation handles `mkdir -p` defensively.\n */\n outputDir: string\n /** Push-style exit callback. Optional — see the method doc. */\n onExit?: (info: TaskExitInfo) => void\n /**\n * Cap on bytes written to the output file. Beyond the cap the\n * process KEEPS RUNNING; further output is counted but dropped,\n * and a `<output-truncated bytes-dropped=\"N\"/>` marker is\n * appended when the task settles. Unset / non-positive = no cap.\n */\n maxOutputBytes?: number\n /**\n * Stall watchdog: when the task produces no output for this many\n * milliseconds, `onStall` fires ONCE (one-shot — re-arms only\n * after fresh output arrives). The process is NOT killed; the\n * consumer decides (typically by telling the model the task may\n * be stuck at an interactive prompt). Unset = no watchdog.\n */\n stallTimeoutMs?: number\n /** One-shot stall callback — see `stallTimeoutMs`. */\n onStall?: (info: TaskStallInfo) => void\n },\n ) => Promise<TaskHandle>\n\n /**\n * SIGTERM the whole process group of a running background task.\n * Idempotent — second call returns `null` (or the cached exit info).\n * Resolves once the process has exited AND its output stream has\n * been flushed + closed.\n *\n * `null` return on miss (unknown id, already cleaned up) so the\n * shell_kill tool can surface a clean \"no such task\" message\n * without throwing.\n */\n killBackground?: (\n handle: ExecutionHandle,\n taskId: string,\n ) => Promise<TaskExitInfo | null>\n\n /**\n * Snapshot of every task in the context's registry — running AND\n * terminated (entries remain until the next context destroy, so\n * the model can still read output of exited tasks).\n */\n listBackground?: (\n handle: ExecutionHandle,\n ) => Promise<readonly TaskEntry[]>\n\n /**\n * Block until a background task terminates, then resolve with its\n * exit info. Resolves immediately for already-terminated tasks.\n *\n * Returns `null` when:\n * - the task id is unknown (or owned by another handle — same\n * isolation contract as `killBackground`), OR\n * - `timeoutMs` elapsed / `signal` aborted before the task exited.\n *\n * Callers that need to distinguish \"unknown\" from \"still running\"\n * should consult `listBackground` first (the `wait_task` tool does).\n *\n * This is the injectable wait seam for the `wait_task` tool:\n * in-process contexts implement it on the existing `onExit`\n * machinery; durable hosts implement it as an awakeable park\n * (runner fires task-exit over their bridge → resolveAwakeable).\n */\n waitBackground?: (\n handle: ExecutionHandle,\n taskId: string,\n options?: { timeoutMs?: number, signal?: AbortSignal },\n ) => Promise<TaskExitInfo | null>\n\n /**\n * Transfer ownership of every still-running task from `fromHandle`\n * to `toHandle`. Used by the spawn tool to \"promote\" a subagent's\n * background tasks up to the parent's handle so they outlive the\n * subagent's destroy() — matching shell semantics, where a `&`-ed\n * command outlives the parent process.\n *\n * Side effects:\n * - The task's `handleId` is rewritten, so subsequent\n * `listBackground(toHandle)` / `killBackground(toHandle, …)` /\n * `destroy(toHandle)` see it (and `fromHandle`-scoped operations\n * don't).\n * - When `newOnExit` is provided, the original `onExit` is\n * REPLACED with it. Critical: the task's natural exit callback\n * was captured against the spawning agent's hook bus, which is\n * about to be destroyed; without rewiring, the parent never\n * learns when the task terminates. Pass a closure that fires\n * the parent agent's `background:exit` hook.\n *\n * Returns the entries that were actually reassigned (running tasks\n * only; terminated ones stay where they are). Implementations that\n * don't support reassignment can leave this undefined; the caller\n * (spawn.ts) falls back to the current behavior of killing child\n * tasks at subagent shutdown.\n */\n reassignBackgroundTasks?: (\n fromHandle: ExecutionHandle,\n toHandle: ExecutionHandle,\n newOnExit?: (info: TaskExitInfo) => void,\n ) => Promise<readonly TaskEntry[]>\n\n /** Read a file from the context's filesystem */\n readFile: (handle: ExecutionHandle, path: string) => Promise<string>\n\n /**\n * Read a file from the context's filesystem as raw bytes.\n *\n * Used by `read_file` to dispatch image / binary files into the multimodal\n * `ToolResultContent[]` route. Optional — when not implemented, the tool\n * falls back to `base64 < path` via the `exec` seam, which works in any\n * shell-capable context. Implementations that already have a native\n * binary read (in-process `fs.readFile` without encoding, container API,\n * sandbox SDK) should override for the latency win.\n */\n readFileBinary?: (handle: ExecutionHandle, path: string) => Promise<Uint8Array>\n\n /** Write a file to the context's filesystem */\n writeFile: (handle: ExecutionHandle, path: string, content: string) => Promise<void>\n\n /** List files in a directory */\n listFiles: (handle: ExecutionHandle, path: string) => Promise<string[]>\n\n /**\n * Resolve the host-side port that a container port was published on\n * (docker context only, and only when the matching entry was created\n * with `host` omitted in `SpawnConfig.ports`).\n *\n * Resolves to `null` if the container port isn't published. Other\n * contexts (process, sandbox) don't implement this; they don't have\n * a port-mapping concept.\n */\n getMappedPort?: (handle: ExecutionHandle, containerPort: number) => Promise<number | null>\n\n /** Destroy the execution environment and clean up resources */\n destroy: (handle: ExecutionHandle) => Promise<void>\n}\n\n// ---------------------------------------------------------------------------\n// Background task types\n// ---------------------------------------------------------------------------\n\n/**\n * Lifecycle status of a background task.\n *\n * - `'running'` — process is still live; `exitCode` / `signal` unset.\n * - `'exited'` — process terminated on its own (clean or non-zero exit).\n * - `'killed'` — the host issued `killBackground` (SIGTERM to the group).\n *\n * The status is a coarse-grained signal; the exit code carries the\n * fine-grained detail (e.g. `143 = SIGTERM` for `'killed'`).\n */\nexport type BackgroundTaskStatus = 'running' | 'exited' | 'killed'\n\n/**\n * Returned synchronously by `execBackground` — the handle the model and\n * the framework use to refer to the task until it terminates.\n *\n * `outputPath` is an absolute path to the log file the context is\n * appending stdout + stderr to (interleaved by emit order). The model\n * reads it via the normal `read_file` tool; no special tool is required.\n */\nexport interface TaskHandle {\n /**\n * Stable id minted by the context — typically `bash_<n>` for\n * `ProcessContext`. Sequential within a single context instance,\n * resets when a new context is constructed. Forwarded to `killBackground` /\n * `listBackground` and stamped into every `<task-notification>` block.\n */\n taskId: string\n /** OS pid of the spawned shell wrapper (process-group leader on POSIX). */\n pid: number\n /** Absolute path to the log file the context is streaming output into. */\n outputPath: string\n}\n\n/**\n * Fired exactly once per task when the child process terminates, via\n * `execBackground`'s `onExit` callback. The agent layer translates this\n * into a queued `<task-notification>` for the next turn.\n *\n * `signal` is set when the child was terminated by a signal (e.g.\n * SIGTERM from our own kill-tree, SIGKILL from oom-killer); absent on\n * natural exit. `exitCode` is `128 + signal-number` on signal-killed\n * children, matching POSIX shell conventions — `143` for SIGTERM, etc.\n */\nexport interface TaskExitInfo {\n taskId: string\n status: Exclude<BackgroundTaskStatus, 'running'>\n exitCode: number\n signal?: NodeJS.Signals\n outputPath: string\n /** `Date.now()` delta between spawn and exit. */\n durationMs: number\n /** The original command string the model invoked — useful for telemetry / banner summary. */\n command: string\n}\n\n/**\n * One row in `listBackground`'s snapshot. Living entries (status `'running'`)\n * have `exitCode` / `signal` / `endedAt` unset; terminated entries carry the\n * same data `TaskExitInfo` returned at exit time.\n */\nexport interface TaskEntry {\n taskId: string\n pid: number\n command: string\n cwd: string\n startedAt: number\n /**\n * Set when the task terminated. Lets pull-based consumers (the agent's\n * run-boundary reconcile) derive `durationMs = endedAt - startedAt`\n * without depending on when the snapshot was taken.\n */\n endedAt?: number\n outputPath: string\n status: BackgroundTaskStatus\n exitCode?: number\n signal?: NodeJS.Signals\n /** Total bytes written to the output file so far — useful for \"task X has produced N KB\" UX hints. */\n bytesWritten: number\n}\n\n/**\n * Fired by the optional stall watchdog (see `execBackground`'s\n * `stallTimeoutMs`) when a running task has produced no output for the\n * configured window. The process is still alive — this is a signal, not\n * a state transition.\n */\nexport interface TaskStallInfo {\n taskId: string\n command: string\n outputPath: string\n /** Milliseconds since the last output chunk (>= the configured window). */\n stalledForMs: number\n /** Total bytes the task has written so far. */\n bytesWritten: number\n}\n"],"mappings":";;;;;;;;;;;AAsBA,MAAM,0BAA0B,QAAQ,aAAa;;;;;;;AAQrD,MAAM,qBAAqB,KAAK,OAAO;;;;;;;AAQvC,MAAM,2BAA2B;;;;;;;;;;;;;;AAejC,MAAM,2BAA2B;;;;;;;;;;;AAYjC,MAAM,aAAa;AAEnB,SAAS,iBAAiB,QAAsB;CAC9C,IAAI,CAAC,WAAW,KAAK,MAAM,GACzB,MAAM,IAAI,MAAM,oBAAoB,OAAO,iBAAiB,WAAW,EAAE;AAC7E;;;;;;;;;AAUA,SAAgB,uBAAuB,MAAoB;CACzD,MAAM,QAAQ,MAAsB,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;CAChE,MAAM,QAAQ,MAAsB,EAAE,SAAS,EAAE,SAAS,GAAG,GAAG;CAQhE,OAAO,GAPG,KAAK,eAOL,IANA,KAAK,KAAK,YAAY,IAAI,CAMtB,IALJ,KAAK,KAAK,WAAW,CAKb,EAAE,GAJV,KAAK,KAAK,YAAY,CAIT,IAHb,KAAK,KAAK,cAAc,CAGP,IAFjB,KAAK,KAAK,cAAc,CAEH,EAAE,GADtB,KAAK,KAAK,mBAAmB,CACH;AACvC;;AAMA,SAAS,QAAQ,KAAwB,MAAmC;CAC1E,MAAM,MAAyB,CAAC;CAChC,KAAK,MAAM,OAAO,MAChB,IAAI,IAAI,SAAS,KAAA,GACf,IAAI,OAAO,IAAI;CAEnB,OAAO;AACT;AAEA,SAAgB,qBAAqB,QAAwC;CAC3E,IAAI,UAAU;CACd,MAAM,0BAAU,IAAI,IAA6B;CACjD,MAAM,aAAa,QAAQ,OAAO,QAAQ,IAAI;CAC9C,MAAM,aAAa,QAAQ;CAC3B,MAAM,iBAAiB,QAAQ,kBAAkB;CAMjD,MAAM,UAA6B,QAAQ,eAAe,QACtD,QAAQ,QAAQ,KAAK;EAAC;EAAQ;EAAQ;EAAS;EAAQ;EAAU;EAAQ;EAAQ;CAAQ,CAAC,IAC1F,QAAQ;CAGZ,MAAM,gBAAgB,QAAQ,kBAAkB,KAAA,IAC5C,QAAQ,YAAY,OAAO,aAAa,IACxC,KAAA;;CAGJ,SAAS,aAAa,MAAc,QAAyB;EAC3D,MAAM,MAAM,SAAS,MAAM,MAAM;EACjC,OAAO,QAAQ,MAAO,CAAC,IAAI,WAAW,IAAI,KAAK,CAAC,WAAW,GAAG;CAChE;;;;;;;CAQA,eAAe,iBAAiB,QAAyB,MAA+B;EACtF,MAAM,OAAO,QAAQ,OAAO,KAAK,IAAI;EACrC,IAAI,kBAAkB,KAAA,GACpB,OAAO;EAIT,IAAI,WAAW;EACf,MAAM,OAAiB,CAAC;EACxB,OAAO,MACL,IAAI;GACF,WAAW,MAAM,SAAS,QAAQ;GAClC;EACF,QACM;GACJ,MAAM,SAAS,QAAQ,QAAQ;GAC/B,IAAI,WAAW,UAAU;IAEvB,WAAW;IACX;GACF;GAGA,KAAK,QAAQ,SAAS,QAAQ,QAAQ,CAAC;GACvC,WAAW;EACb;EAEF,MAAM,YAAY,KAAK,SAAS,QAAQ,UAAU,GAAG,IAAI,IAAI;EAC7D,MAAM,gBAAgB,MAAM,SAAS,aAAa,EAAE,YAAY,aAAa;EAC7E,IAAI,CAAC,aAAa,eAAe,SAAS,GACxC,MAAM,IAAI,MACR,oCAAoC,KAAK,qBAAqB,cAAc,EAC9E;EAEF,OAAO;CACT;;;;;;;;CASA,MAAM,wBAAQ,IAAI,IAAuB;CACzC,IAAI,cAAc;;;;;;;;;;;;;;;;;;;;CAqBlB,MAAM,mBAAmB,uCAAuB,IAAI,KAAK,CAAC;;;;;;;;;;;;;;;;;;;;CAqB1D,IAAI,wBAAwB;CAC5B,MAAM,oBAA0B;EAC9B,KAAK,MAAM,QAAQ,MAAM,OAAO,GAAG;GACjC,IAAI,KAAK,WAAW,WAClB;GACF,MAAM,MAAM,KAAK,MAAM;GACvB,IAAI,QAAQ,KAAA,GACV;GACF,IAAI;IACF,IAAI,yBACF,QAAQ,KAAK,CAAC,KAAK,SAAS;SAE5B,QAAQ,KAAK,KAAK,SAAS;GAC/B,QACM,CAIN;EACF;CACF;CAEA,OAAO;EACL,MAAM;EAEN,cAAc;GACZ,OAAO;GACP,YAAY;GACZ,SAAS;GACT,KAAK;GAKL,eAAe;EACjB;EAEA,MAAM,MAAM,WAAmD;GAC7D,MAAM,KAAK,WAAW,EAAE;GACxB,MAAM,MAAM,WAAW,OAAO;GAE9B,MAAM,MAAM,KAAK,EAAE,WAAW,KAAK,CAAC;GAEpC,MAAM,SAA0B;IAAE;IAAI,MAAM;IAAW;GAAI;GAC3D,QAAQ,IAAI,IAAI,MAAM;GACtB,OAAO;EACT;EAEA,MAAM,KACJ,QACA,SACA,SACqB;GACrB,MAAM,MAAM,SAAS,MAAM,QAAQ,OAAO,KAAK,QAAQ,GAAG,IAAI,OAAO;GAKrE,IAAI,SAAS,QAAQ,SACnB,OAAO;IAAE,QAAQ;IAAI,QAAQ;IAAkC,UAAU;GAAI;GAG/E,MAAM,aAAa,SAAS,WAAW,QAAQ,QAAQ,WAAW,MAAM;GACxE,MAAM,YAAY;GAElB,OAAO,IAAI,SAAqB,aAAa;IAc3C,MAAM,QAAQA,MAAW,WAAW,CAAC,MAAM,OAAO,GAAG;KACnD;KACA,KAAK;MAAE,GAAG;MAAS,GAAG;MAAY,GAAG,SAAS;KAAI;KAClD,OAAO;MAAC;MAAU;MAAQ;KAAM;KAChC,UAAU;IACZ,CAAC;IAED,IAAI,SAAS;IACb,IAAI,SAAS;IAKb,IAAI,cAAc;IAClB,IAAI,cAAc;IAClB,IAAI,kBAAkB;IACtB,IAAI,WAAW;IACf,IAAI,gBAAgB;IACpB,IAAI,UAAU;IAEd,MAAM,gBAAgB,MAA2B,UAAwB;KACvE,MAAM,OAAO,SAAS,WAAW,cAAc;KAC/C,IAAI,QAAQ,WAAW;MAGrB,IAAI,CAAC,iBAAiB;OACpB,kBAAkB;OAClB,iBAAiB,OAAO,SAAS;MACnC;MACA;KACF;KACA,MAAM,OAAO,YAAY;KACzB,IAAI,QAAQ;KACZ,IAAI,MAAM,SAAS,MAAM;MAIvB,IAAI,MAAM;MACV,OAAO,MAAM,MAAM,MAAM,OAAO,SAAU,KACxC;MACF,QAAQ,MAAM,SAAS,GAAG,GAAG;KAC/B;KACA,IAAI,SAAS,UAAU;MACrB,UAAU,MAAM,SAAS,MAAM;MAC/B,eAAe,MAAM;KACvB,OACK;MACH,UAAU,MAAM,SAAS,MAAM;MAC/B,eAAe,MAAM;KACvB;KACA,IAAI,MAAM,SAAS,MAAM;MACvB,kBAAkB;MAGlB,iBAAiB,OAAO,SAAS;KACnC;IACF;IAEA,MAAM,QAAQ,GAAG,SAAQ,UAAS,aAAa,UAAU,KAAe,CAAC;IACzE,MAAM,QAAQ,GAAG,SAAQ,UAAS,aAAa,UAAU,KAAe,CAAC;IAEzE,MAAM,eAAe,YAAY,IAC7B,iBAAiB;KACf,WAAW;KACX,iBAAiB,OAAO,SAAS;IACnC,GAAG,SAAS,IACZ,KAAA;IAEJ,MAAM,gBAAsB;KAC1B,gBAAgB;KAChB,iBAAiB,OAAO,SAAS;IACnC;IACA,MAAM,aAAa,SAAS;IAC5B,IAAI,YACF,WAAW,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;IAE9D,IAAI;IAEJ,MAAM,UAAU,UAAkB,gBAA+B;KAC/D,IAAI,SACF;KACF,UAAU;KACV,IAAI,cACF,aAAa,YAAY;KAC3B,IAAI,gBACF,aAAa,cAAc;KAC7B,IAAI,YACF,WAAW,oBAAoB,SAAS,OAAO;KACjD,MAAM,cAAc,cACf,SAAS,GAAG,OAAO,IAAI,gBAAgB,cACxC;KACJ,SAAS;MAAE;MAAQ,QAAQ;MAAa;KAAS,CAAC;IACpD;IAQA,MAAM,oBAAoB,MAAqB,WAAwC;KACrF,IAAI,eAAe;MACjB,OAAO,KAAK,mBAAmB;MAC/B;KACF;KACA,IAAI,UAAU;MACZ,OAAO,KAAK,2BAA2B,UAAU,GAAG;MACpD;KACF;KACA,IAAI,iBAAiB;MACnB,OAAO,KAAK,mBAAmB,UAAU,6BAA6B;MACtE;KACF;KACA,IAAI,QAAQ;MAGV,OAAO,KAAU,wBAAwB,QAAQ;MACjD;KACF;KACA,OAAO,OAAO,SAAS,WAAW,OAAO,CAAC;IAC5C;IAEA,MAAM,GAAG,UAAU,QAAQ;KAIzB,OAAO,GAAG,IAAI,OAAO;IACvB,CAAC;IAED,MAAM,GAAG,UAAU,MAAM,WAAW,iBAAiB,MAAM,MAAM,CAAC;IAElE,MAAM,GAAG,SAAS,MAAM,WAAW;KAOjC,iBAAiB,iBAAiB;MAChC,IAAI,SACF;MACF,MAAM,QAAQ,QAAQ;MACtB,MAAM,QAAQ,QAAQ;MACtB,iBAAiB,MAAM,MAAM;KAC/B,GAAG,wBAAwB;KAC3B,eAAe,QAAQ;IACzB,CAAC;GACH,CAAC;EACH;EAEA,MAAM,SAAS,QAAyB,MAA+B;GACrE,OAAO,SAAS,MAAM,iBAAiB,QAAQ,IAAI,GAAG,OAAO;EAC/D;EAEA,MAAM,eAAe,QAAyB,MAAmC;GAG/E,MAAM,MAAM,MAAM,SAAS,MAAM,iBAAiB,QAAQ,IAAI,CAAC;GAC/D,OAAO,IAAI,WAAW,GAAG;EAC3B;EAEA,MAAM,UAAU,QAAyB,MAAc,SAAgC;GACrF,MAAM,WAAW,MAAM,iBAAiB,QAAQ,IAAI;GACpD,MAAM,MAAM,QAAQ,QAAQ,GAAG,EAAE,WAAW,KAAK,CAAC;GAClD,MAAM,UAAU,UAAU,SAAS,OAAO;EAC5C;EAEA,MAAM,UAAU,QAAyB,MAAiC;GACxE,OAAO,QAAQ,MAAM,iBAAiB,QAAQ,IAAI,CAAC;EACrD;EAEA,MAAM,eACJ,QACA,SACA,SASqB;GACrB,MAAM,MAAM,QAAQ,MAAM,QAAQ,OAAO,KAAK,QAAQ,GAAG,IAAI,OAAO;GAEpE,MAAM,MAAM,QAAQ,WAAW,EAAE,WAAW,KAAK,CAAC;GAUlD,MAAM,SAAS,QAAQ,EAAE;GACzB,iBAAiB,MAAM;GACvB,MAAM,aAAa,QAAQ,QAAQ,WAAW,GAAG,OAAO,GAAG,iBAAiB,KAAK;GAIjF,IAAI,CAAC,uBAAuB;IAC1B,QAAQ,GAAG,QAAQ,WAAW;IAC9B,wBAAwB;GAC1B;GAUA,MAAM,eAA4B,kBAAkB,YAAY,EAAE,OAAO,IAAI,CAAC;GAS9E,aAAa,GAAG,UAAU,QAAQ;IAChC,IAAI,QAAQ,IAAI,cACd,QAAQ,OAAO,MAAM,0BAA0B,OAAO,qBAAqB,IAAI,QAAQ,GAAG;GAC9F,CAAC;GAMD,MAAM,QAAQA,MAAW,WAAW,CAAC,MAAM,OAAO,GAAG;IACnD;IACA,KAAK;KAAE,GAAG;KAAS,GAAG;KAAY,GAAG,QAAQ;IAAI;IACjD,OAAO;KAAC;KAAU;KAAQ;IAAM;IAChC,UAAU;GACZ,CAAC;GAED,MAAM,QAAmB;IACvB;IACA,UAAU,OAAO;IACjB,KAAK,MAAM,OAAO;IAClB;IACA;IACA,WAAW,KAAK,IAAI;IACpB;IACA;IACA;IACA,QAAQ;IACR,cAAc;IACd,SAAS;IACT,QAAQ,QAAQ;GAClB;GACA,MAAM,IAAI,QAAQ,KAAK;GASvB,MAAM,iBAAiB,OAAO,QAAQ,mBAAmB,YAAY,QAAQ,iBAAiB,IAC1F,QAAQ,iBACR,KAAA;GACJ,IAAI,eAAe;GAKnB,IAAI,eAAe;GAMnB,MAAM,iBAAiB,OAAO,QAAQ,mBAAmB,YAAY,QAAQ,iBAAiB,IAC1F,QAAQ,iBACR,KAAA;GACJ,MAAM,UAAU,QAAQ;GACxB,IAAI;GACJ,IAAI,eAAe,KAAK,IAAI;GAC5B,MAAM,sBAA4B;IAChC,IAAI,CAAC,kBAAkB,CAAC,SACtB;IACF,IAAI,YACF,aAAa,UAAU;IACzB,aAAa,iBAAiB;KAC5B,aAAa,KAAA;KACb,IAAI,MAAM,SACR;KACF,IAAI;MACF,QAAQ;OACN;OACA;OACA;OACA,cAAc,KAAK,IAAI,IAAI;OAC3B,cAAc,MAAM;MACtB,CAAC;KACH,SACO,KAAK;MACV,IAAI,QAAQ,IAAI,cACd,QAAQ,OAAO,MAAM,0BAA0B,OAAO,kBAAkB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,GAAG;KAChI;IACF,GAAG,cAAc;IACjB,WAAW,QAAQ;GACrB;GACA,MAAM,wBAA8B;IAClC,IAAI,YAAY;KACd,aAAa,UAAU;KACvB,aAAa,KAAA;IACf;GACF;GACA,cAAc;GAQd,MAAM,eAAe,UAAwB;IAC3C,MAAM,gBAAgB,MAAM;IAC5B,eAAe,KAAK,IAAI;IACxB,cAAc;IACd,IAAI,mBAAmB,KAAA,GAAW;KAChC,MAAM,SAAS,MAAM,eAAe,eAAe,MAAM;KACzD,IAAI,gBAAgB,UAAU,gBAAgB;MAC5C,eAAe;MACf,gBAAgB,MAAM;MACtB;KACF;KACA,MAAM,OAAO,iBAAiB;KAC9B,IAAI,MAAM,SAAS,MAAM;MACvB,eAAe;MAKf,IAAI,MAAM;MACV,OAAO,MAAM,MAAM,MAAM,OAAO,SAAU,KACxC;MACF,gBAAgB,MAAM,SAAS;MAC/B,IAAI,MAAM,GACR,aAAa,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC;MAC3C;KACF;IACF;IACA,aAAa,MAAM,KAAK;GAC1B;GACA,MAAM,QAAQ,GAAG,SAAQ,UAAS,YAAY,KAAe,CAAC;GAC9D,MAAM,QAAQ,GAAG,SAAQ,UAAS,YAAY,KAAe,CAAC;GAM9D,MAAM,UAAU,OAA0B,MAAqB,QAA+B,eAA8B;IAC1H,IAAI,MAAM,SACR;IACF,MAAM,UAAU;IAChB,gBAAgB;IAChB,MAAM,UAAU,KAAK,IAAI;IAGzB,MAAM,SACF,WAAW,aAAa,MAAM,gBAC5B,WACA;IAIN,MAAM,WAAW,SAAS,OACtB,OACA,WAAW,YACT,MACA,SACE,MACA;IACR,MAAM,SAAS;IACf,MAAM,WAAW;IACjB,IAAI,QACF,MAAM,SAAS;IAcjB,IAAI,YACF,IAAI;KACF,aAAa,MAAM,KAAK,WAAW,GAAG;IACxC,QACM,CAEN;IAEF,IAAI,eAAe,GACjB,IAAI;KAGF,aAAa,MAAM,sCAAsC,aAAa,MAAM;IAC9E,QACM,CAEN;IAEF,aAAa,UAAU;KAKrB,IAAI;MACF,MAAM,SAAS,oBAAoB,KAAK,CAAC;KAC3C,SACO,KAAK;MAIV,IAAI,QAAQ,IAAI,cACd,QAAQ,OAAO,MAAM,0BAA0B,OAAO,iBAAiB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,GAAG;KAC/H;IACF,CAAC;GACH;GAEA,MAAM,GAAG,UAAU,MAAM,WAAW,OAAO,SAAS,MAAM,MAAM,CAAC;GACjE,MAAM,GAAG,UAAS,QAAO,OAAO,SAAS,MAAM,MAAM,iBAAiB,IAAI,SAAS,CAAC;GAEpF,MAAM,GAAG,SAAS,MAAM,WAAW;IAajC,iBAPwC;KACtC,IAAI,MAAM,SACR;KACF,MAAM,QAAQ,QAAQ;KACtB,MAAM,QAAQ,QAAQ;KACtB,OAAO,SAAS,MAAM,MAAM;IAC9B,GAAG,wBACU,EAAE,QAAQ;GACzB,CAAC;GAED,OAAO;IAAE;IAAQ,KAAK,MAAM;IAAK;GAAW;EAC9C;EAEA,MAAM,eAAe,QAAyB,QAA8C;GAC1F,MAAM,QAAQ,MAAM,IAAI,MAAM;GAO9B,IAAI,CAAC,SAAS,MAAM,aAAa,OAAO,IACtC,OAAO;GAIT,IAAI,MAAM,WAAW,WACnB,OAAO,oBAAoB,KAAK;GAOlC,MAAM,gBAAgB;GAMtB,MAAM,SAAS,IAAI,SAAe,aAAa;IAC7C,IAAI,MAAM,SAAS;KACjB,SAAS;KACT;IACF;IACA,MAAM,iBAAiB,MAAM;IAC7B,MAAM,UAAU,SAAS;KACvB,iBAAiB,IAAI;KACrB,SAAS;IACX;GACF,CAAC;GAED,iBAAiB,MAAM,OAAO,SAAS;GACvC,MAAM;GACN,OAAO,oBAAoB,KAAK;EAClC;EAEA,MAAM,eACJ,QACA,QACA,SAC8B;GAC9B,MAAM,QAAQ,MAAM,IAAI,MAAM;GAI9B,IAAI,CAAC,SAAS,MAAM,aAAa,OAAO,IACtC,OAAO;GACT,IAAI,MAAM,SACR,OAAO,oBAAoB,KAAK;GAElC,OAAO,IAAI,SAA8B,aAAa;IACpD,IAAI,OAAO;IACX,IAAI;IACJ,MAAM,SAAS,SAAS;IACxB,MAAM,UAAU,UAAqC;KACnD,IAAI,MACF;KACF,OAAO;KACP,IAAI,OACF,aAAa,KAAK;KACpB,QAAQ,oBAAoB,SAAS,OAAO;KAC5C,SAAS,KAAK;IAChB;IACA,SAAS,UAAgB;KACvB,OAAO,IAAI;IACb;IAMA,MAAM,iBAAiB,MAAM;IAC7B,MAAM,UAAU,SAAS;KACvB,iBAAiB,IAAI;KACrB,OAAO,oBAAoB,KAAK,CAAC;IACnC;IAEA,IAAI,QAAQ;KACV,IAAI,OAAO,SAAS;MAClB,OAAO,IAAI;MACX;KACF;KACA,OAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;IAC1D;IACA,MAAM,YAAY,SAAS;IAC3B,IAAI,OAAO,cAAc,YAAY,OAAO,SAAS,SAAS,KAAK,YAAY,GAAG;KAChF,QAAQ,WAAW,QAAQ,WAAW,IAAI;KAC1C,MAAM,QAAQ;IAChB;GACF,CAAC;EACH;EAEA,MAAM,wBACJ,YACA,UACA,WAC+B;GAG/B,IAAI,WAAW,OAAO,SAAS,IAC7B,OAAO,CAAC;GACV,MAAM,WAAwB,CAAC;GAC/B,KAAK,MAAM,SAAS,MAAM,OAAO,GAAG;IAClC,IAAI,MAAM,aAAa,WAAW,MAAM,MAAM,WAAW,WACvD;IACF,MAAM,WAAW,SAAS;IAK1B,IAAI,WACF,MAAM,SAAS;IACjB,SAAS,KAAK,iBAAiB,KAAK,CAAC;GACvC;GACA,OAAO;EACT;EAEA,MAAM,eAAe,QAAwD;GAM3E,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,EACtB,QAAO,MAAK,EAAE,aAAa,OAAO,EAAE,EACpC,MAAM,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS,EACxC,IAAI,gBAAgB;EACzB;EAEA,MAAM,QAAQ,QAAwC;GAkBpD,MAAM,YAAY,CAAC,GAAG,MAAM,OAAO,CAAC,EAAE,QAAO,MAAK,EAAE,aAAa,OAAO,MAAM,CAAC,EAAE,OAAO;GACxF,MAAM,QAAQ,IAAI,UAAU,IAAI,OAAO,UAAU;IAC/C,MAAM,gBAAgB;IACtB,MAAM,IAAI,SAAe,aAAa;KACpC,IAAI;KACJ,MAAM,iBAAiB,MAAM;KAC7B,MAAM,UAAU,SAAS;MACvB,iBAAiB,IAAI;MACrB,IAAI,YACF,aAAa,UAAU;MACzB,SAAS;KACX;KACA,iBAAiB,MAAM,OAAO,SAAS;KAQvC,aAAa,iBAAiB;MAC5B,iBAAiB,MAAM,OAAO,SAAS;MAIvC,MAAM,aAAa,QAAQ;MAC3B,SAAS;KACX,GAAG,cAAc;KACjB,WAAW,QAAQ;IACrB,CAAC;GACH,CAAC,CAAC;GAGF,KAAK,MAAM,CAAC,QAAQ,UAAU,OAC5B,IAAI,MAAM,aAAa,OAAO,IAC5B,MAAM,OAAO,MAAM;GAEvB,QAAQ,OAAO,OAAO,EAAE;GAUxB,IAAI,yBAAyB,QAAQ,SAAS,GAAG;IAC/C,QAAQ,IAAI,QAAQ,WAAW;IAC/B,wBAAwB;GAC1B;EACF;CACF;AACF;;;;;;;;AAoEA,SAAS,iBAAiB,OAAqB,QAA8B;CAC3E,MAAM,MAAM,MAAM;CAClB,IAAI,QAAQ,KAAA,GACV;CACF,IAAI;EACF,IAAI,yBACF,QAAQ,KAAK,CAAC,KAAK,MAAM;OAEzB,QAAQ,KAAK,KAAK,MAAM;CAC5B,QACM,CAGN;AACF;;;;;;;AAQA,SAAS,iBAAiB,OAA6B;CACrD,OAAO;EACL,QAAQ,MAAM;EACd,KAAK,MAAM;EACX,SAAS,MAAM;EACf,KAAK,MAAM;EACX,WAAW,MAAM;EACjB,GAAI,MAAM,YAAY,KAAA,IAAY,EAAE,SAAS,MAAM,QAAQ,IAAI,CAAC;EAChE,YAAY,MAAM;EAClB,QAAQ,MAAM;EACd,GAAI,MAAM,aAAa,KAAA,IAAY,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;EACnE,GAAI,MAAM,SAAS,EAAE,QAAQ,MAAM,OAAO,IAAI,CAAC;EAC/C,cAAc,MAAM;CACtB;AACF;;;;;;;AAQA,SAAS,oBAAoB,OAAgC;CAC3D,OAAO;EACL,QAAQ,MAAM;EACd,QAAQ,MAAM;EACd,UAAU,MAAM,YAAY;EAC5B,GAAI,MAAM,SAAS,EAAE,QAAQ,MAAM,OAAO,IAAI,CAAC;EAC/C,YAAY,MAAM;EAIlB,aAAa,MAAM,WAAW,KAAK,IAAI,KAAK,MAAM;EAClD,SAAS,MAAM;CACjB;AACF;;;AC1iCA,SAAgB,qBAAqB,UAA6C;CAChF,MAAM,4BAAY,IAAI,IAAoB;CAE1C,SAAS,aAAa,QAAiC;EACrD,MAAM,KAAK,UAAU,IAAI,OAAO,EAAE;EAClC,IAAI,CAAC,IACH,MAAM,IAAI,MAAM,WAAW,OAAO,GAAG,WAAW;EAClD,OAAO;CACT;CAEA,OAAO;EACL,MAAM;EAEN,cAAc;GACZ,OAAO;GACP,YAAY;GACZ,SAAS;GACT,KAAK;EACP;EAEA,MAAM,MAAM,QAAgD;GAC1D,MAAM,SAAS,MAAM,SAAS,MAAM,UAAU,CAAC,CAAC;GAChD,MAAM,SAA0B;IAAE,IAAI,OAAO;IAAI,MAAM;IAAW,KAAK,OAAO;GAAI;GAClF,UAAU,IAAI,OAAO,IAAI,OAAO,EAAE;GAClC,OAAO;EACT;EAEA,MAAM,KAAK,QAAyB,SAAiB,SAA+B;GAClF,OAAO,SAAS,KAAK,aAAa,MAAM,GAAG,SAAS,OAAO;EAC7D;EAEA,MAAM,SAAS,QAAyB,MAA+B;GACrE,OAAO,SAAS,SAAS,aAAa,MAAM,GAAG,IAAI;EACrD;EAEA,MAAM,UAAU,QAAyB,MAAc,SAAgC;GACrF,OAAO,SAAS,UAAU,aAAa,MAAM,GAAG,MAAM,OAAO;EAC/D;EAEA,MAAM,UAAU,QAAyB,MAAiC;GACxE,OAAO,SAAS,UAAU,aAAa,MAAM,GAAG,IAAI;EACtD;EAEA,MAAM,QAAQ,QAAwC;GACpD,MAAM,KAAK,UAAU,IAAI,OAAO,EAAE;GAClC,IAAI,CAAC,IACH;GACF,MAAM,SAAS,QAAQ,EAAE;GACzB,UAAU,OAAO,OAAO,EAAE;EAC5B;CACF;AACF;;;;;;;AC1BA,SAAgB,+BAA+B,SAAoD;CACjG,OAAO,QAAQ,aAAa,kBACtB,QAAQ,iBAAiB,qBAAqB;AACtD"}
@@ -1,3 +1,3 @@
1
- import { a as DetachedTasksCapability, c as ExecutionHandle, d as TaskExitInfo, f as TaskHandle, i as ContextType, l as SpawnConfig, m as resolveDetachedTasksCapability, n as ContextCapabilities, o as ExecResult, p as TaskStallInfo, r as ContextMount, s as ExecutionContext, t as BackgroundTaskStatus, u as TaskEntry } from "./types-BibzMDjX.js";
2
- import { n as createSandboxContext, r as createProcessContext, t as SandboxProvider } from "./index-C_t8tW_X.js";
3
- export { type BackgroundTaskStatus, type ContextCapabilities, type ContextMount, type ContextType, type DetachedTasksCapability, type ExecResult, type ExecutionContext, type ExecutionHandle, type SandboxProvider, type SpawnConfig, type TaskEntry, type TaskExitInfo, type TaskHandle, type TaskStallInfo, createProcessContext, createSandboxContext, resolveDetachedTasksCapability };
1
+ import { a as ContextType, c as ExecutionContext, d as TaskEntry, f as TaskExitInfo, h as resolveDetachedTasksCapability, i as ContextMount, l as ExecutionHandle, m as TaskStallInfo, n as ContextCapabilities, o as DetachedTasksCapability, p as TaskHandle, r as ContextHardening, s as ExecResult, t as BackgroundTaskStatus, u as SpawnConfig } from "./types-B39tBba1.js";
2
+ import { n as createSandboxContext, r as createProcessContext, t as SandboxProvider } from "./index-CrMb8jCE.js";
3
+ export { type BackgroundTaskStatus, type ContextCapabilities, type ContextHardening, type ContextMount, type ContextType, type DetachedTasksCapability, type ExecResult, type ExecutionContext, type ExecutionHandle, type SandboxProvider, type SpawnConfig, type TaskEntry, type TaskExitInfo, type TaskHandle, type TaskStallInfo, createProcessContext, createSandboxContext, resolveDetachedTasksCapability };
package/dist/contexts.js CHANGED
@@ -1,2 +1,2 @@
1
- import { n as createSandboxContext, r as createProcessContext, t as resolveDetachedTasksCapability } from "./contexts-BJVgG0LY.js";
1
+ import { n as createSandboxContext, r as createProcessContext, t as resolveDetachedTasksCapability } from "./contexts-DglWSzmR.js";
2
2
  export { createProcessContext, createSandboxContext, resolveDetachedTasksCapability };
package/dist/eval.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- import { $t as MetricEmitter, An as llmJudge, At as EvalArtifacts, Bt as EvalRunSummary, Cn as fileExists, Ct as EFFICIENCY_METRICS, Dn as formatEvalRunSummary, Dt as EvalAgentRunResult, En as formatEvalCaseSummary, Et as EvalAgentRunOptions, Fn as statusCompleted, Ft as EvalMetric, Gt as EvalScorerContext, Ht as EvalRunUsage, It as EvalMetricError, Jt as EvalWorkspaceFile, Kt as EvalTestRunner, Lt as EvalRunMetricAggregate, Mn as registerEvalTests, Mt as EvalCaseResult, Nn as relativeArtifactPath, Nt as EvalDefinition, On as formatTrajectoryLine, Ot as EvalAgentRunStats, Pn as runEvalCase, Pt as EvalDefinitionContext, Qt as MetricDirection, Rt as EvalRunReporter, Sn as fileContentQuality, St as CreateEvalAgentOptions, Tn as finalizeEvalMetrics, Tt as EvalAgentMcpServers, Ut as EvalScore, Vt as EvalRunSummaryCase, Wt as EvalScorer, Xt as EvalWorkspaceSnapshot, Yt as EvalWorkspaceOptions, Zt as LlmJudgeOptions, _n as defineEval, an as Trajectory, bn as emitEfficiencyMetrics, cn as artifactPath, dn as buildTrajectory, en as MetricSpec, fn as clearRegisteredEvals, gn as createReusableExecutionContext, hn as createEvalRunReporter, in as ReusableExecutionContext, jn as normalizeMetric, jt as EvalCaseOptions, kn as functionalityMetric, kt as EvalAgentStats, ln as buildEvalRunSummary, mn as createEvalAgent, nn as MetricStats, on as TrajectoryStep, pn as computeEvalTagScores, qt as EvalVariantSummary, rn as RegisterEvalTestsOptions, sn as TrajectoryStepKind, tn as MetricSpecMap, un as buildRegisteredEvals, vn as defineMetrics, wn as fileExistsOneOf, wt as EvalAgent, xn as fileContains, yn as efficiencyMetricValues, zt as EvalRunReporterOptions } from "./index-C4aT2kO_.js";
1
+ import { $t as buildRegisteredEvals, At as EvalRunSummaryCase, Bt as LlmJudgeOptions, Cn as runEvalCase, Ct as EvalDefinitionContext, Dt as EvalRunReporter, Et as EvalRunMetricAggregate, Ft as EvalTestRunner, Gt as MetricStats, Ht as MetricEmitter, It as EvalVariantSummary, Jt as Trajectory, Kt as RegisterEvalTestsOptions, Lt as EvalWorkspaceFile, Mt as EvalScore, Nt as EvalScorer, Ot as EvalRunReporterOptions, Pt as EvalScorerContext, Qt as buildEvalRunSummary, Rt as EvalWorkspaceOptions, Sn as relativeArtifactPath, St as EvalDefinition, Tt as EvalMetricError, Ut as MetricSpec, Vt as MetricDirection, Wt as MetricSpecMap, Xt as TrajectoryStepKind, Yt as TrajectoryStep, Zt as artifactPath, _n as formatTrajectoryLine, _t as EvalAgentRunStats, an as createReusableExecutionContext, bn as normalizeMetric, bt as EvalCaseOptions, cn as efficiencyMetricValues, dn as fileContentQuality, dt as CreateEvalAgentOptions, en as buildTrajectory, fn as fileExists, ft as EFFICIENCY_METRICS, gn as formatEvalRunSummary, gt as EvalAgentRunResult, hn as formatEvalCaseSummary, ht as EvalAgentRunOptions, in as createEvalRunReporter, jt as EvalRunUsage, kt as EvalRunSummary, ln as emitEfficiencyMetrics, mn as finalizeEvalMetrics, mt as EvalAgentMcpServers, nn as computeEvalTagScores, on as defineEval, pn as fileExistsOneOf, pt as EvalAgent, qt as ReusableExecutionContext, rn as createEvalAgent, sn as defineMetrics, tn as clearRegisteredEvals, un as fileContains, vn as functionalityMetric, vt as EvalAgentStats, wn as statusCompleted, wt as EvalMetric, xn as registerEvalTests, xt as EvalCaseResult, yn as llmJudge, yt as EvalArtifacts, zt as EvalWorkspaceSnapshot } from "./index-DZR99FD4.js";
2
2
  export { CreateEvalAgentOptions, EFFICIENCY_METRICS, EvalAgent, EvalAgentMcpServers, EvalAgentRunOptions, EvalAgentRunResult, EvalAgentRunStats, EvalAgentStats, EvalArtifacts, EvalCaseOptions, EvalCaseResult, EvalDefinition, EvalDefinitionContext, EvalMetric, EvalMetricError, EvalRunMetricAggregate, EvalRunReporter, EvalRunReporterOptions, EvalRunSummary, EvalRunSummaryCase, EvalRunUsage, EvalScore, EvalScorer, EvalScorerContext, EvalTestRunner, EvalVariantSummary, EvalWorkspaceFile, EvalWorkspaceOptions, EvalWorkspaceSnapshot, LlmJudgeOptions, MetricDirection, MetricEmitter, MetricSpec, MetricSpecMap, MetricStats, RegisterEvalTestsOptions, ReusableExecutionContext, Trajectory, TrajectoryStep, TrajectoryStepKind, artifactPath, buildEvalRunSummary, buildRegisteredEvals, buildTrajectory, clearRegisteredEvals, computeEvalTagScores, createEvalAgent, createEvalRunReporter, createReusableExecutionContext, defineEval, defineMetrics, efficiencyMetricValues, emitEfficiencyMetrics, fileContains, fileContentQuality, fileExists, fileExistsOneOf, finalizeEvalMetrics, formatEvalCaseSummary, formatEvalRunSummary, formatTrajectoryLine, functionalityMetric, llmJudge, normalizeMetric, registerEvalTests, relativeArtifactPath, runEvalCase, statusCompleted };
package/dist/eval.js CHANGED
@@ -1,7 +1,7 @@
1
- import { _ as alwaysQuote } from "./tools-NxnEmzYg.js";
2
- import { r as createProcessContext } from "./contexts-BJVgG0LY.js";
3
- import { a as headlessEventToJsonl, c as runHeadless } from "./headless-CPaunZsU.js";
4
- import { i as createMemoryStore, t as createSession } from "./session-CtAWwwkn.js";
1
+ import { r as createProcessContext } from "./contexts-DglWSzmR.js";
2
+ import { t as alwaysQuote } from "./shell-quote-BmnhZmdM.js";
3
+ import { a as headlessEventToJsonl, c as runHeadless } from "./headless-Bb5gU8AR.js";
4
+ import { a as createMemoryStore, t as createSession } from "./session-Do_TQV7c.js";
5
5
  import { join, relative, resolve } from "node:path";
6
6
  import { tmpdir } from "node:os";
7
7
  import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
@@ -103,7 +103,7 @@ function createReusableExecutionContext(base) {
103
103
  }
104
104
  /**
105
105
  * Multi-turn eval agent over the low-level headless runner. It keeps session
106
- * and execution lifetime outside any Playwright/Bolt-specific fixture layer,
106
+ * and execution lifetime outside any Playwright-specific fixture layer,
107
107
  * so downstream platforms can wrap it with their own `agent.run(...)` shape.
108
108
  */
109
109
  function createEvalAgent(options) {