npm - @tangle-network/agent-runtime - Versions diffs - 0.43.0 → 0.44.0 - Mend

@tangle-network/agent-runtime 0.43.0 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +3 -1
package/dist/agent.d.ts +1 -1
package/dist/agent.js +1 -1
package/dist/{chunk-C5HMTTNY.js → chunk-GFKVVRQ7.js} +8 -8
package/dist/{chunk-MNCB4SJ5.js → chunk-KDMRUD2P.js} +2 -2
package/dist/{chunk-EKBSQYZE.js → chunk-S7JXV32P.js} +159 -25
package/dist/chunk-S7JXV32P.js.map +1 -0
package/dist/{chunk-MJDGCRAT.js → chunk-SKUZZCHE.js} +2 -2
package/dist/{dynamic-B_7GgCwu.d.ts → dynamic-wUgp6UKs.d.ts} +1 -1
package/dist/improvement.js +3 -3
package/dist/index.d.ts +6 -6
package/dist/index.js +15 -15
package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D0ZIhFOU.d.ts} +1 -1
package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-BLMa8He3.d.ts} +3 -3
package/dist/loop-runner-bin.d.ts +4 -4
package/dist/loop-runner-bin.js +4 -4
package/dist/loops.d.ts +101 -79
package/dist/loops.js +9 -1
package/dist/mcp/bin.js +3 -3
package/dist/mcp/index.d.ts +5 -5
package/dist/mcp/index.js +3 -3
package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-wFDmmurL.d.ts} +1 -1
package/dist/profiles.d.ts +1 -1
package/dist/run-loop-C4L1Sted.d.ts +89 -0
package/dist/{types-Bcp071Jg.d.ts → types-DbJzz2uf.d.ts} +1 -1
package/dist/workflow.d.ts +550 -0
package/dist/workflow.js +1779 -0
package/dist/workflow.js.map +1 -0
package/package.json +29 -12
package/dist/chunk-EKBSQYZE.js.map +0 -1
package/dist/{chunk-C5HMTTNY.js.map → chunk-GFKVVRQ7.js.map} +0 -0
package/dist/{chunk-MNCB4SJ5.js.map → chunk-KDMRUD2P.js.map} +0 -0
package/dist/{chunk-MJDGCRAT.js.map → chunk-SKUZZCHE.js.map} +0 -0

package/dist/improvement.js CHANGED Viewed

@@ -1,9 +1,9 @@
-import {
-  optimizePrompt
-} from "./chunk-VOX6Z3II.js";
 import {
   runLocalHarness
 } from "./chunk-GLR25NG7.js";
+import {
+  optimizePrompt
+} from "./chunk-VOX6Z3II.js";
 import "./chunk-SQSCRJ7U.js";
 import "./chunk-DGUM43GV.js";

package/dist/index.d.ts CHANGED Viewed

@@ -1,15 +1,15 @@
 import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
 export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
-import { q as AgentBackendInput, r as AgentExecutionBackend, O as OpenAIChatTool, s as OpenAIChatToolChoice, t as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, u as RunAgentTaskOptions, v as AgentTaskRunResult, w as RunAgentTaskStreamOptions, x as AgentRuntimeEvent, y as AgentTaskStatus, z as RuntimeSessionStore, B as RuntimeSession } from './types-Bcp071Jg.js';
-export { C as AgentAdapter, F as AgentKnowledgeProvider, G as AgentRuntimeEventSink, H as AgentTaskContext, J as AgentTaskSpec, M as BackendErrorDetail, N as RuntimeRunHandle, P as RuntimeRunPersistenceAdapter, Q as RuntimeRunRow, S as startRuntimeRun } from './types-Bcp071Jg.js';
-export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-CVoCBmYk.js';
-export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-BzvF1Ela.js';
+import { q as AgentBackendInput, r as AgentExecutionBackend, c as OpenAIChatTool, s as OpenAIChatToolChoice, t as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, u as RunAgentTaskOptions, v as AgentTaskRunResult, w as RunAgentTaskStreamOptions, x as AgentRuntimeEvent, y as AgentTaskStatus, z as RuntimeSessionStore, B as RuntimeSession } from './types-DbJzz2uf.js';
+export { C as AgentAdapter, F as AgentKnowledgeProvider, G as AgentRuntimeEventSink, H as AgentTaskContext, J as AgentTaskSpec, M as BackendErrorDetail, N as RuntimeRunHandle, P as RuntimeRunPersistenceAdapter, Q as RuntimeRunRow, S as startRuntimeRun } from './types-DbJzz2uf.js';
+export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-BLMa8He3.js';
+export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-wFDmmurL.js';
 import '@tangle-network/sandbox';
 import '@tangle-network/agent-eval/campaign';
 import './types-p8dWBIXL.js';
 import './optimize-prompt-D-urF2wW.js';
-import './dynamic-B_7GgCwu.js';
-import './kb-gate-DTBum3vH.js';
+import './dynamic-wUgp6UKs.js';
+import './kb-gate-D0ZIhFOU.js';
 import './profiles.js';
 /**

package/dist/index.js CHANGED Viewed

@@ -1,3 +1,14 @@
+import {
+  mcpToolsForRuntimeMcp,
+  mcpToolsForRuntimeMcpSubset
+} from "./chunk-NRZOXCJK.js";
+import {
+  INTELLIGENCE_WIRE_VERSION,
+  buildLoopOtelSpans,
+  createOtelExporter,
+  exportEvalRuns,
+  loopEventToOtelSpan
+} from "./chunk-HVYOHJHK.js";
 import {
   DELEGATED_LOOP_MODES,
   auditLoopRunner,
@@ -10,23 +21,12 @@ import {
   runDelegatedLoop,
   runLoopRunnerCli,
   selfImproveLoopRunner
-} from "./chunk-C5HMTTNY.js";
+} from "./chunk-GFKVVRQ7.js";
+import "./chunk-FNMGYYSS.js";
+import "./chunk-SKUZZCHE.js";
 import "./chunk-XBUG326M.js";
 import "./chunk-VOX6Z3II.js";
-import {
-  mcpToolsForRuntimeMcp,
-  mcpToolsForRuntimeMcpSubset
-} from "./chunk-NRZOXCJK.js";
-import {
-  INTELLIGENCE_WIRE_VERSION,
-  buildLoopOtelSpans,
-  createOtelExporter,
-  exportEvalRuns,
-  loopEventToOtelSpan
-} from "./chunk-HVYOHJHK.js";
-import "./chunk-FNMGYYSS.js";
-import "./chunk-MJDGCRAT.js";
-import "./chunk-EKBSQYZE.js";
+import "./chunk-S7JXV32P.js";
 import "./chunk-3HMHSN22.js";
 import "./chunk-PY6NMZYX.js";
 import {

package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D0ZIhFOU.d.ts} RENAMED Viewed

@@ -1,5 +1,5 @@
 import { CoderOutput, CoderTask } from './profiles.js';
-import { L as LoopSandboxClient, c as LoopTraceEmitter } from './types-Bcp071Jg.js';
+import { a as LoopSandboxClient, d as LoopTraceEmitter } from './types-DbJzz2uf.js';
 import { SandboxInstance } from '@tangle-network/sandbox';
 /**

package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-BLMa8He3.d.ts} RENAMED Viewed

@@ -1,9 +1,9 @@
 import { Scenario } from '@tangle-network/agent-eval/campaign';
 import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
 import { O as OptimizePromptOptions, a as OptimizePromptResult } from './optimize-prompt-D-urF2wW.js';
-import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-B_7GgCwu.js';
-import { L as LoopSandboxClient, a as OutputAdapter, V as Validator, A as AgentRunSpec, b as LoopResult } from './types-Bcp071Jg.js';
-import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-DTBum3vH.js';
+import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-wUgp6UKs.js';
+import { a as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, b as LoopResult } from './types-DbJzz2uf.js';
+import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-D0ZIhFOU.js';
 import { CoderOutput } from './profiles.js';
 /**

package/dist/loop-runner-bin.d.ts CHANGED Viewed

@@ -1,11 +1,11 @@
 #!/usr/bin/env node
-export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CVoCBmYk.js';
+export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-BLMa8He3.js';
 import '@tangle-network/agent-eval/campaign';
 import './types-p8dWBIXL.js';
 import '@tangle-network/agent-eval';
 import './optimize-prompt-D-urF2wW.js';
-import './dynamic-B_7GgCwu.js';
-import './types-Bcp071Jg.js';
+import './dynamic-wUgp6UKs.js';
+import './types-DbJzz2uf.js';
 import '@tangle-network/sandbox';
-import './kb-gate-DTBum3vH.js';
+import './kb-gate-D0ZIhFOU.js';
 import './profiles.js';

package/dist/loop-runner-bin.js CHANGED Viewed

@@ -2,12 +2,12 @@
 import {
   parseLoopRunnerArgv,
   runLoopRunnerCli
-} from "./chunk-C5HMTTNY.js";
+} from "./chunk-GFKVVRQ7.js";
+import "./chunk-FNMGYYSS.js";
+import "./chunk-SKUZZCHE.js";
 import "./chunk-XBUG326M.js";
 import "./chunk-VOX6Z3II.js";
-import "./chunk-FNMGYYSS.js";
-import "./chunk-MJDGCRAT.js";
-import "./chunk-EKBSQYZE.js";
+import "./chunk-S7JXV32P.js";
 import "./chunk-3HMHSN22.js";
 import "./chunk-PY6NMZYX.js";
 import "./chunk-SQSCRJ7U.js";

package/dist/loops.d.ts CHANGED Viewed

@@ -1,12 +1,14 @@
-import { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
+import { AgentProfile, SandboxEvent, SandboxInstance, CreateSandboxOptions } from '@tangle-network/sandbox';
 export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
-import { P as PlannerContext, T as TopologyPlanner } from './dynamic-B_7GgCwu.js';
-export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-B_7GgCwu.js';
-import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, a as OutputAdapter, V as Validator, E as ExecCtx, d as LoopWinner, b as LoopResult, R as RuntimeStreamEvent } from './types-Bcp071Jg.js';
-export { e as LoopDecisionPayload, f as LoopEndedPayload, g as LoopIterationDispatchPayload, h as LoopIterationEndedPayload, i as LoopIterationStartedPayload, j as LoopPlanDescription, k as LoopPlanPayload, l as LoopSandboxPlacement, m as LoopStartedPayload, n as LoopTokenUsage, c as LoopTraceEmitter, o as LoopTraceEvent, p as ValidationCtx } from './types-Bcp071Jg.js';
+import { T as TopologyPlanner, P as PlannerContext } from './dynamic-wUgp6UKs.js';
+export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-wUgp6UKs.js';
+import { D as Driver, I as Iteration, a as LoopSandboxClient, A as AgentRunSpec, b as LoopResult, R as RuntimeStreamEvent } from './types-DbJzz2uf.js';
+export { E as ExecCtx, e as LoopDecisionPayload, f as LoopEndedPayload, g as LoopIterationDispatchPayload, h as LoopIterationEndedPayload, i as LoopIterationStartedPayload, j as LoopPlanDescription, k as LoopPlanPayload, l as LoopSandboxPlacement, m as LoopStartedPayload, n as LoopTokenUsage, d as LoopTraceEmitter, o as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, p as ValidationCtx, V as Validator } from './types-DbJzz2uf.js';
 import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
 export { DefaultVerdict } from '@tangle-network/agent-eval';
 import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
+import { R as RunLoopOptions } from './run-loop-C4L1Sted.js';
+export { c as createSandboxForSpec, r as runLoop } from './run-loop-C4L1Sted.js';
 /**
  * @experimental
@@ -52,6 +54,36 @@ declare function createFanoutVoteDriver<Task, Output>(options: CreateFanoutVoteD
  */
 declare function scoreFanoutVoteIterations<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): FanoutVoteScored<Task, Output>[];
+/**
+ * @experimental
+ *
+ * Named driver policies — a registry, not a constructor zoo.
+ *
+ * A "driver variant" is just a `TopologyPlanner` chosen by name and run by the
+ * one interpreter (`createDynamicDriver`). The agentic variants are sandboxed
+ * agents (`createSandboxPlanner`) — an LLM/agent in a box that emits the next
+ * move; this registry holds the *deterministic* ones a benchmark needs as
+ * controls. Today that's `blind`: a single attempt, no steering — the baseline
+ * you measure a real driver against.
+ *
+ * Adding a variant is a line here (or a sandboxed planner registered by name),
+ * never a new `createXDriver` factory and never a spec schema.
+ */
+/** A driver policy over prompt-shaped (string) tasks. Output is consulted only
+ *  through the iteration's verdict, so it stays `unknown`. */
+type PromptPlanner = TopologyPlanner<string, unknown>;
+/**
+ * `blind` — one attempt, then stop. The no-driver control: a single worker run
+ * with no steering, so a benchmark can isolate what a real driver adds.
+ */
+declare const blind: PromptPlanner;
+/** The registry. Pick a driver by name (e.g. `DRIVER=blind`); fail loud on an
+ *  unknown key. Sandboxed-agent planners can be registered here too. */
+declare const PROMPT_PLANNERS: Record<string, PromptPlanner>;
+/** Resolve a planner by name; fail loud on an unknown variant. */
+declare function resolvePlanner(name: string): PromptPlanner;
 /**
  * @experimental
  *
@@ -104,6 +136,13 @@ declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Itera
  * the planner never names which harness runs a branch — the kernel's
  * `agentRuns` round-robin decides that.
  *
+ * Three execution modes, all the same code path:
+ *   - LLM call        — a cheap-model `profile`; one prompt → one move.
+ *   - different sandbox (default) — a fresh planner-owned box per round.
+ *   - same sandbox     — pass `reuseBox` to stream the move into the worker's
+ *                        own box (a session against its live filesystem/state),
+ *                        so the driver steers from what the worker actually did.
+ *
  * Envelope contract the agent must emit (fenced ```json or a structured
  * `result`/`final` event payload):
  *   { "kind": "refine" | "fanout" | "stop",
@@ -136,89 +175,26 @@ interface CreateSandboxPlannerOptions<Task, Output> {
      */
     decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
     /** Override the default prompt (history summary + envelope contract). */
-    buildPrompt?: (ctx: PlannerContext<Task, Output>) => string;
+    buildPrompt?: (ctx: PlannerContext<Task, Output>) => string | Promise<string>;
     /** Override envelope extraction from the event stream. */
     parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
     /** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
     sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
     /** Cancellation for the planner's own LLM call. */
     signal?: AbortSignal;
-}
-/** @experimental */
-declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
-/**
- * @experimental
- *
- * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
- *
- * Each iteration:
- *   1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
- *   2. For each task (parallel, bounded by `maxConcurrency`):
- *        a. round-robin an `AgentRunSpec` from `agentRuns`
- *        b. `sandboxClient.create({ backend: { profile }, ...overrides })`
- *        c. emit `loop.iteration.dispatch` with the placement
- *           (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
- *        d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
- *   3. `output.parse(events)` → typed `Output`
- *   4. `validator?.validate(output)` → `DefaultVerdict`
- *   5. Append `Iteration` to history; emit `loop.iteration.ended`
- *   6. `driver.decide(history)` → if terminal, return result + winner
- *
- * The kernel owns: iteration accounting, per-iteration timing, error
- * capture, abort propagation, concurrency cap, cost aggregation, and trace
- * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
- * profile), how outputs are decoded (output adapter), how outputs are
- * scored (validator), or topology (driver).
- */
-/** @experimental */
-interface RunLoopOptions<Task, Output, Decision> {
-    driver: Driver<Task, Output, Decision>;
-    /**
-     * Single agent spec — every iteration uses this profile. Mutually
-     * exclusive with `agentRuns`.
-     */
-    agentRun?: AgentRunSpec<Task>;
-    /**
-     * Multiple specs for heterogeneous fanout. The kernel round-robins
-     * through them when the driver plans N tasks. Mutually exclusive with
-     * `agentRun`.
-     */
-    agentRuns?: AgentRunSpec<Task>[];
-    output: OutputAdapter<Output>;
-    validator?: Validator<Output>;
-    task: Task;
-    ctx: ExecCtx;
-    /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
-    maxIterations?: number;
-    /** Default 4. In-flight worker cap within a single `plan()` batch. */
-    maxConcurrency?: number;
-    /**
-     * Pre-allocated id for trace correlation. Default = `loop-${random}`.
-     * Surfaces as `runId` on every emitted `LoopTraceEvent`.
-     */
-    runId?: string;
-    /**
-     * Clock override; default `Date.now`. Deterministic tests pass a
-     * monotonic counter to stabilize iteration timing fields.
-     */
-    now?: () => number;
     /**
-     * Override the default winner selector (highest-valid-score, ties broken
-     * by earliest iteration).
+     * Same-sandbox mode. Return an existing box and the planner streams its move
+     * INTO that box (a session against the worker's environment) instead of
+     * spinning its own — so the driver can inspect the worker's real filesystem
+     * and state, not just the history summary. The returned box's lifecycle is
+     * the CALLER's: the planner neither creates nor deletes it. Return
+     * `undefined` to fall back to the default (a fresh, planner-owned box =
+     * different-sandbox mode). Omit entirely for the default.
      */
-    selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
+    reuseBox?: () => SandboxInstance | undefined | Promise<SandboxInstance | undefined>;
 }
 /** @experimental */
-declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
-/**
- * Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
- * spec's profile (inferring the backend type when the spec doesn't override
- * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
- * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
- */
-declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
+declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
 /**
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
@@ -318,6 +294,52 @@ interface UsageSink {
  */
 declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
+/**
+ * @experimental
+ *
+ * `acquireSandbox` — cold-start-resilient sandbox acquisition. Eliminates the
+ * "create timed out at the proxy" failure mode conceptually by DECOUPLING "the
+ * create HTTP call returned" from "the sandbox is ready":
+ *
+ *   - Create is initiated with a known `name`.
+ *   - Readiness is observed from the sandbox's own `status` (`refresh()` polls
+ *     true state), NOT from whether the create call returned in time.
+ *   - If the create call itself times out at a gateway (502/503/504/522/524 or
+ *     a transport timeout), provisioning is still running server-side — so we
+ *     find the named sandbox via `list()` and wait for it to reach `running`.
+ *
+ * Result: a scale-from-zero cold start (node boot + host-agent registration,
+ * minutes) can no longer surface as a create failure behind a ~100s proxy
+ * limit. The loop becomes indifferent to whether the host pool is warm or cold.
+ *
+ * Backward-compatible: an instance that reports no `status` (the minimal fakes
+ * the loop tests use) is treated as ready — only an explicit `pending`/
+ * `provisioning` status triggers waiting, and only a retryable THROW triggers
+ * the find-by-name path. Real errors (auth, validation, budget) fail loud.
+ */
+/** @experimental */
+interface AcquireOptions {
+    /**
+     * Total budget for the sandbox to reach `running`, covering on-demand node
+     * cold-start. Default 600_000ms — matches the orchestrator's pending-host
+     * registration window so we never give up before the platform itself would.
+     */
+    readyTimeoutMs?: number;
+    /** Poll interval while waiting for `running` / for the named sandbox to appear. */
+    pollIntervalMs?: number;
+    /** Cancellation (user abort). Distinct from create-call timeouts. */
+    signal?: AbortSignal;
+    /** Stamp a name so a timed-out create is recoverable by lookup. Auto-generated if absent. */
+    name?: string;
+    /** Clock override for deterministic tests. */
+    now?: () => number;
+    /** Sleep override for deterministic tests. */
+    sleep?: (ms: number) => Promise<void>;
+}
+/** @experimental */
+declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
 /**
  * Sandbox-event → runtime-event mapping.
  *
@@ -370,4 +392,4 @@ declare function mapSandboxEvent(event: SandboxEvent, opts?: {
     agentRunName?: string;
 }): RuntimeStreamEvent | undefined;
-export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxForSpec, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
+export { type AcquireOptions, AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, PROMPT_PLANNERS, PlannerContext, type PromptPlanner, type RefineDecision, RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, acquireSandbox, blind, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, resolvePlanner, scoreFanoutVoteIterations };

package/dist/loops.js CHANGED Viewed

@@ -1,4 +1,7 @@
 import {
+  PROMPT_PLANNERS,
+  acquireSandbox,
+  blind,
   createDynamicDriver,
   createRefineDriver,
   createSandboxForSpec,
@@ -9,9 +12,10 @@ import {
   mapSandboxEvent,
   refineWinnerIndex,
   reportLoopUsage,
+  resolvePlanner,
   runLoop,
   summarizeHistory
-} from "./chunk-EKBSQYZE.js";
+} from "./chunk-S7JXV32P.js";
 import {
   createFanoutVoteDriver,
   scoreFanoutVoteIterations
@@ -19,6 +23,9 @@ import {
 import "./chunk-SQSCRJ7U.js";
 import "./chunk-DGUM43GV.js";
 export {
+  PROMPT_PLANNERS,
+  acquireSandbox,
+  blind,
   createDynamicDriver,
   createFanoutVoteDriver,
   createRefineDriver,
@@ -30,6 +37,7 @@ export {
   mapSandboxEvent,
   refineWinnerIndex,
   reportLoopUsage,
+  resolvePlanner,
   runLoop,
   scoreFanoutVoteIterations,
   summarizeHistory

package/dist/mcp/bin.js CHANGED Viewed

@@ -4,15 +4,15 @@ import {
   createPropagatingTraceEmitter,
   detectExecutor,
   readTraceContextFromEnv
-} from "../chunk-MNCB4SJ5.js";
+} from "../chunk-KDMRUD2P.js";
 import "../chunk-HVYOHJHK.js";
 import {
   createDefaultCoderDelegate
-} from "../chunk-MJDGCRAT.js";
+} from "../chunk-SKUZZCHE.js";
 import "../chunk-GLR25NG7.js";
 import {
   runLoop
-} from "../chunk-EKBSQYZE.js";
+} from "../chunk-S7JXV32P.js";
 import "../chunk-3HMHSN22.js";
 import "../chunk-PY6NMZYX.js";
 import "../chunk-SQSCRJ7U.js";

package/dist/mcp/index.d.ts CHANGED Viewed

@@ -1,11 +1,11 @@
-import { L as LoopSandboxClient, l as LoopSandboxPlacement, c as LoopTraceEmitter } from '../types-Bcp071Jg.js';
-import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegationStatus, j as DelegationProgress, k as DelegationResultPayload, l as DelegationError, m as DelegationStatusResult, n as DelegationHistoryArgs, o as DelegationHistoryEntry, p as CoderDelegate, R as ResearcherDelegate, q as DelegateCodeResult, r as DelegateFeedbackResult, s as ResearchSource, t as DelegateResearchResult, u as DelegationHistoryResult, v as DelegationStatusArgs } from '../kb-gate-DTBum3vH.js';
-export { w as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, x as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, y as DelegateCodeConfig, z as DelegateResearchConfig, A as DelegateRunCtx, F as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, O as createSiblingSandboxExecutor } from '../kb-gate-DTBum3vH.js';
+import { a as LoopSandboxClient, l as LoopSandboxPlacement, d as LoopTraceEmitter } from '../types-DbJzz2uf.js';
+import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegationStatus, j as DelegationProgress, k as DelegationResultPayload, l as DelegationError, m as DelegationStatusResult, n as DelegationHistoryArgs, o as DelegationHistoryEntry, p as CoderDelegate, R as ResearcherDelegate, q as DelegateCodeResult, r as DelegateFeedbackResult, s as ResearchSource, t as DelegateResearchResult, u as DelegationHistoryResult, v as DelegationStatusArgs } from '../kb-gate-D0ZIhFOU.js';
+export { w as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, x as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, y as DelegateCodeConfig, z as DelegateResearchConfig, A as DelegateRunCtx, F as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, O as createSiblingSandboxExecutor } from '../kb-gate-D0ZIhFOU.js';
 export { B as BuildDelegationMcpServerOptions, C as ComposeProductionAgentProfileOptions, D as DELEGATION_MCP_SERVER_KEY, b as buildDelegationMcpServer, c as composeProductionAgentProfile } from '../delegation-profile-1GbW5yA3.js';
 import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
 export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
-import { O as OtelExporter } from '../otel-export-BzvF1Ela.js';
-export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-BzvF1Ela.js';
+import { O as OtelExporter } from '../otel-export-wFDmmurL.js';
+export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-wFDmmurL.js';
 import '@tangle-network/agent-eval';
 import '@tangle-network/sandbox';
 import '../profiles.js';

package/dist/mcp/index.js CHANGED Viewed

@@ -9,7 +9,7 @@ import {
   readTraceContextFromEnv,
   removeWorktree,
   traceContextToEnv
-} from "../chunk-MNCB4SJ5.js";
+} from "../chunk-KDMRUD2P.js";
 import {
   mcpToolsForRuntimeMcp,
   mcpToolsForRuntimeMcpSubset
@@ -52,7 +52,7 @@ import {
   createDefaultCoderDelegate,
   createFleetWorkspaceExecutor,
   createSiblingSandboxExecutor
-} from "../chunk-MJDGCRAT.js";
+} from "../chunk-SKUZZCHE.js";
 import {
   runLocalHarness
 } from "../chunk-GLR25NG7.js";
@@ -61,7 +61,7 @@ import {
   buildDelegationMcpServer,
   composeProductionAgentProfile
 } from "../chunk-7JITYN6T.js";
-import "../chunk-EKBSQYZE.js";
+import "../chunk-S7JXV32P.js";
 import "../chunk-3HMHSN22.js";
 import "../chunk-PY6NMZYX.js";
 import "../chunk-SQSCRJ7U.js";

package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-wFDmmurL.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { O as OpenAIChatTool } from './types-Bcp071Jg.js';
+import { c as OpenAIChatTool } from './types-DbJzz2uf.js';
 /**
  * @experimental

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { AgentProfile } from '@tangle-network/sandbox';
-import { a as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-Bcp071Jg.js';
+import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DbJzz2uf.js';
 import '@tangle-network/agent-eval';
 /**

package/dist/run-loop-C4L1Sted.d.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { SandboxInstance } from '@tangle-network/sandbox';
+import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopSandboxClient, b as LoopResult } from './types-DbJzz2uf.js';
+/**
+ * @experimental
+ *
+ * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
+ *
+ * Each iteration:
+ *   1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
+ *   2. For each task (parallel, bounded by `maxConcurrency`):
+ *        a. round-robin an `AgentRunSpec` from `agentRuns`
+ *        b. `sandboxClient.create({ backend: { profile }, ...overrides })`
+ *        c. emit `loop.iteration.dispatch` with the placement
+ *           (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
+ *        d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
+ *   3. `output.parse(events)` → typed `Output`
+ *   4. `validator?.validate(output)` → `DefaultVerdict`
+ *   5. Append `Iteration` to history; emit `loop.iteration.ended`
+ *   6. `driver.decide(history)` → if terminal, return result + winner
+ *
+ * The kernel owns: iteration accounting, per-iteration timing, error
+ * capture, abort propagation, concurrency cap, cost aggregation, and trace
+ * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
+ * profile), how outputs are decoded (output adapter), how outputs are
+ * scored (validator), or topology (driver).
+ */
+/** @experimental */
+interface RunLoopOptions<Task, Output, Decision> {
+    driver: Driver<Task, Output, Decision>;
+    /**
+     * Single agent spec — every iteration uses this profile. Mutually
+     * exclusive with `agentRuns`.
+     */
+    agentRun?: AgentRunSpec<Task>;
+    /**
+     * Multiple specs for heterogeneous fanout. The kernel round-robins
+     * through them when the driver plans N tasks. Mutually exclusive with
+     * `agentRun`.
+     */
+    agentRuns?: AgentRunSpec<Task>[];
+    output: OutputAdapter<Output>;
+    validator?: Validator<Output>;
+    task: Task;
+    ctx: ExecCtx;
+    /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
+    maxIterations?: number;
+    /** Default 4. In-flight worker cap within a single `plan()` batch. */
+    maxConcurrency?: number;
+    /**
+     * Pre-allocated id for trace correlation. Default = `loop-${random}`.
+     * Surfaces as `runId` on every emitted `LoopTraceEvent`.
+     */
+    runId?: string;
+    /**
+     * Clock override; default `Date.now`. Deterministic tests pass a
+     * monotonic counter to stabilize iteration timing fields.
+     */
+    now?: () => number;
+    /**
+     * Override the default winner selector (highest-valid-score, ties broken
+     * by earliest iteration).
+     */
+    selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
+    /**
+     * Same-sandbox driver mode. Pass a setter and the kernel keeps each worker box
+     * alive across the `plan()` boundary and hands the latest one here, so a
+     * same-sandbox planner (`createSandboxPlanner` with `reuseBox`) can stream its
+     * move INTO the worker's live box — steering from the worker's real filesystem
+     * and state, not just a history summary. The kernel owns teardown: every box
+     * kept alive this way is destroyed at loop end (and the setter is called with
+     * `undefined` then). Without it, worker boxes are torn down per-iteration
+     * (default) and a same-sandbox planner has nothing to reuse. Intended for
+     * single-worker (refine) loops; under fanout the most-recent box is shared.
+     */
+    shareWorkerBox?: (box: SandboxInstance | undefined) => void;
+}
+/** @experimental */
+declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
+/**
+ * Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
+ * spec's profile (inferring the backend type when the spec doesn't override
+ * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
+ * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
+ */
+declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
+export { type RunLoopOptions as R, createSandboxForSpec as c, runLoop as r };

package/dist/{types-Bcp071Jg.d.ts → types-DbJzz2uf.d.ts} RENAMED Viewed

@@ -973,4 +973,4 @@ interface ExecCtx {
     parentSpanId?: string;
 }
-export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopSandboxClient as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OpenAIChatTool as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type OutputAdapter as a, type LoopResult as b, type LoopTraceEmitter as c, type LoopWinner as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };
+export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OutputAdapter as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type LoopSandboxClient as a, type LoopResult as b, type OpenAIChatTool as c, type LoopTraceEmitter as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };