@tangle-network/agent-runtime 0.43.0 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +3 -1
  2. package/dist/agent.d.ts +1 -1
  3. package/dist/agent.js +1 -1
  4. package/dist/{chunk-C5HMTTNY.js → chunk-GFKVVRQ7.js} +8 -8
  5. package/dist/{chunk-MNCB4SJ5.js → chunk-KDMRUD2P.js} +2 -2
  6. package/dist/{chunk-EKBSQYZE.js → chunk-S7JXV32P.js} +159 -25
  7. package/dist/chunk-S7JXV32P.js.map +1 -0
  8. package/dist/{chunk-MJDGCRAT.js → chunk-SKUZZCHE.js} +2 -2
  9. package/dist/{dynamic-B_7GgCwu.d.ts → dynamic-wUgp6UKs.d.ts} +1 -1
  10. package/dist/improvement.js +3 -3
  11. package/dist/index.d.ts +6 -6
  12. package/dist/index.js +15 -15
  13. package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D0ZIhFOU.d.ts} +1 -1
  14. package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-BLMa8He3.d.ts} +3 -3
  15. package/dist/loop-runner-bin.d.ts +4 -4
  16. package/dist/loop-runner-bin.js +4 -4
  17. package/dist/loops.d.ts +101 -79
  18. package/dist/loops.js +9 -1
  19. package/dist/mcp/bin.js +3 -3
  20. package/dist/mcp/index.d.ts +5 -5
  21. package/dist/mcp/index.js +3 -3
  22. package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-wFDmmurL.d.ts} +1 -1
  23. package/dist/profiles.d.ts +1 -1
  24. package/dist/run-loop-C4L1Sted.d.ts +89 -0
  25. package/dist/{types-Bcp071Jg.d.ts → types-DbJzz2uf.d.ts} +1 -1
  26. package/dist/workflow.d.ts +550 -0
  27. package/dist/workflow.js +1779 -0
  28. package/dist/workflow.js.map +1 -0
  29. package/package.json +29 -12
  30. package/dist/chunk-EKBSQYZE.js.map +0 -1
  31. /package/dist/{chunk-C5HMTTNY.js.map → chunk-GFKVVRQ7.js.map} +0 -0
  32. /package/dist/{chunk-MNCB4SJ5.js.map → chunk-KDMRUD2P.js.map} +0 -0
  33. /package/dist/{chunk-MJDGCRAT.js.map → chunk-SKUZZCHE.js.map} +0 -0
@@ -1,9 +1,9 @@
1
- import {
2
- optimizePrompt
3
- } from "./chunk-VOX6Z3II.js";
4
1
  import {
5
2
  runLocalHarness
6
3
  } from "./chunk-GLR25NG7.js";
4
+ import {
5
+ optimizePrompt
6
+ } from "./chunk-VOX6Z3II.js";
7
7
  import "./chunk-SQSCRJ7U.js";
8
8
  import "./chunk-DGUM43GV.js";
9
9
 
package/dist/index.d.ts CHANGED
@@ -1,15 +1,15 @@
1
1
  import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
3
- import { q as AgentBackendInput, r as AgentExecutionBackend, O as OpenAIChatTool, s as OpenAIChatToolChoice, t as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, u as RunAgentTaskOptions, v as AgentTaskRunResult, w as RunAgentTaskStreamOptions, x as AgentRuntimeEvent, y as AgentTaskStatus, z as RuntimeSessionStore, B as RuntimeSession } from './types-Bcp071Jg.js';
4
- export { C as AgentAdapter, F as AgentKnowledgeProvider, G as AgentRuntimeEventSink, H as AgentTaskContext, J as AgentTaskSpec, M as BackendErrorDetail, N as RuntimeRunHandle, P as RuntimeRunPersistenceAdapter, Q as RuntimeRunRow, S as startRuntimeRun } from './types-Bcp071Jg.js';
5
- export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-CVoCBmYk.js';
6
- export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-BzvF1Ela.js';
3
+ import { q as AgentBackendInput, r as AgentExecutionBackend, c as OpenAIChatTool, s as OpenAIChatToolChoice, t as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, u as RunAgentTaskOptions, v as AgentTaskRunResult, w as RunAgentTaskStreamOptions, x as AgentRuntimeEvent, y as AgentTaskStatus, z as RuntimeSessionStore, B as RuntimeSession } from './types-DbJzz2uf.js';
4
+ export { C as AgentAdapter, F as AgentKnowledgeProvider, G as AgentRuntimeEventSink, H as AgentTaskContext, J as AgentTaskSpec, M as BackendErrorDetail, N as RuntimeRunHandle, P as RuntimeRunPersistenceAdapter, Q as RuntimeRunRow, S as startRuntimeRun } from './types-DbJzz2uf.js';
5
+ export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-BLMa8He3.js';
6
+ export { E as EvalRunEvent, b as EvalRunGeneration, c as EvalRunsExportConfig, d as EvalRunsExportResult, I as INTELLIGENCE_WIRE_VERSION, e as OtelAttribute, f as OtelExportConfig, O as OtelExporter, g as OtelSpan, h as buildLoopOtelSpans, i as createOtelExporter, j as exportEvalRuns, l as loopEventToOtelSpan, m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './otel-export-wFDmmurL.js';
7
7
  import '@tangle-network/sandbox';
8
8
  import '@tangle-network/agent-eval/campaign';
9
9
  import './types-p8dWBIXL.js';
10
10
  import './optimize-prompt-D-urF2wW.js';
11
- import './dynamic-B_7GgCwu.js';
12
- import './kb-gate-DTBum3vH.js';
11
+ import './dynamic-wUgp6UKs.js';
12
+ import './kb-gate-D0ZIhFOU.js';
13
13
  import './profiles.js';
14
14
 
15
15
  /**
package/dist/index.js CHANGED
@@ -1,3 +1,14 @@
1
+ import {
2
+ mcpToolsForRuntimeMcp,
3
+ mcpToolsForRuntimeMcpSubset
4
+ } from "./chunk-NRZOXCJK.js";
5
+ import {
6
+ INTELLIGENCE_WIRE_VERSION,
7
+ buildLoopOtelSpans,
8
+ createOtelExporter,
9
+ exportEvalRuns,
10
+ loopEventToOtelSpan
11
+ } from "./chunk-HVYOHJHK.js";
1
12
  import {
2
13
  DELEGATED_LOOP_MODES,
3
14
  auditLoopRunner,
@@ -10,23 +21,12 @@ import {
10
21
  runDelegatedLoop,
11
22
  runLoopRunnerCli,
12
23
  selfImproveLoopRunner
13
- } from "./chunk-C5HMTTNY.js";
24
+ } from "./chunk-GFKVVRQ7.js";
25
+ import "./chunk-FNMGYYSS.js";
26
+ import "./chunk-SKUZZCHE.js";
14
27
  import "./chunk-XBUG326M.js";
15
28
  import "./chunk-VOX6Z3II.js";
16
- import {
17
- mcpToolsForRuntimeMcp,
18
- mcpToolsForRuntimeMcpSubset
19
- } from "./chunk-NRZOXCJK.js";
20
- import {
21
- INTELLIGENCE_WIRE_VERSION,
22
- buildLoopOtelSpans,
23
- createOtelExporter,
24
- exportEvalRuns,
25
- loopEventToOtelSpan
26
- } from "./chunk-HVYOHJHK.js";
27
- import "./chunk-FNMGYYSS.js";
28
- import "./chunk-MJDGCRAT.js";
29
- import "./chunk-EKBSQYZE.js";
29
+ import "./chunk-S7JXV32P.js";
30
30
  import "./chunk-3HMHSN22.js";
31
31
  import "./chunk-PY6NMZYX.js";
32
32
  import {
@@ -1,5 +1,5 @@
1
1
  import { CoderOutput, CoderTask } from './profiles.js';
2
- import { L as LoopSandboxClient, c as LoopTraceEmitter } from './types-Bcp071Jg.js';
2
+ import { a as LoopSandboxClient, d as LoopTraceEmitter } from './types-DbJzz2uf.js';
3
3
  import { SandboxInstance } from '@tangle-network/sandbox';
4
4
 
5
5
  /**
@@ -1,9 +1,9 @@
1
1
  import { Scenario } from '@tangle-network/agent-eval/campaign';
2
2
  import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
3
3
  import { O as OptimizePromptOptions, a as OptimizePromptResult } from './optimize-prompt-D-urF2wW.js';
4
- import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-B_7GgCwu.js';
5
- import { L as LoopSandboxClient, a as OutputAdapter, V as Validator, A as AgentRunSpec, b as LoopResult } from './types-Bcp071Jg.js';
6
- import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-DTBum3vH.js';
4
+ import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-wUgp6UKs.js';
5
+ import { a as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, b as LoopResult } from './types-DbJzz2uf.js';
6
+ import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-D0ZIhFOU.js';
7
7
  import { CoderOutput } from './profiles.js';
8
8
 
9
9
  /**
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env node
2
- export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-CVoCBmYk.js';
2
+ export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-BLMa8He3.js';
3
3
  import '@tangle-network/agent-eval/campaign';
4
4
  import './types-p8dWBIXL.js';
5
5
  import '@tangle-network/agent-eval';
6
6
  import './optimize-prompt-D-urF2wW.js';
7
- import './dynamic-B_7GgCwu.js';
8
- import './types-Bcp071Jg.js';
7
+ import './dynamic-wUgp6UKs.js';
8
+ import './types-DbJzz2uf.js';
9
9
  import '@tangle-network/sandbox';
10
- import './kb-gate-DTBum3vH.js';
10
+ import './kb-gate-D0ZIhFOU.js';
11
11
  import './profiles.js';
@@ -2,12 +2,12 @@
2
2
  import {
3
3
  parseLoopRunnerArgv,
4
4
  runLoopRunnerCli
5
- } from "./chunk-C5HMTTNY.js";
5
+ } from "./chunk-GFKVVRQ7.js";
6
+ import "./chunk-FNMGYYSS.js";
7
+ import "./chunk-SKUZZCHE.js";
6
8
  import "./chunk-XBUG326M.js";
7
9
  import "./chunk-VOX6Z3II.js";
8
- import "./chunk-FNMGYYSS.js";
9
- import "./chunk-MJDGCRAT.js";
10
- import "./chunk-EKBSQYZE.js";
10
+ import "./chunk-S7JXV32P.js";
11
11
  import "./chunk-3HMHSN22.js";
12
12
  import "./chunk-PY6NMZYX.js";
13
13
  import "./chunk-SQSCRJ7U.js";
package/dist/loops.d.ts CHANGED
@@ -1,12 +1,14 @@
1
- import { AgentProfile, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
1
+ import { AgentProfile, SandboxEvent, SandboxInstance, CreateSandboxOptions } from '@tangle-network/sandbox';
2
2
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { P as PlannerContext, T as TopologyPlanner } from './dynamic-B_7GgCwu.js';
4
- export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-B_7GgCwu.js';
5
- import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, a as OutputAdapter, V as Validator, E as ExecCtx, d as LoopWinner, b as LoopResult, R as RuntimeStreamEvent } from './types-Bcp071Jg.js';
6
- export { e as LoopDecisionPayload, f as LoopEndedPayload, g as LoopIterationDispatchPayload, h as LoopIterationEndedPayload, i as LoopIterationStartedPayload, j as LoopPlanDescription, k as LoopPlanPayload, l as LoopSandboxPlacement, m as LoopStartedPayload, n as LoopTokenUsage, c as LoopTraceEmitter, o as LoopTraceEvent, p as ValidationCtx } from './types-Bcp071Jg.js';
3
+ import { T as TopologyPlanner, P as PlannerContext } from './dynamic-wUgp6UKs.js';
4
+ export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-wUgp6UKs.js';
5
+ import { D as Driver, I as Iteration, a as LoopSandboxClient, A as AgentRunSpec, b as LoopResult, R as RuntimeStreamEvent } from './types-DbJzz2uf.js';
6
+ export { E as ExecCtx, e as LoopDecisionPayload, f as LoopEndedPayload, g as LoopIterationDispatchPayload, h as LoopIterationEndedPayload, i as LoopIterationStartedPayload, j as LoopPlanDescription, k as LoopPlanPayload, l as LoopSandboxPlacement, m as LoopStartedPayload, n as LoopTokenUsage, d as LoopTraceEmitter, o as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, p as ValidationCtx, V as Validator } from './types-DbJzz2uf.js';
7
7
  import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
8
8
  export { DefaultVerdict } from '@tangle-network/agent-eval';
9
9
  import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
10
+ import { R as RunLoopOptions } from './run-loop-C4L1Sted.js';
11
+ export { c as createSandboxForSpec, r as runLoop } from './run-loop-C4L1Sted.js';
10
12
 
11
13
  /**
12
14
  * @experimental
@@ -52,6 +54,36 @@ declare function createFanoutVoteDriver<Task, Output>(options: CreateFanoutVoteD
52
54
  */
53
55
  declare function scoreFanoutVoteIterations<Task, Output>(iterations: ReadonlyArray<Iteration<Task, Output>>): FanoutVoteScored<Task, Output>[];
54
56
 
57
+ /**
58
+ * @experimental
59
+ *
60
+ * Named driver policies — a registry, not a constructor zoo.
61
+ *
62
+ * A "driver variant" is just a `TopologyPlanner` chosen by name and run by the
63
+ * one interpreter (`createDynamicDriver`). The agentic variants are sandboxed
64
+ * agents (`createSandboxPlanner`) — an LLM/agent in a box that emits the next
65
+ * move; this registry holds the *deterministic* ones a benchmark needs as
66
+ * controls. Today that's `blind`: a single attempt, no steering — the baseline
67
+ * you measure a real driver against.
68
+ *
69
+ * Adding a variant is a line here (or a sandboxed planner registered by name),
70
+ * never a new `createXDriver` factory and never a spec schema.
71
+ */
72
+
73
+ /** A driver policy over prompt-shaped (string) tasks. Output is consulted only
74
+ * through the iteration's verdict, so it stays `unknown`. */
75
+ type PromptPlanner = TopologyPlanner<string, unknown>;
76
+ /**
77
+ * `blind` — one attempt, then stop. The no-driver control: a single worker run
78
+ * with no steering, so a benchmark can isolate what a real driver adds.
79
+ */
80
+ declare const blind: PromptPlanner;
81
+ /** The registry. Pick a driver by name (e.g. `DRIVER=blind`); fail loud on an
82
+ * unknown key. Sandboxed-agent planners can be registered here too. */
83
+ declare const PROMPT_PLANNERS: Record<string, PromptPlanner>;
84
+ /** Resolve a planner by name; fail loud on an unknown variant. */
85
+ declare function resolvePlanner(name: string): PromptPlanner;
86
+
55
87
  /**
56
88
  * @experimental
57
89
  *
@@ -104,6 +136,13 @@ declare function refineWinnerIndex<Task, Output>(iterations: ReadonlyArray<Itera
104
136
  * the planner never names which harness runs a branch — the kernel's
105
137
  * `agentRuns` round-robin decides that.
106
138
  *
139
+ * Three execution modes, all the same code path:
140
+ * - LLM call — a cheap-model `profile`; one prompt → one move.
141
+ * - different sandbox (default) — a fresh planner-owned box per round.
142
+ * - same sandbox — pass `reuseBox` to stream the move into the worker's
143
+ * own box (a session against its live filesystem/state),
144
+ * so the driver steers from what the worker actually did.
145
+ *
107
146
  * Envelope contract the agent must emit (fenced ```json or a structured
108
147
  * `result`/`final` event payload):
109
148
  * { "kind": "refine" | "fanout" | "stop",
@@ -136,89 +175,26 @@ interface CreateSandboxPlannerOptions<Task, Output> {
136
175
  */
137
176
  decodeTask: (raw: unknown, ctx: PlannerContext<Task, Output>) => Task;
138
177
  /** Override the default prompt (history summary + envelope contract). */
139
- buildPrompt?: (ctx: PlannerContext<Task, Output>) => string;
178
+ buildPrompt?: (ctx: PlannerContext<Task, Output>) => string | Promise<string>;
140
179
  /** Override envelope extraction from the event stream. */
141
180
  parseEnvelope?: (events: SandboxEvent[]) => TopologyMoveEnvelope | undefined;
142
181
  /** Sandbox overrides for the planner sandbox (timeouts, env, etc.). */
143
182
  sandboxOverrides?: AgentRunSpec<Task>['sandboxOverrides'];
144
183
  /** Cancellation for the planner's own LLM call. */
145
184
  signal?: AbortSignal;
146
- }
147
- /** @experimental */
148
- declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
149
-
150
- /**
151
- * @experimental
152
- *
153
- * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
154
- *
155
- * Each iteration:
156
- * 1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
157
- * 2. For each task (parallel, bounded by `maxConcurrency`):
158
- * a. round-robin an `AgentRunSpec` from `agentRuns`
159
- * b. `sandboxClient.create({ backend: { profile }, ...overrides })`
160
- * c. emit `loop.iteration.dispatch` with the placement
161
- * (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
162
- * d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
163
- * 3. `output.parse(events)` → typed `Output`
164
- * 4. `validator?.validate(output)` → `DefaultVerdict`
165
- * 5. Append `Iteration` to history; emit `loop.iteration.ended`
166
- * 6. `driver.decide(history)` → if terminal, return result + winner
167
- *
168
- * The kernel owns: iteration accounting, per-iteration timing, error
169
- * capture, abort propagation, concurrency cap, cost aggregation, and trace
170
- * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
171
- * profile), how outputs are decoded (output adapter), how outputs are
172
- * scored (validator), or topology (driver).
173
- */
174
-
175
- /** @experimental */
176
- interface RunLoopOptions<Task, Output, Decision> {
177
- driver: Driver<Task, Output, Decision>;
178
- /**
179
- * Single agent spec — every iteration uses this profile. Mutually
180
- * exclusive with `agentRuns`.
181
- */
182
- agentRun?: AgentRunSpec<Task>;
183
- /**
184
- * Multiple specs for heterogeneous fanout. The kernel round-robins
185
- * through them when the driver plans N tasks. Mutually exclusive with
186
- * `agentRun`.
187
- */
188
- agentRuns?: AgentRunSpec<Task>[];
189
- output: OutputAdapter<Output>;
190
- validator?: Validator<Output>;
191
- task: Task;
192
- ctx: ExecCtx;
193
- /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
194
- maxIterations?: number;
195
- /** Default 4. In-flight worker cap within a single `plan()` batch. */
196
- maxConcurrency?: number;
197
- /**
198
- * Pre-allocated id for trace correlation. Default = `loop-${random}`.
199
- * Surfaces as `runId` on every emitted `LoopTraceEvent`.
200
- */
201
- runId?: string;
202
- /**
203
- * Clock override; default `Date.now`. Deterministic tests pass a
204
- * monotonic counter to stabilize iteration timing fields.
205
- */
206
- now?: () => number;
207
185
  /**
208
- * Override the default winner selector (highest-valid-score, ties broken
209
- * by earliest iteration).
186
+ * Same-sandbox mode. Return an existing box and the planner streams its move
187
+ * INTO that box (a session against the worker's environment) instead of
188
+ * spinning its own — so the driver can inspect the worker's real filesystem
189
+ * and state, not just the history summary. The returned box's lifecycle is
190
+ * the CALLER's: the planner neither creates nor deletes it. Return
191
+ * `undefined` to fall back to the default (a fresh, planner-owned box =
192
+ * different-sandbox mode). Omit entirely for the default.
210
193
  */
211
- selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
194
+ reuseBox?: () => SandboxInstance | undefined | Promise<SandboxInstance | undefined>;
212
195
  }
213
196
  /** @experimental */
214
- declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
215
- /**
216
- * Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
217
- * spec's profile (inferring the backend type when the spec doesn't override
218
- * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
219
- * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
220
- */
221
- declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
197
+ declare function createSandboxPlanner<Task, Output>(opts: CreateSandboxPlannerOptions<Task, Output>): TopologyPlanner<Task, Output>;
222
198
 
223
199
  /**
224
200
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
@@ -318,6 +294,52 @@ interface UsageSink {
318
294
  */
319
295
  declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
320
296
 
297
+ /**
298
+ * @experimental
299
+ *
300
+ * `acquireSandbox` — cold-start-resilient sandbox acquisition. Eliminates the
301
+ * "create timed out at the proxy" failure mode conceptually by DECOUPLING "the
302
+ * create HTTP call returned" from "the sandbox is ready":
303
+ *
304
+ * - Create is initiated with a known `name`.
305
+ * - Readiness is observed from the sandbox's own `status` (`refresh()` polls
306
+ * true state), NOT from whether the create call returned in time.
307
+ * - If the create call itself times out at a gateway (502/503/504/522/524 or
308
+ * a transport timeout), provisioning is still running server-side — so we
309
+ * find the named sandbox via `list()` and wait for it to reach `running`.
310
+ *
311
+ * Result: a scale-from-zero cold start (node boot + host-agent registration,
312
+ * minutes) can no longer surface as a create failure behind a ~100s proxy
313
+ * limit. The loop becomes indifferent to whether the host pool is warm or cold.
314
+ *
315
+ * Backward-compatible: an instance that reports no `status` (the minimal fakes
316
+ * the loop tests use) is treated as ready — only an explicit `pending`/
317
+ * `provisioning` status triggers waiting, and only a retryable THROW triggers
318
+ * the find-by-name path. Real errors (auth, validation, budget) fail loud.
319
+ */
320
+
321
+ /** @experimental */
322
+ interface AcquireOptions {
323
+ /**
324
+ * Total budget for the sandbox to reach `running`, covering on-demand node
325
+ * cold-start. Default 600_000ms — matches the orchestrator's pending-host
326
+ * registration window so we never give up before the platform itself would.
327
+ */
328
+ readyTimeoutMs?: number;
329
+ /** Poll interval while waiting for `running` / for the named sandbox to appear. */
330
+ pollIntervalMs?: number;
331
+ /** Cancellation (user abort). Distinct from create-call timeouts. */
332
+ signal?: AbortSignal;
333
+ /** Stamp a name so a timed-out create is recoverable by lookup. Auto-generated if absent. */
334
+ name?: string;
335
+ /** Clock override for deterministic tests. */
336
+ now?: () => number;
337
+ /** Sleep override for deterministic tests. */
338
+ sleep?: (ms: number) => Promise<void>;
339
+ }
340
+ /** @experimental */
341
+ declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
342
+
321
343
  /**
322
344
  * Sandbox-event → runtime-event mapping.
323
345
  *
@@ -370,4 +392,4 @@ declare function mapSandboxEvent(event: SandboxEvent, opts?: {
370
392
  agentRunName?: string;
371
393
  }): RuntimeStreamEvent | undefined;
372
394
 
373
- export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxForSpec, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
395
+ export { type AcquireOptions, AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, PROMPT_PLANNERS, PlannerContext, type PromptPlanner, type RefineDecision, RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, acquireSandbox, blind, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, extractLlmCallEvent, loopCampaignDispatch, loopDispatch, mapSandboxEvent, refineWinnerIndex, reportLoopUsage, resolvePlanner, scoreFanoutVoteIterations };
package/dist/loops.js CHANGED
@@ -1,4 +1,7 @@
1
1
  import {
2
+ PROMPT_PLANNERS,
3
+ acquireSandbox,
4
+ blind,
2
5
  createDynamicDriver,
3
6
  createRefineDriver,
4
7
  createSandboxForSpec,
@@ -9,9 +12,10 @@ import {
9
12
  mapSandboxEvent,
10
13
  refineWinnerIndex,
11
14
  reportLoopUsage,
15
+ resolvePlanner,
12
16
  runLoop,
13
17
  summarizeHistory
14
- } from "./chunk-EKBSQYZE.js";
18
+ } from "./chunk-S7JXV32P.js";
15
19
  import {
16
20
  createFanoutVoteDriver,
17
21
  scoreFanoutVoteIterations
@@ -19,6 +23,9 @@ import {
19
23
  import "./chunk-SQSCRJ7U.js";
20
24
  import "./chunk-DGUM43GV.js";
21
25
  export {
26
+ PROMPT_PLANNERS,
27
+ acquireSandbox,
28
+ blind,
22
29
  createDynamicDriver,
23
30
  createFanoutVoteDriver,
24
31
  createRefineDriver,
@@ -30,6 +37,7 @@ export {
30
37
  mapSandboxEvent,
31
38
  refineWinnerIndex,
32
39
  reportLoopUsage,
40
+ resolvePlanner,
33
41
  runLoop,
34
42
  scoreFanoutVoteIterations,
35
43
  summarizeHistory
package/dist/mcp/bin.js CHANGED
@@ -4,15 +4,15 @@ import {
4
4
  createPropagatingTraceEmitter,
5
5
  detectExecutor,
6
6
  readTraceContextFromEnv
7
- } from "../chunk-MNCB4SJ5.js";
7
+ } from "../chunk-KDMRUD2P.js";
8
8
  import "../chunk-HVYOHJHK.js";
9
9
  import {
10
10
  createDefaultCoderDelegate
11
- } from "../chunk-MJDGCRAT.js";
11
+ } from "../chunk-SKUZZCHE.js";
12
12
  import "../chunk-GLR25NG7.js";
13
13
  import {
14
14
  runLoop
15
- } from "../chunk-EKBSQYZE.js";
15
+ } from "../chunk-S7JXV32P.js";
16
16
  import "../chunk-3HMHSN22.js";
17
17
  import "../chunk-PY6NMZYX.js";
18
18
  import "../chunk-SQSCRJ7U.js";
@@ -1,11 +1,11 @@
1
- import { L as LoopSandboxClient, l as LoopSandboxPlacement, c as LoopTraceEmitter } from '../types-Bcp071Jg.js';
2
- import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegationStatus, j as DelegationProgress, k as DelegationResultPayload, l as DelegationError, m as DelegationStatusResult, n as DelegationHistoryArgs, o as DelegationHistoryEntry, p as CoderDelegate, R as ResearcherDelegate, q as DelegateCodeResult, r as DelegateFeedbackResult, s as ResearchSource, t as DelegateResearchResult, u as DelegationHistoryResult, v as DelegationStatusArgs } from '../kb-gate-DTBum3vH.js';
3
- export { w as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, x as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, y as DelegateCodeConfig, z as DelegateResearchConfig, A as DelegateRunCtx, F as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, O as createSiblingSandboxExecutor } from '../kb-gate-DTBum3vH.js';
1
+ import { a as LoopSandboxClient, l as LoopSandboxPlacement, d as LoopTraceEmitter } from '../types-DbJzz2uf.js';
2
+ import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegationStatus, j as DelegationProgress, k as DelegationResultPayload, l as DelegationError, m as DelegationStatusResult, n as DelegationHistoryArgs, o as DelegationHistoryEntry, p as CoderDelegate, R as ResearcherDelegate, q as DelegateCodeResult, r as DelegateFeedbackResult, s as ResearchSource, t as DelegateResearchResult, u as DelegationHistoryResult, v as DelegationStatusArgs } from '../kb-gate-D0ZIhFOU.js';
3
+ export { w as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, x as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, y as DelegateCodeConfig, z as DelegateResearchConfig, A as DelegateRunCtx, F as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, O as createSiblingSandboxExecutor } from '../kb-gate-D0ZIhFOU.js';
4
4
  export { B as BuildDelegationMcpServerOptions, C as ComposeProductionAgentProfileOptions, D as DELEGATION_MCP_SERVER_KEY, b as buildDelegationMcpServer, c as composeProductionAgentProfile } from '../delegation-profile-1GbW5yA3.js';
5
5
  import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
6
6
  export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
7
- import { O as OtelExporter } from '../otel-export-BzvF1Ela.js';
8
- export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-BzvF1Ela.js';
7
+ import { O as OtelExporter } from '../otel-export-wFDmmurL.js';
8
+ export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-wFDmmurL.js';
9
9
  import '@tangle-network/agent-eval';
10
10
  import '@tangle-network/sandbox';
11
11
  import '../profiles.js';
package/dist/mcp/index.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  readTraceContextFromEnv,
10
10
  removeWorktree,
11
11
  traceContextToEnv
12
- } from "../chunk-MNCB4SJ5.js";
12
+ } from "../chunk-KDMRUD2P.js";
13
13
  import {
14
14
  mcpToolsForRuntimeMcp,
15
15
  mcpToolsForRuntimeMcpSubset
@@ -52,7 +52,7 @@ import {
52
52
  createDefaultCoderDelegate,
53
53
  createFleetWorkspaceExecutor,
54
54
  createSiblingSandboxExecutor
55
- } from "../chunk-MJDGCRAT.js";
55
+ } from "../chunk-SKUZZCHE.js";
56
56
  import {
57
57
  runLocalHarness
58
58
  } from "../chunk-GLR25NG7.js";
@@ -61,7 +61,7 @@ import {
61
61
  buildDelegationMcpServer,
62
62
  composeProductionAgentProfile
63
63
  } from "../chunk-7JITYN6T.js";
64
- import "../chunk-EKBSQYZE.js";
64
+ import "../chunk-S7JXV32P.js";
65
65
  import "../chunk-3HMHSN22.js";
66
66
  import "../chunk-PY6NMZYX.js";
67
67
  import "../chunk-SQSCRJ7U.js";
@@ -1,4 +1,4 @@
1
- import { O as OpenAIChatTool } from './types-Bcp071Jg.js';
1
+ import { c as OpenAIChatTool } from './types-DbJzz2uf.js';
2
2
 
3
3
  /**
4
4
  * @experimental
@@ -1,5 +1,5 @@
1
1
  import { AgentProfile } from '@tangle-network/sandbox';
2
- import { a as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-Bcp071Jg.js';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DbJzz2uf.js';
3
3
  import '@tangle-network/agent-eval';
4
4
 
5
5
  /**
@@ -0,0 +1,89 @@
1
+ import { SandboxInstance } from '@tangle-network/sandbox';
2
+ import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopSandboxClient, b as LoopResult } from './types-DbJzz2uf.js';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * `runLoop` — the topology-agnostic kernel built atop the sandbox SDK.
8
+ *
9
+ * Each iteration:
10
+ * 1. `driver.plan(task, history)` → N tasks (1 = refine, N = fanout, 0 = stop)
11
+ * 2. For each task (parallel, bounded by `maxConcurrency`):
12
+ * a. round-robin an `AgentRunSpec` from `agentRuns`
13
+ * b. `sandboxClient.create({ backend: { profile }, ...overrides })`
14
+ * c. emit `loop.iteration.dispatch` with the placement
15
+ * (`{ sibling, sandboxId }` or `{ fleet, fleetId, machineId, sandboxId }`)
16
+ * d. iterate `box.streamPrompt(taskToPrompt(task))` and collect events
17
+ * 3. `output.parse(events)` → typed `Output`
18
+ * 4. `validator?.validate(output)` → `DefaultVerdict`
19
+ * 5. Append `Iteration` to history; emit `loop.iteration.ended`
20
+ * 6. `driver.decide(history)` → if terminal, return result + winner
21
+ *
22
+ * The kernel owns: iteration accounting, per-iteration timing, error
23
+ * capture, abort propagation, concurrency cap, cost aggregation, and trace
24
+ * emission. The kernel does NOT own: what the agent runs (sandbox SDK +
25
+ * profile), how outputs are decoded (output adapter), how outputs are
26
+ * scored (validator), or topology (driver).
27
+ */
28
+
29
+ /** @experimental */
30
+ interface RunLoopOptions<Task, Output, Decision> {
31
+ driver: Driver<Task, Output, Decision>;
32
+ /**
33
+ * Single agent spec — every iteration uses this profile. Mutually
34
+ * exclusive with `agentRuns`.
35
+ */
36
+ agentRun?: AgentRunSpec<Task>;
37
+ /**
38
+ * Multiple specs for heterogeneous fanout. The kernel round-robins
39
+ * through them when the driver plans N tasks. Mutually exclusive with
40
+ * `agentRun`.
41
+ */
42
+ agentRuns?: AgentRunSpec<Task>[];
43
+ output: OutputAdapter<Output>;
44
+ validator?: Validator<Output>;
45
+ task: Task;
46
+ ctx: ExecCtx;
47
+ /** Default 10. Hard cap on total iterations across all `plan()` rounds. */
48
+ maxIterations?: number;
49
+ /** Default 4. In-flight worker cap within a single `plan()` batch. */
50
+ maxConcurrency?: number;
51
+ /**
52
+ * Pre-allocated id for trace correlation. Default = `loop-${random}`.
53
+ * Surfaces as `runId` on every emitted `LoopTraceEvent`.
54
+ */
55
+ runId?: string;
56
+ /**
57
+ * Clock override; default `Date.now`. Deterministic tests pass a
58
+ * monotonic counter to stabilize iteration timing fields.
59
+ */
60
+ now?: () => number;
61
+ /**
62
+ * Override the default winner selector (highest-valid-score, ties broken
63
+ * by earliest iteration).
64
+ */
65
+ selectWinner?: (iterations: Iteration<Task, Output>[]) => LoopWinner<Task, Output> | undefined;
66
+ /**
67
+ * Same-sandbox driver mode. Pass a setter and the kernel keeps each worker box
68
+ * alive across the `plan()` boundary and hands the latest one here, so a
69
+ * same-sandbox planner (`createSandboxPlanner` with `reuseBox`) can stream its
70
+ * move INTO the worker's live box — steering from the worker's real filesystem
71
+ * and state, not just a history summary. The kernel owns teardown: every box
72
+ * kept alive this way is destroyed at loop end (and the setter is called with
73
+ * `undefined` then). Without it, worker boxes are torn down per-iteration
74
+ * (default) and a same-sandbox planner has nothing to reuse. Intended for
75
+ * single-worker (refine) loops; under fanout the most-recent box is shared.
76
+ */
77
+ shareWorkerBox?: (box: SandboxInstance | undefined) => void;
78
+ }
79
+ /** @experimental */
80
+ declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
81
+ /**
82
+ * Instantiate a sandbox for an `AgentRunSpec`: sets `backend.profile` to the
83
+ * spec's profile (inferring the backend type when the spec doesn't override
84
+ * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
85
+ * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
86
+ */
87
+ declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
88
+
89
+ export { type RunLoopOptions as R, createSandboxForSpec as c, runLoop as r };
@@ -973,4 +973,4 @@ interface ExecCtx {
973
973
  parentSpanId?: string;
974
974
  }
975
975
 
976
- export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopSandboxClient as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OpenAIChatTool as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type OutputAdapter as a, type LoopResult as b, type LoopTraceEmitter as c, type LoopWinner as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };
976
+ export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OutputAdapter as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type LoopSandboxClient as a, type LoopResult as b, type OpenAIChatTool as c, type LoopTraceEmitter as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };