@tangle-network/agent-runtime 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +95 -203
  2. package/dist/agent.d.ts +3 -2
  3. package/dist/agent.js +5 -7
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +28 -2
  6. package/dist/analyst-loop.js +4 -1
  7. package/dist/audit.d.ts +93 -0
  8. package/dist/audit.js +312 -0
  9. package/dist/audit.js.map +1 -0
  10. package/dist/chunk-4B6U4CVQ.js +15 -0
  11. package/dist/chunk-4B6U4CVQ.js.map +1 -0
  12. package/dist/chunk-65FQLI4V.js +4089 -0
  13. package/dist/chunk-65FQLI4V.js.map +1 -0
  14. package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
  15. package/dist/chunk-GN75RGM6.js.map +1 -0
  16. package/dist/chunk-GSUO5QS6.js +146 -0
  17. package/dist/chunk-GSUO5QS6.js.map +1 -0
  18. package/dist/chunk-HNUXAZIJ.js +580 -0
  19. package/dist/chunk-HNUXAZIJ.js.map +1 -0
  20. package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
  21. package/dist/chunk-I42NHLKX.js.map +1 -0
  22. package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
  23. package/dist/chunk-JNPK46YH.js.map +1 -0
  24. package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
  25. package/dist/chunk-KADIJAD4.js.map +1 -0
  26. package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
  27. package/dist/chunk-KPN7OQ64.js.map +1 -0
  28. package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
  29. package/dist/chunk-WIR4HOOJ.js +27 -0
  30. package/dist/chunk-WIR4HOOJ.js.map +1 -0
  31. package/dist/coder-DCWFQpmJ.d.ts +114 -0
  32. package/dist/driver-C-mtBo7h.d.ts +221 -0
  33. package/dist/improvement.d.ts +0 -1
  34. package/dist/improvement.js +0 -5
  35. package/dist/improvement.js.map +1 -1
  36. package/dist/index.d.ts +122 -9
  37. package/dist/index.js +398 -10
  38. package/dist/index.js.map +1 -1
  39. package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
  40. package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
  41. package/dist/loop-runner-bin.d.ts +8 -6
  42. package/dist/loop-runner-bin.js +6 -8
  43. package/dist/loops.d.ts +7 -393
  44. package/dist/loops.js +96 -27
  45. package/dist/mcp/bin.js +7 -7
  46. package/dist/mcp/bin.js.map +1 -1
  47. package/dist/mcp/index.d.ts +286 -13
  48. package/dist/mcp/index.js +341 -9
  49. package/dist/mcp/index.js.map +1 -1
  50. package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
  51. package/dist/profiles.d.ts +385 -86
  52. package/dist/profiles.js +549 -4
  53. package/dist/profiles.js.map +1 -1
  54. package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
  55. package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
  56. package/dist/runtime.d.ts +1964 -0
  57. package/dist/runtime.js +114 -0
  58. package/dist/runtime.js.map +1 -0
  59. package/dist/substrate-CUgk7F7s.d.ts +77 -0
  60. package/dist/topology.d.ts +73 -0
  61. package/dist/topology.js +111 -0
  62. package/dist/topology.js.map +1 -0
  63. package/dist/types-BfoeiQRZ.d.ts +438 -0
  64. package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
  65. package/dist/workflow.d.ts +4 -3
  66. package/dist/workflow.js +4 -5
  67. package/dist/workflow.js.map +1 -1
  68. package/package.json +37 -28
  69. package/skills/agent-runtime-adoption/SKILL.md +32 -29
  70. package/skills/generate-eval/SKILL.md +60 -0
  71. package/dist/chunk-3HMHSN22.js.map +0 -1
  72. package/dist/chunk-GFKVVRQ7.js.map +0 -1
  73. package/dist/chunk-HVYOHJHK.js.map +0 -1
  74. package/dist/chunk-KDMRUD2P.js.map +0 -1
  75. package/dist/chunk-PY6NMZYX.js +0 -52
  76. package/dist/chunk-PY6NMZYX.js.map +0 -1
  77. package/dist/chunk-S7JXV32P.js +0 -947
  78. package/dist/chunk-S7JXV32P.js.map +0 -1
  79. package/dist/chunk-SKUZZCHE.js.map +0 -1
  80. package/dist/chunk-SQSCRJ7U.js +0 -65
  81. package/dist/chunk-SQSCRJ7U.js.map +0 -1
  82. package/dist/chunk-VOX6Z3II.js +0 -90
  83. package/dist/chunk-VOX6Z3II.js.map +0 -1
  84. package/dist/chunk-XBUG326M.js +0 -261
  85. package/dist/chunk-XBUG326M.js.map +0 -1
  86. package/dist/dynamic-wUgp6UKs.d.ts +0 -108
  87. package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
  88. /package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0
@@ -0,0 +1,438 @@
1
+ import { DefaultVerdict } from '@tangle-network/agent-eval';
2
+ import { AgentProfile, BackendType } from '@tangle-network/sandbox';
3
+ import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
4
+ import { c as LoopTokenUsage } from './types-DnYoHvvZ.js';
5
+
6
+ /**
7
+ * @experimental
8
+ *
9
+ * Recursive execution atom — the FROZEN type surface (the keystone contract).
10
+ *
11
+ * One self-similar `Agent` atom runs inside a budget-conserving reactive `Scope`,
12
+ * orchestrated by a `Supervisor` over an event-sourced `SpawnJournal`. A leaf is an
13
+ * `Agent` that never calls `scope.spawn`; a driver is an `Agent` that spawns and runs
14
+ * a policy over its children's streaming results.
15
+ *
16
+ * Two invariants the surface exists to make enforceable:
17
+ * - Budget is an atomically-reserved CONSERVED pool, so `Σk(treatment) ≡ Σk(blind)` by
18
+ * construction (reserve-on-spawn, refund-unspent-on-settle, fail-closed admission).
19
+ * - The journal records a content-addressed `outRef` per child result, so replay
20
+ * rehydrates the exact `Settled` the driver branched on (the replay invariant below).
21
+ *
22
+ * The leaf RUNTIME is one OPEN `Executor` interface, not a closed `inline|sandbox|cli`
23
+ * union the call site switches on. The built-ins (router/inline, sandbox, cli) are the
24
+ * initial IMPLEMENTATIONS; any user agent is first-class the moment it implements the
25
+ * interface. The interface IS the extension point — no per-vendor adapters live here.
26
+ *
27
+ * Layering: substrate types (`DefaultVerdict`) come from `@tangle-network/agent-eval`;
28
+ * runtime-shaped types (everything else) live here. Pure types/interfaces only — this
29
+ * module typechecks standalone and is imported by every keystone impl.
30
+ */
31
+
32
+ /**
33
+ * One self-similar atom. A leaf is an `Agent` that never calls `scope.spawn`; a driver
34
+ * is an `Agent` whose `act` spawns children and reacts to them via `scope.next()`. An
35
+ * analyst is an `Agent` whose task is "read these traces → findings" — `where` it runs
36
+ * is its executor, not a separate type.
37
+ *
38
+ * `act` MUST be replay-safe: it may read `verdict`, `spent`, and `out` (rehydrated by
39
+ * `outRef`) off each `Settled`; it MUST NOT read `Date.now`, `Math.random`, or any
40
+ * unordered collection. `scope.next()` delivers strictly in recorded `seq` order.
41
+ */
42
+ interface Agent<Task, Out> {
43
+ readonly name: string;
44
+ act(task: Task, scope: Scope<Out>): Promise<Out>;
45
+ }
46
+ /**
47
+ * The leaf runtime — ONE open interface, not a closed union. `execute` returns a
48
+ * `Promise<ExecutorResult>` for one-shot executors OR an `AsyncIterable<UsageEvent>` for
49
+ * streaming ones; a streaming executor reports incremental normalized usage as it runs
50
+ * (the budget pool reconciles against it) and exposes its terminal artifact via
51
+ * `resultArtifact()`. Both shapes normalize usage to `UsageEvent` so the conserved pool
52
+ * meters every runtime identically.
53
+ *
54
+ * Built-in implementations (in `runtime.ts`, NOT variants here): router/inline (a direct
55
+ * Router/HTTP inference call, no box), sandbox (COMPOSES `runLoop` as a leaf, forwarding
56
+ * PR #150's optional `lineage` passthrough — does NOT reinvent checkpoint/fork), cli
57
+ * (Halo/RLM subprocess; `budgetExempt`, excluded from equal-k by construction). A user's
58
+ * own agent (mastra/agno/raw HTTP/anything) is first-class by implementing this interface.
59
+ */
60
+ interface Executor<Out> {
61
+ /** Stable runtime tag for traces + the equal-k exemption check. */
62
+ readonly runtime: Runtime;
63
+ /**
64
+ * When true, this executor's spend is NOT metered against the conserved pool and its
65
+ * iterations are excluded from the equal-k assertion (a `cli` subprocess without
66
+ * token accounting). Fail-loud everywhere else: a metered executor MUST report usage.
67
+ */
68
+ readonly budgetExempt?: boolean;
69
+ /**
70
+ * One-shot → resolves an `ExecutorResult`; streaming → yields incremental `UsageEvent`s and
71
+ * the terminal artifact is read from `resultArtifact()` after the stream drains.
72
+ * `signal` is the spawn-scoped abort (chains the acquire lifecycle for sandbox).
73
+ */
74
+ execute(task: unknown, signal: AbortSignal): Promise<ExecutorResult<Out>> | AsyncIterable<UsageEvent>;
75
+ /**
76
+ * Optional inbox: receive an out-of-band message from the driver mid-run (the `send`/`steer_worker`
77
+ * verb). A streaming executor drains pending messages between turns and folds them into the next
78
+ * step (a steer / interrupt / resume). A one-shot executor that can't be steered mid-flight omits
79
+ * this; `Scope.send` then returns `false` for it. Never throws — a malformed message is the
80
+ * executor's to ignore.
81
+ */
82
+ deliver?(msg: unknown): void;
83
+ /**
84
+ * Tear the executor's resources down. `grace` mirrors the OTP shutdown spec
85
+ * (`'brutalKill'` = immediate, a number = ms grace, `'infinity'` = await clean exit).
86
+ */
87
+ teardown(grace: number | 'brutalKill' | 'infinity'): Promise<{
88
+ destroyed: boolean;
89
+ }>;
90
+ /**
91
+ * The replay source (B1): the content-addressed `outRef` + the materialized output the
92
+ * driver branched on, its verdict, and the conserved spend. Read once, after settle.
93
+ */
94
+ resultArtifact(): {
95
+ outRef: string;
96
+ out: Out;
97
+ verdict?: DefaultVerdict;
98
+ spent: Spend;
99
+ };
100
+ }
101
+ /** Terminal artifact of a one-shot `Executor.execute`. */
102
+ interface ExecutorResult<Out> {
103
+ outRef: string;
104
+ out: Out;
105
+ verdict?: DefaultVerdict;
106
+ spent: Spend;
107
+ }
108
+ /**
109
+ * Normalized usage event — the single channel every executor reports through, so the
110
+ * conserved pool meters all runtimes identically. `tokens` carries `LoopTokenUsage`'s
111
+ * `{ input, output }`; `usd` is a SEPARATE channel (never folded into tokens).
112
+ */
113
+ type UsageEvent = {
114
+ kind: 'tokens';
115
+ input: number;
116
+ output: number;
117
+ } | {
118
+ kind: 'cost';
119
+ usd: number;
120
+ } | {
121
+ kind: 'iteration';
122
+ };
123
+ /** The runtime tag of an `Executor` impl. Open by intent — `string` so a BYO executor
124
+ * names its own runtime; the built-ins use these literals. */
125
+ type Runtime = 'router' | 'inline' | 'sandbox' | 'cli' | (string & {});
126
+ /**
127
+ * `AgentProfile` does NOT carry a `harness`/backend field — `harness` lives on the
128
+ * sandbox SDK's `BackendConfig`, not the portable profile. So an agent is mapped to its
129
+ * executor through this MINIMAL wrapper, never by fabricating a field onto `AgentProfile`.
130
+ *
131
+ * Resolution (in `runtime.ts`):
132
+ * - `executor` present → BYO: use it verbatim (a user's own `Executor`).
133
+ * - `harness === null` → router/inline: a direct Router call, no box.
134
+ * - `harness` is a `BackendType` → sandbox: compose `runLoop` against `profile` on that backend.
135
+ * Fail loud on an unresolvable spec (no executor and an unknown harness).
136
+ */
137
+ interface AgentSpec {
138
+ readonly profile: AgentProfile;
139
+ /** `null` selects router/inline; a `BackendType` selects the sandboxed harness. */
140
+ readonly harness: BackendType | null;
141
+ /** Bring-your-own executor: when set, overrides harness-based resolution entirely. */
142
+ readonly executor?: Executor<unknown>;
143
+ }
144
+ /**
145
+ * Builds a fresh `Executor` for one spawn from the resolved spec. Per-spawn (not
146
+ * shared) so each child owns its own box/abort/teardown lifecycle. A BYO factory lets a
147
+ * user supply construction args without pre-instantiating.
148
+ */
149
+ type ExecutorFactory<Out> = (spec: AgentSpec, ctx: ExecutorContext) => Executor<Out>;
150
+ /** Construction context handed to an `ExecutorFactory` — the seams a built-in needs
151
+ * (sandbox client for the sandbox executor, router config for router/inline) without
152
+ * the factory reaching into module globals. */
153
+ interface ExecutorContext {
154
+ readonly signal: AbortSignal;
155
+ /** Opaque seams the registry threads through; a built-in narrows what it needs. */
156
+ readonly seams: Readonly<Record<string, unknown>>;
157
+ }
158
+ /**
159
+ * The OPEN resolver: maps an `AgentSpec` to an `ExecutorFactory`. The default
160
+ * registry resolves the three built-ins AND accepts a BYO `executor`/factory; callers
161
+ * register more runtimes by name. NOT a closed switch — registration is the extension
162
+ * point, mirroring the open `Executor` interface.
163
+ */
164
+ interface ExecutorRegistry {
165
+ /** Register a factory for a named runtime. Throws on a duplicate name (fail loud). */
166
+ register<Out>(runtime: Runtime, factory: ExecutorFactory<Out>): void;
167
+ /**
168
+ * Resolve a spec to a factory. Precedence: a BYO `spec.executor` → a trivial factory
169
+ * returning it; else `harness === null` → the `'router'` factory; else a registered
170
+ * factory for the harness-derived runtime. Returns a typed outcome — the caller
171
+ * inspects `succeeded` before `value` (no silent fallback).
172
+ */
173
+ resolve<Out>(spec: AgentSpec): {
174
+ succeeded: true;
175
+ value: ExecutorFactory<Out>;
176
+ } | {
177
+ succeeded: false;
178
+ error: string;
179
+ };
180
+ }
181
+ /** A budget envelope on a spawn or the root. All ceilings; the pool reserves against them. */
182
+ interface Budget {
183
+ readonly maxIterations: number;
184
+ readonly maxTokens: number;
185
+ readonly maxUsd?: number;
186
+ readonly deadlineMs?: number;
187
+ }
188
+ /** Conserved spend, reconciled from the normalized `UsageEvent` stream. Tokens and usd
189
+ * are separate channels (never folded). */
190
+ interface Spend {
191
+ iterations: number;
192
+ tokens: LoopTokenUsage;
193
+ usd: number;
194
+ ms: number;
195
+ }
196
+ /** OTP child-spec restart class. */
197
+ type Restart = 'temporary' | 'transient' | 'permanent';
198
+ /** `'acquiring'` is first-class (M1): a node spends real time + reaps an orphan box
199
+ * during sandbox acquire BEFORE it is `running`, so abort must be defined over it. */
200
+ type NodeStatus = 'pending' | 'acquiring' | 'running' | 'done' | 'failed' | 'cancelled';
201
+ /** Deterministic node id — `${parent}:s${seq}` from the cursor order, never wall-clock. */
202
+ type NodeId = string;
203
+ interface SpawnOpts {
204
+ readonly budget: Budget;
205
+ readonly label: string;
206
+ readonly restart?: Restart;
207
+ /** Teardown grace handed to the executor when this node is reaped. */
208
+ readonly shutdown?: number | 'brutalKill' | 'infinity';
209
+ }
210
+ /**
211
+ * A live child handle. `abort()` is defined over the ACQUIRE lifecycle: it chains into
212
+ * the `acquireSandbox` signal and reaps a find-by-name orphan box, so a node aborted
213
+ * mid-acquire never leaks (M1).
214
+ */
215
+ interface Handle<Out> {
216
+ readonly id: NodeId;
217
+ readonly label: string;
218
+ readonly status: NodeStatus;
219
+ abort(reason?: string): void;
220
+ /** Phantom: binds the handle to the child's output type so `spawn<C>` returns a
221
+ * `Handle<C>` distinct from a `Handle<other>`. Type-only — never present at runtime. */
222
+ readonly __out?: Out;
223
+ }
224
+ /**
225
+ * A settled child, delivered by `scope.next()`. `seq` is the monotonic cursor order
226
+ * in which `next()` yielded this settlement (B2) — NOT wall-clock — and replay delivers strictly
227
+ * in `seq` order. `outRef` rehydrates `out` from the `ResultBlobStore` on replay.
228
+ */
229
+ type Settled<Out> = {
230
+ kind: 'done';
231
+ handle: Handle<Out>;
232
+ out: Out;
233
+ outRef: string;
234
+ verdict?: DefaultVerdict;
235
+ spent: Spend;
236
+ seq: number;
237
+ } | {
238
+ kind: 'down';
239
+ handle: Handle<Out>;
240
+ reason: string;
241
+ /** True = infrastructure failure (excluded from merge `n` / equal-k), not a bad result. */
242
+ infra: boolean;
243
+ restartCount: number;
244
+ seq: number;
245
+ };
246
+ /**
247
+ * The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
248
+ * budget atomically from the shared pool and FAILS CLOSED when the pool can't cover it;
249
+ * `next()` is a ray.wait cursor (n=1) over THIS scope's IN-MEMORY live set; `view` reads
250
+ * the in-memory nursery (NOT the log), O(live).
251
+ */
252
+ interface Scope<Out> {
253
+ /**
254
+ * Spawn a child. Reserves `opts.budget` from the conserved pool atomically; refunds the
255
+ * unspent remainder on settle. Returns a typed outcome — fail-closed on an exhausted
256
+ * pool or an exceeded depth ceiling (the caller inspects `ok` before `handle`).
257
+ */
258
+ spawn<C extends Out>(agent: Agent<unknown, C>, task: unknown, opts: SpawnOpts): {
259
+ ok: true;
260
+ handle: Handle<C>;
261
+ } | {
262
+ ok: false;
263
+ reason: 'budget-exhausted' | 'depth-exceeded';
264
+ };
265
+ /** ray.wait n=1 over this scope's in-memory live set; resolves as each child settles;
266
+ * `null` when the live set is empty. */
267
+ next(): Promise<Settled<Out> | null>;
268
+ /**
269
+ * Steer a RUNNING child out-of-band — deliver a message to its executor's inbox (the driver's
270
+ * `send` verb: next-instruction, interrupt, or resume). Returns `true` if the message was
271
+ * delivered to a live child whose executor accepts delivery, `false` otherwise (unknown id,
272
+ * already settled, or an executor with no inbox). The executor drains its inbox between turns;
273
+ * a leaf that does not implement `deliver` simply cannot be steered mid-flight. In-process this
274
+ * is a direct call; the sandbox/Agent-Bus transports surface the SAME verb as an MCP tool.
275
+ */
276
+ send(nodeId: NodeId, msg: unknown): boolean;
277
+ /** The live tree — reads the in-memory nursery, not the journal. */
278
+ readonly view: TreeView;
279
+ /** Conserved-pool readouts (post-reservation). */
280
+ readonly budget: Readonly<{
281
+ tokensLeft: number;
282
+ usdLeft: number;
283
+ deadlineMs: number;
284
+ reservedTokens: number;
285
+ }>;
286
+ }
287
+ interface NodeSnapshot {
288
+ readonly id: NodeId;
289
+ readonly parent?: NodeId;
290
+ readonly label: string;
291
+ readonly status: NodeStatus;
292
+ readonly runtime: Runtime;
293
+ readonly budget: Budget;
294
+ /** Conserved spend so far for this node. */
295
+ readonly spent: Spend;
296
+ /** `outRef` once the node is `done` (the replay/result pointer). */
297
+ readonly outRef?: string;
298
+ }
299
+ /** The live tree — what `scope.view` / `RootHandle.view()` materialize for a viewer. */
300
+ interface TreeView {
301
+ readonly root: NodeId;
302
+ readonly nodes: ReadonlyArray<NodeSnapshot>;
303
+ /** Count of nodes in `running` or `acquiring` — the "what's in flow?" answer. */
304
+ readonly inFlight: number;
305
+ }
306
+ /** Journaled spawn-tree events (B1/B2). `seq` is the cursor order; `at` is an ISO
307
+ * timestamp for human inspection only (NOT a replay input). */
308
+ type SpawnEvent = {
309
+ kind: 'spawned';
310
+ id: NodeId;
311
+ parent?: NodeId;
312
+ label: string;
313
+ budget: Budget;
314
+ runtime: Runtime;
315
+ seq: number;
316
+ at: string;
317
+ } | {
318
+ kind: 'settled';
319
+ id: NodeId;
320
+ status: 'done' | 'down';
321
+ /** Content-addressed result pointer; rehydrates `out` from `ResultBlobStore`. */
322
+ outRef?: string;
323
+ verdict?: DefaultVerdict;
324
+ spent: Spend;
325
+ infra?: boolean;
326
+ seq: number;
327
+ at: string;
328
+ } | {
329
+ kind: 'cancelled';
330
+ id: NodeId;
331
+ reason: string;
332
+ seq: number;
333
+ at: string;
334
+ };
335
+ /**
336
+ * The spawn-tree event source (mirrors `ConversationJournal`'s begin/append/load shape).
337
+ * `loadTree` replays the full ordered event list for resume/replay; `appendEvent` is
338
+ * called only AFTER the event is observed-committed (never speculative).
339
+ */
340
+ interface SpawnJournal {
341
+ loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
342
+ beginTree(root: NodeId, at: string): Promise<void>;
343
+ appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
344
+ }
345
+ /** Content-addressed result blobs (the `outRef` → artifact map) backing the replay
346
+ * invariant. Split from the journal so the journal stays small (decisions) and the
347
+ * payloads (evidence) live where a viewer/replayer rehydrates them. */
348
+ interface ResultBlobStore {
349
+ put(outRef: string, artifact: unknown): Promise<void>;
350
+ get(outRef: string): Promise<unknown | undefined>;
351
+ }
352
+ /**
353
+ * Owns the conserved pool, the spawn log, the abort cascade, the OTP intensity breaker,
354
+ * and the root handle. `run` executes the root `Agent` to completion; `attach` wires a
355
+ * live `RootHandle` (the Q2 substrate the chat/pi-viz client later consumes).
356
+ */
357
+ interface Supervisor<Task, Out> {
358
+ run(root: Agent<Task, Out>, task: Task, opts: SupervisorOpts): Promise<SupervisedResult<Out>>;
359
+ attach(h: RootHandle<Out>): void;
360
+ }
361
+ interface SupervisorOpts {
362
+ /** The root conserved-pool ceiling (tokens + usd + iterations + deadline). */
363
+ readonly budget: Budget;
364
+ /** Trace-correlation root + the journal/blob root key. */
365
+ readonly runId: NodeId;
366
+ /** Event source — required on this type; the impl offers an in-memory journal, pass JSONL/FS for durability. */
367
+ readonly journal: SpawnJournal;
368
+ /** Result payload store backing `outRef` rehydration. */
369
+ readonly blobs: ResultBlobStore;
370
+ /** Executor resolution — the open registry mapping `AgentSpec` → `ExecutorFactory`. */
371
+ readonly executors: ExecutorRegistry;
372
+ /** Runtime recursion-depth ceiling (paired with the conserved pool per R3). */
373
+ readonly maxDepth?: number;
374
+ /**
375
+ * OTP intensity breaker: more than `maxRestarts` child restarts within `withinMs`
376
+ * trips the supervisor to `no-winner` rather than restarting forever.
377
+ */
378
+ readonly maxRestarts?: number;
379
+ readonly withinMs?: number;
380
+ readonly now?: () => number;
381
+ readonly signal?: AbortSignal;
382
+ /** Lifecycle stream sink, threaded into the root `Scope` so every `spawn`/settle emits on the
383
+ * same `agent.spawn`/`agent.child` stream `runLoop` feeds — one observable recursive tree. */
384
+ readonly hooks?: RuntimeHooks;
385
+ }
386
+ /** Typed terminal result (M2) — a no-winner is NEVER coerced to a best-effort output. */
387
+ type SupervisedResult<Out> = {
388
+ kind: 'winner';
389
+ out: Out;
390
+ outRef: string;
391
+ verdict?: DefaultVerdict;
392
+ tree: TreeView;
393
+ spentTotal: Spend;
394
+ } | {
395
+ kind: 'no-winner';
396
+ reason: 'all-children-down' | 'budget-exhausted' | 'aborted';
397
+ tree: TreeView;
398
+ downCount: number;
399
+ };
400
+ /** Live root handle — the substrate a chat/pi-viz client attaches to (Q2). `signal`
401
+ * delivers an out-of-band message to the running root; `view()` materializes the tree. */
402
+ interface RootHandle<Out> {
403
+ view(): TreeView;
404
+ signal(msg: RootSignal): void;
405
+ abort(reason?: string): void;
406
+ /** Phantom: binds the handle to the supervised run's output type. Type-only — never
407
+ * present at runtime; lets `attach(h: RootHandle<Out>)` stay output-typed. */
408
+ readonly __out?: Out;
409
+ }
410
+ /** Out-of-band message to a running root. Open by intent — a client extends it. */
411
+ type RootSignal = {
412
+ kind: 'pause';
413
+ } | {
414
+ kind: 'resume';
415
+ } | {
416
+ kind: 'cancel';
417
+ reason?: string;
418
+ } | {
419
+ kind: 'ask';
420
+ question: string;
421
+ };
422
+ /**
423
+ * The progressive-widening gate (MCTS-PW). Decides whether a settled child is
424
+ * `promising` enough to spawn another under the remaining pool. DEFAULTS TO FLAT
425
+ * (`shouldWiden` always false) so a gate run never widens and the selector≠judge
426
+ * firewall conflict (R2) stays dormant. When widening IS enabled, `promising` MUST be
427
+ * derived from TRACE findings (`analyses`), never raw `verdict` — or the gate carries
428
+ * an explicit, argued `judgeExempt: true` (the documented escape hatch, off by default).
429
+ */
430
+ interface WidenGate<Out> {
431
+ /** Default impl returns false for every settlement (flat — never widens). */
432
+ shouldWiden(settled: Settled<Out>, budget: Scope<Out>['budget']): boolean;
433
+ /** When true, widening may read `verdict` directly (collides with the steer firewall —
434
+ * must be explicitly argued per cell, never defaulted on). */
435
+ readonly judgeExempt?: boolean;
436
+ }
437
+
438
+ export type { Agent as A, Budget as B, ExecutorFactory as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, ExecutorRegistry as e, RootHandle as f, SupervisedResult as g, Spend as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };
@@ -1,5 +1,6 @@
1
1
  import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore, DefaultVerdict } from '@tangle-network/agent-eval';
2
2
  import { CreateSandboxOptions, SandboxInstance, SandboxEvent, AgentProfile } from '@tangle-network/sandbox';
3
+ import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
3
4
 
4
5
  /**
5
6
  * @stable
@@ -744,9 +745,19 @@ interface Driver<Task, Output, Decision> {
744
745
  * move + rationale (not just the inferred fan-width). Drivers whose topology
745
746
  * is a pure function of count (refine/fanout-vote) omit it — the kernel
746
747
  * infers `moveKind` from the planned-task count. Agent-authored drivers
747
- * (`createDynamicDriver`) return their chosen move's kind + rationale.
748
+ * (`createDriver`) return their chosen move's kind + rationale.
748
749
  */
749
750
  describePlan?(): LoopPlanDescription | undefined;
751
+ /**
752
+ * Optional: the driver AUTHORS the winner instead of the kernel's argmax. The
753
+ * kernel consults this at finalize ONLY when the caller did not pass an explicit
754
+ * `selectWinner` to runLoop. Return the driver-declared winner (e.g. from a
755
+ * `select` topology move) or `undefined` to fall through to the default
756
+ * (best-valid-score, earliest index). This is the SELECTOR role made
757
+ * agent-authorable — the planner runs the selection, not the kernel.
758
+ * @experimental
759
+ */
760
+ selectWinner?(history: ReadonlyArray<Iteration<Task, Output>>): LoopWinner<Task, Output> | undefined;
750
761
  }
751
762
  /** @experimental Driver-supplied description of the just-planned move. */
752
763
  interface LoopPlanDescription {
@@ -796,9 +807,89 @@ interface LoopResult<Task, Output, Decision> {
796
807
  *
797
808
  * @experimental
798
809
  */
799
- interface LoopSandboxClient {
810
+ interface SandboxClient {
800
811
  create(options?: CreateSandboxOptions): Promise<SandboxInstance>;
801
812
  describePlacement?(box: SandboxInstance): LoopSandboxPlacement;
813
+ /**
814
+ * Optional CRIU capability probe. When present and it resolves
815
+ * `{ available: true }`, the loop's `lineage.fork` seam may checkpoint+fork a
816
+ * parent box so a fanout's branches inherit a shared context prefix; absent or
817
+ * `false`, the fanout degrades to independent fresh boxes. The kernel reads
818
+ * this ONLY through the capability probe — it never branches on backend kind.
819
+ * The raw `Sandbox` SDK class satisfies it; the loop's test fakes omit it
820
+ * (⇒ `canFork = false`).
821
+ * @experimental
822
+ */
823
+ criuStatus?(): Promise<{
824
+ available: boolean;
825
+ criuVersion?: string;
826
+ reason?: string;
827
+ }>;
828
+ }
829
+ /**
830
+ * Opt-in box-lineage controls for `runLoop`. Default OFF — with both flags
831
+ * unset the kernel's per-iteration behavior is byte-identical to acquiring a
832
+ * fresh box, streaming once, and tearing it down. The independence of N fresh
833
+ * boxes (e.g. `random@k`) is a compute-control invariant; these flags must
834
+ * never apply to it. Enable them ONLY on a steered loop (refine / planner-driven
835
+ * fanout) where reusing the parent's context is intended.
836
+ *
837
+ * Live-box footprint: the lineage keeps every box it starts or forks alive
838
+ * across rounds so a later round can descend from it, and tears them down at
839
+ * loop end. When the driver's branch point is kernel-inferred (no
840
+ * `describePlan` — refine, fanout-vote), the kernel prunes boxes no future
841
+ * round can reach after each round, so the live set tracks the active frontier.
842
+ * When the driver authors its own branch point (`describePlan().parentIndex` —
843
+ * `createDriver`), it may descend from any prior
844
+ * iteration, so no box is pruned and the live-box count rises to the total
845
+ * iterations across all rounds. Size `forkFanout` runs accordingly (CRIU forks
846
+ * are copy-on-write, but each is still a live box until loop end).
847
+ *
848
+ * @experimental
849
+ */
850
+ interface LoopLineageOptions {
851
+ /**
852
+ * When true, a refine round (1 planned task) descending from a prior round
853
+ * CONTINUES the parent iteration's session on the SAME box
854
+ * (`streamPrompt({ sessionId })`) instead of acquiring a fresh box and
855
+ * re-injecting prior context as prompt text. Round 0 (no parent) always
856
+ * starts fresh. Usable on any single-task path, not just the refine driver.
857
+ *
858
+ * Requires a platform that honors a client-supplied `sessionId`. The lineage
859
+ * mints the id and `continue` asserts the session is still live
860
+ * (`box.session(id).status()`), failing loud if the platform dropped it — so a
861
+ * non-honoring platform errors instead of silently running contextless turns.
862
+ * Verify continuity against the live platform before enabling: the assertion
863
+ * proves the session EXISTS server-side, not that prior turns replay into it.
864
+ */
865
+ sessionContinuity?: boolean;
866
+ /**
867
+ * When true AND the platform reports CRIU fork support, a fanout round (N
868
+ * planned tasks) descending from a prior round FORKS the parent iteration's
869
+ * checkpoint so all N branches inherit a shared context prefix. Without fork
870
+ * support it degrades to N independent fresh boxes (same result, no prefix).
871
+ * Round 0 always starts fresh. NEVER set this for a `random@k` control arm —
872
+ * forking would couple the independent samples.
873
+ *
874
+ * A real fork inherits the parent's IMAGE/PROFILE: per-branch `AgentRunSpec`
875
+ * profiles are honored only on the degraded fresh-box path, so a
876
+ * heterogeneous-profile fanout silently homogenizes to the parent's profile
877
+ * when fork is available. Use this for same-profile branching; for
878
+ * different-per-branch profiles use the unforked fanout path.
879
+ */
880
+ forkFanout?: boolean;
881
+ /**
882
+ * Per-turn sandbox streaming mode. Default `'sse'` (live `streamPrompt` —
883
+ * low-latency, full per-token trace; best for interactive chat). `'poll'`
884
+ * fire-and-detaches via `dispatchPrompt` and awaits the terminal result by
885
+ * status-polling, so a long, quiet in-box turn (clone + build + test) never
886
+ * holds a live stream a proxy idle-timeout can drop mid-execution. Lower trace
887
+ * fidelity (one terminal event), so it is opt-in — intended for BATCH eval
888
+ * runs, which don't need live streaming and were losing long turns to the
889
+ * idle-drop. Applies to the default fresh-box path too, not only when
890
+ * `sessionContinuity`/`forkFanout` are on.
891
+ */
892
+ streaming?: 'sse' | 'poll';
802
893
  }
803
894
  /** @experimental */
804
895
  interface LoopSandboxPlacement {
@@ -847,6 +938,11 @@ type LoopTraceEvent = {
847
938
  runId: string;
848
939
  timestamp: number;
849
940
  payload: LoopEndedPayload;
941
+ } | {
942
+ kind: 'loop.teardown.failed';
943
+ runId: string;
944
+ timestamp: number;
945
+ payload: LoopTeardownFailedPayload;
850
946
  };
851
947
  /** @experimental */
852
948
  interface LoopStartedPayload {
@@ -946,10 +1042,20 @@ interface LoopEndedPayload {
946
1042
  durationMs: number;
947
1043
  iterations: number;
948
1044
  }
1045
+ /** Emitted when a box's `delete()` throws or times out during teardown — the
1046
+ * loop swallows the failure (platform reaps on expiry) but surfaces it here so
1047
+ * a real leak (e.g. mid-loop auth expiry) is observable. @experimental */
1048
+ interface LoopTeardownFailedPayload {
1049
+ sandboxId?: string;
1050
+ /** `'timeout'` or the delete error message. */
1051
+ reason: string;
1052
+ }
949
1053
  /** @experimental */
950
1054
  interface ExecCtx {
951
1055
  /** Sandbox SDK client — the kernel calls `.create()` per iteration. */
952
- sandboxClient: LoopSandboxClient;
1056
+ sandboxClient: SandboxClient;
1057
+ /** Optional runtime hooks. Execution-scoped; never part of `AgentProfile`. */
1058
+ hooks?: RuntimeHooks;
953
1059
  /** Optional trace emitter. When set, the kernel emits `loop.*` events. */
954
1060
  traceEmitter?: LoopTraceEmitter;
955
1061
  /**
@@ -973,4 +1079,4 @@ interface ExecCtx {
973
1079
  parentSpanId?: string;
974
1080
  }
975
1081
 
976
- export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OutputAdapter as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type LoopSandboxClient as a, type LoopResult as b, type OpenAIChatTool as c, type LoopTraceEmitter as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };
1082
+ export { type AgentRunSpec as A, type BackendErrorDetail as B, type LoopDecisionPayload as C, type Driver as D, type ExecCtx as E, type LoopEndedPayload as F, type LoopIterationDispatchPayload as G, type LoopIterationEndedPayload as H, type Iteration as I, type LoopIterationStartedPayload as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type LoopPlanDescription as M, type LoopPlanPayload as N, type OutputAdapter as O, type LoopStartedPayload as P, type LoopTeardownFailedPayload as Q, type RuntimeStreamEvent as R, type SandboxClient as S, type LoopTraceEvent as T, type ValidationCtx as U, type Validator as V, type LoopLineageOptions as a, type LoopResult as b, type LoopTokenUsage as c, type OpenAIChatTool as d, type LoopTraceEmitter as e, type LoopSandboxPlacement as f, type AgentBackendInput as g, type AgentExecutionBackend as h, type OpenAIChatToolChoice as i, type AgentBackendContext as j, type RunAgentTaskOptions as k, type AgentTaskRunResult as l, type RunAgentTaskStreamOptions as m, type AgentRuntimeEvent as n, type AgentTaskStatus as o, type RuntimeSessionStore as p, type RuntimeSession as q, type AgentAdapter as r, type AgentKnowledgeProvider as s, type AgentRuntimeEventSink as t, type AgentTaskContext as u, type AgentTaskSpec as v, type RuntimeRunHandle as w, type RuntimeRunPersistenceAdapter as x, type RuntimeRunRow as y, startRuntimeRun as z };
@@ -1,7 +1,8 @@
1
1
  import { AgentProfile, CreateSandboxOptions, PromptOptions, TaskOptions, SandboxEvent } from '@tangle-network/sandbox';
2
- import { a as LoopSandboxClient, O as OutputAdapter, l as LoopSandboxPlacement, b as LoopResult } from './types-DbJzz2uf.js';
3
- import { R as RunLoopOptions } from './run-loop-C4L1Sted.js';
2
+ import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-DnYoHvvZ.js';
3
+ import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
4
4
  import '@tangle-network/agent-eval';
5
+ import './runtime-hooks-C7JwKb9E.js';
5
6
 
6
7
  /**
7
8
  * @experimental
@@ -436,7 +437,7 @@ interface WorkflowSandboxAgentTrace<TOutput = unknown> {
436
437
  tokenUsage: WorkflowTokenUsage;
437
438
  }
438
439
  interface CreateSandboxWorkflowAgentDelegateOptions<TOutput = unknown> {
439
- client: LoopSandboxClient;
440
+ client: SandboxClient;
440
441
  profile: WorkflowSandboxAgentProfileResolver;
441
442
  output?: OutputAdapter<TOutput>;
442
443
  stream?: WorkflowSandboxAgentStream;
package/dist/workflow.js CHANGED
@@ -1,13 +1,12 @@
1
1
  import {
2
2
  createSandboxForSpec,
3
3
  describeSandboxPlacement,
4
- extractLlmCallEvent,
5
4
  runLoop
6
- } from "./chunk-S7JXV32P.js";
7
- import "./chunk-PY6NMZYX.js";
5
+ } from "./chunk-65FQLI4V.js";
8
6
  import {
9
- ValidationError
10
- } from "./chunk-SQSCRJ7U.js";
7
+ ValidationError,
8
+ extractLlmCallEvent
9
+ } from "./chunk-GSUO5QS6.js";
11
10
  import "./chunk-DGUM43GV.js";
12
11
 
13
12
  // src/workflow/agent-delegate.ts