@tangle-network/agent-runtime 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +95 -203
  2. package/dist/agent.d.ts +3 -2
  3. package/dist/agent.js +5 -7
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +28 -2
  6. package/dist/analyst-loop.js +4 -1
  7. package/dist/audit.d.ts +93 -0
  8. package/dist/audit.js +312 -0
  9. package/dist/audit.js.map +1 -0
  10. package/dist/chunk-4B6U4CVQ.js +15 -0
  11. package/dist/chunk-4B6U4CVQ.js.map +1 -0
  12. package/dist/chunk-65FQLI4V.js +4089 -0
  13. package/dist/chunk-65FQLI4V.js.map +1 -0
  14. package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
  15. package/dist/chunk-GN75RGM6.js.map +1 -0
  16. package/dist/chunk-GSUO5QS6.js +146 -0
  17. package/dist/chunk-GSUO5QS6.js.map +1 -0
  18. package/dist/chunk-HNUXAZIJ.js +580 -0
  19. package/dist/chunk-HNUXAZIJ.js.map +1 -0
  20. package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
  21. package/dist/chunk-I42NHLKX.js.map +1 -0
  22. package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
  23. package/dist/chunk-JNPK46YH.js.map +1 -0
  24. package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
  25. package/dist/chunk-KADIJAD4.js.map +1 -0
  26. package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
  27. package/dist/chunk-KPN7OQ64.js.map +1 -0
  28. package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
  29. package/dist/chunk-WIR4HOOJ.js +27 -0
  30. package/dist/chunk-WIR4HOOJ.js.map +1 -0
  31. package/dist/coder-DCWFQpmJ.d.ts +114 -0
  32. package/dist/driver-C-mtBo7h.d.ts +221 -0
  33. package/dist/improvement.d.ts +0 -1
  34. package/dist/improvement.js +0 -5
  35. package/dist/improvement.js.map +1 -1
  36. package/dist/index.d.ts +122 -9
  37. package/dist/index.js +398 -10
  38. package/dist/index.js.map +1 -1
  39. package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
  40. package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
  41. package/dist/loop-runner-bin.d.ts +8 -6
  42. package/dist/loop-runner-bin.js +6 -8
  43. package/dist/loops.d.ts +7 -393
  44. package/dist/loops.js +96 -27
  45. package/dist/mcp/bin.js +7 -7
  46. package/dist/mcp/bin.js.map +1 -1
  47. package/dist/mcp/index.d.ts +286 -13
  48. package/dist/mcp/index.js +341 -9
  49. package/dist/mcp/index.js.map +1 -1
  50. package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
  51. package/dist/profiles.d.ts +385 -86
  52. package/dist/profiles.js +549 -4
  53. package/dist/profiles.js.map +1 -1
  54. package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
  55. package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
  56. package/dist/runtime.d.ts +1964 -0
  57. package/dist/runtime.js +114 -0
  58. package/dist/runtime.js.map +1 -0
  59. package/dist/substrate-CUgk7F7s.d.ts +77 -0
  60. package/dist/topology.d.ts +73 -0
  61. package/dist/topology.js +111 -0
  62. package/dist/topology.js.map +1 -0
  63. package/dist/types-BfoeiQRZ.d.ts +438 -0
  64. package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
  65. package/dist/workflow.d.ts +4 -3
  66. package/dist/workflow.js +4 -5
  67. package/dist/workflow.js.map +1 -1
  68. package/package.json +37 -28
  69. package/skills/agent-runtime-adoption/SKILL.md +32 -29
  70. package/skills/generate-eval/SKILL.md +60 -0
  71. package/dist/chunk-3HMHSN22.js.map +0 -1
  72. package/dist/chunk-GFKVVRQ7.js.map +0 -1
  73. package/dist/chunk-HVYOHJHK.js.map +0 -1
  74. package/dist/chunk-KDMRUD2P.js.map +0 -1
  75. package/dist/chunk-PY6NMZYX.js +0 -52
  76. package/dist/chunk-PY6NMZYX.js.map +0 -1
  77. package/dist/chunk-S7JXV32P.js +0 -947
  78. package/dist/chunk-S7JXV32P.js.map +0 -1
  79. package/dist/chunk-SKUZZCHE.js.map +0 -1
  80. package/dist/chunk-SQSCRJ7U.js +0 -65
  81. package/dist/chunk-SQSCRJ7U.js.map +0 -1
  82. package/dist/chunk-VOX6Z3II.js +0 -90
  83. package/dist/chunk-VOX6Z3II.js.map +0 -1
  84. package/dist/chunk-XBUG326M.js +0 -261
  85. package/dist/chunk-XBUG326M.js.map +0 -1
  86. package/dist/dynamic-wUgp6UKs.d.ts +0 -108
  87. package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
  88. package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0
@@ -0,0 +1,1964 @@
1
+ import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
2
+ export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
+ import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-BfoeiQRZ.js';
4
+ export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-BfoeiQRZ.js';
5
+ export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-C-mtBo7h.js';
6
+ import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DnYoHvvZ.js';
7
+ export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-DnYoHvvZ.js';
8
+ import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
9
+ export { DefaultVerdict } from '@tangle-network/agent-eval';
10
+ import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
11
+ import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
12
+ export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-CU2Y00Si.js';
13
+ import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
14
+
15
+ /**
16
+ * @experimental
17
+ *
18
+ * Event-sourced spawn journal for the recursive execution atom (build steps 3 + 7).
19
+ *
20
+ * The supervision tree is journaled as an append-only event log: every `spawned`,
21
+ * `settled`, and `cancelled` is recorded AFTER it is observed-committed (never
22
+ * speculative), mirroring `ConversationJournal`'s begin/append/load shape. The log
23
+ * holds only the THIN decision record — ids, parentage, budget, the spend a decision
24
+ * consumed, and a content-addressed `outRef`. The payloads the driver branched on
25
+ * (the `out` artifacts) live in a separate `ResultBlobStore`, keyed by `outRef`, so
26
+ * the journal stays small (decisions) and replay rehydrates the exact `Settled` from
27
+ * the blob store (evidence). This is the decision/payload split the replay argument
28
+ * rests on (B1/B2).
29
+ *
30
+ * Replay determinism (B2): `seq` is the monotonic cursor order `scope.next()` yielded
31
+ * each settlement — NOT wall-clock. `replaySpawnTree` sorts strictly by `seq` before
32
+ * touching the blob store, so the order in which rehydration `get`s resolve can never
33
+ * reorder the replayed `Settled[]`; the result is identical regardless of blob latency.
34
+ */
35
+
36
+ /**
37
+ * Mint the content-addressed `outRef` for a result artifact: `sha256:<hex>` over a
38
+ * stable JSON encoding. Producers call this to derive the `outRef` they journal and
39
+ * `put`; the FS/in-mem stores re-derive it on `put` to verify the supplied ref
40
+ * matches (fail loud on a mismatch — a forged ref breaks the replay invariant).
41
+ *
42
+ * Stable encoding: object keys are sorted recursively so two structurally-equal
43
+ * artifacts hash identically regardless of key insertion order.
44
+ */
45
+ declare function contentAddress(artifact: unknown): string;
46
+ /**
47
+ * In-memory `ResultBlobStore`. Content-addressed: `put` verifies the supplied
48
+ * `outRef` matches the artifact's hash so a stale/forged ref fails loud rather than
49
+ * silently rehydrating the wrong payload. Idempotent on an identical re-put.
50
+ */
51
+ declare class InMemoryResultBlobStore implements ResultBlobStore {
52
+ private readonly blobs;
53
+ put(outRef: string, artifact: unknown): Promise<void>;
54
+ get(outRef: string): Promise<unknown | undefined>;
55
+ }
56
+ /**
57
+ * FS `ResultBlobStore`. One JSON file per artifact under `dir`, named by a
58
+ * filesystem-safe encoding of the `outRef` (`sha256:<hex>` → `sha256-<hex>.json`).
59
+ * `put` fsyncs so a crash between writes never loses an acknowledged blob.
60
+ */
61
+ declare class FileResultBlobStore implements ResultBlobStore {
62
+ private readonly dir;
63
+ constructor(dir: string);
64
+ put(outRef: string, artifact: unknown): Promise<void>;
65
+ get(outRef: string): Promise<unknown | undefined>;
66
+ private blobPath;
67
+ }
68
+ /**
69
+ * In-memory `SpawnJournal`. Appends are observed-committed only; the impl enforces
70
+ * the corruption guards a durable replay rests on:
71
+ * - an event before `beginTree` is a corrupted tree (fail loud),
72
+ * - a duplicate `seq` within a tree is a corrupted cursor (fail loud) — two
73
+ * settlements cannot share the cursor position replay orders by.
74
+ */
75
+ declare class InMemorySpawnJournal implements SpawnJournal {
76
+ private readonly trees;
77
+ loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
78
+ beginTree(root: NodeId, at: string): Promise<void>;
79
+ appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
80
+ }
81
+ /**
82
+ * JSONL on disk. One line per record: the first record is `begin`, subsequent records
83
+ * are `event` envelopes wrapping a `SpawnEvent`. `loadTree` replays the whole file,
84
+ * filtering by `root`, and applies the same begin-precedes-events + unique-seq
85
+ * corruption guards as the in-memory impl. Each append fsyncs so a crash between
86
+ * writes never loses an acknowledged event.
87
+ */
88
+ declare class FileSpawnJournal implements SpawnJournal {
89
+ private readonly path;
90
+ constructor(path: string);
91
+ loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
92
+ beginTree(root: NodeId, at: string): Promise<void>;
93
+ appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
94
+ private loadTreeBegin;
95
+ private appendRecord;
96
+ }
97
+ /**
98
+ * Re-feed a journaled spawn tree in strict `seq` order, rehydrating each settled
99
+ * child's `out` from the blob store by `outRef`, and return the `Settled[]` exactly
100
+ * as `scope.next()` originally delivered them.
101
+ *
102
+ * Determinism (B2): the events are sorted by `seq` BEFORE any blob `get`, so the
103
+ * replay order is the recorded cursor order regardless of how fast each rehydration
104
+ * resolves. `at` (wall-clock) is never a replay input. Fail loud on a tree that was
105
+ * never begun, a settled-done event missing its `outRef`, or a blob the store can't
106
+ * rehydrate — a silent gap would let `act` branch on the wrong evidence.
107
+ */
108
+ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId): Promise<Settled<unknown>[]>;
109
+ /**
110
+ * Materialize the live tree (`TreeView`) from a journaled event list for resume. Folds
111
+ * `spawned`/`settled`/`cancelled` into a per-node snapshot in `seq` order so the
112
+ * resumed view matches what `scope.view` showed at the recorded cursor position.
113
+ */
114
+ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
115
+
116
+ /**
117
+ * Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
118
+ * instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
119
+ * run once on the prompt, emit the terminal result event, tear down.
120
+ */
121
+ declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
122
+
123
+ /**
124
+ * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
125
+ *
126
+ * Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
127
+ * `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
128
+ * sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
129
+ * `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
130
+ * forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
131
+ * `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
132
+ * the third silent. The fleet's products skipped (c) and fell back to a
133
+ * `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
134
+ * to kill.
135
+ *
136
+ * `loopDispatch` collapses all three into one typed call:
137
+ *
138
+ * const dispatch = loopDispatch({
139
+ * sandboxClient,
140
+ * toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
141
+ * })
142
+ * await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
143
+ *
144
+ * Usage is reported automatically; trace events are forwarded automatically;
145
+ * the ctx is built automatically. The seam becomes impossible to mis-wire.
146
+ *
147
+ * Typed structurally against the campaign `DispatchContext` (imported type-only
148
+ * from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
149
+ * inversion.
150
+ */
151
+
152
+ /** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
153
+ type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
154
+ interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
155
+ /** Sandbox client used for every cell's `runLoop`. Supplied once. */
156
+ sandboxClient: SandboxClient;
157
+ /** Build the per-cell runLoop options from the scenario (+ profile, when
158
+ * used with `runProfileMatrix`). */
159
+ toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
160
+ /** Map the finished loop to the artifact the judges score. Default:
161
+ * `result.winner?.output`. A loop with no winner yields `undefined` (judges
162
+ * skip the cell) — but the loop's token usage is STILL reported, so the
163
+ * integrity guard sees real activity. */
164
+ toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
165
+ /** Forward `loop.*` trace events into the campaign's scoped trace so loop
166
+ * spans correlate with the cell. Default true. */
167
+ forwardTrace?: boolean;
168
+ /** Cost-meter source label for the loop's spend. Default `'loop'`. */
169
+ costSource?: string;
170
+ }
171
+ /**
172
+ * Adapter for `runProfileMatrix` (profile is an axis). Returns a
173
+ * `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
174
+ * reports usage automatically.
175
+ */
176
+ declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
177
+
178
+ /**
179
+ * @experimental
180
+ *
181
+ * The personify layer — the "act like X" knob on top of the recursive keystone.
182
+ *
183
+ * The keystone (`src/loops/supervise/`) is pure STRUCTURE: a recursive `Agent` atom inside
184
+ * a budget-conserving `Scope`, an `ExecutorRegistry` mapping an `AgentSpec` to a runtime,
185
+ * and a `Supervisor` that runs a root agent to a typed `SupervisedResult`. It carries no
186
+ * CONTENT — no model, no prompt, no goal framing, no notion of "who this loop is".
187
+ *
188
+ * This layer adds exactly that content seam without inventing a second engine:
189
+ * - A `Persona` is a thin record: the root `AgentSpec` (profile + harness + optional BYO
190
+ * executor), a root `directive` (the goal framing handed to the chosen shape), a
191
+ * `context` blob (who the loop is acting as), and the executor seams the registry needs.
192
+ * `definePersona` builds it; it is data, not behavior.
193
+ * - A `LoopShape` is a reusable act-body FACTORY: `(ctx: ShapeContext) => Agent`. The shape
194
+ * owns the STRUCTURE (how to decompose / fan out / verify / synthesize); the persona's
195
+ * content parameterizes it. A new shape is ONE file + one `registerShape` call.
196
+ * - `Outcome<D>` is the contract every shape synthesizes into: a finished deliverable OR a
197
+ * list of concrete blockers — "100% done or 100%-defined blockers", never a vague middle.
198
+ *
199
+ * Layering: this module imports ONLY keystone runtime types (`./supervise/types`) and the
200
+ * substrate `AgentProfile`/`BackendType`. It typechecks standalone — no impl, no engine.
201
+ * Extensibility is structural: `Persona` carries an open `extensions` bag so a later
202
+ * world-model / memory field is additive (a new optional key), never a breaking change.
203
+ */
204
+
205
+ /**
206
+ * The terminal contract Drew wants: a loop returns a FINISHED deliverable, or the concrete
207
+ * list of blockers that stopped it — never a half-done best-effort coercion. A `blocked`
208
+ * outcome with an empty `blockers` list is a contract violation (a shape that can't finish
209
+ * MUST name why); impls fail loud on it rather than emitting a vacuous block.
210
+ *
211
+ * `Outcome` is the `Out` type a personified `Agent`/`Supervisor` is parameterized by, so the
212
+ * keystone's typed `SupervisedResult<Outcome<D>>` carries it end to end with no coercion.
213
+ */
214
+ type Outcome<D> = {
215
+ kind: 'done';
216
+ deliverable: D;
217
+ } | {
218
+ kind: 'blocked';
219
+ blockers: string[];
220
+ };
221
+ /**
222
+ * The "act like X" record. A thin composition over the keystone's `AgentSpec`: it pairs the
223
+ * root spec (the executor mapping for the root agent the shape builds) with the CONTENT a
224
+ * shape consumes — the goal framing (`directive`) and who the loop is acting as (`context`).
225
+ *
226
+ * The framework never reads `directive`/`context` semantically; it threads them to the shape
227
+ * verbatim through `ShapeContext`. This is the rule the mandate names: the FRAMEWORK is
228
+ * structure, the PERSONA carries model/prompt/tools/directive. No model name, prompt, or
229
+ * persona string is ever hardcoded in a shape or the engine.
230
+ *
231
+ * `D` is the deliverable type this persona's loops produce; it flows into `Outcome<D>`.
232
+ */
233
+ interface Persona<D = unknown> {
234
+ /** Stable persona name — used as the trace/journal label root, never as content. */
235
+ readonly name: string;
236
+ /**
237
+ * The root agent's executor mapping (profile + harness + optional BYO executor). The
238
+ * shape's root `Agent` carries THIS as its `executorSpec`; child specs the shape spawns
239
+ * are derived from / resolved against the same persona registry (see `ShapeContext`).
240
+ */
241
+ readonly root: AgentSpec;
242
+ /** The goal framing handed to the shape — the "what to achieve", not "how". */
243
+ readonly directive: string;
244
+ /** Who the loop is acting as — the opaque persona context blob the shape may inject into
245
+ * child tasks. Opaque to the framework; only the persona's profiles/prompts interpret it. */
246
+ readonly context: PersonaContext;
247
+ /**
248
+ * The executor seams (router endpoint+key, sandbox client, cli bin) the built-in runtimes
249
+ * read off `ExecutorContext.seams`, OR a fully pre-configured registry. The supervisor
250
+ * threads an EMPTY seam bag to the root scope, so a persona that uses built-in metered
251
+ * runtimes MUST supply a registry whose factories close over their seams (or BYO executors
252
+ * on each `AgentSpec`). Carried here so `runPersonified` can build `SupervisorOpts.executors`.
253
+ */
254
+ readonly executors: PersonaExecutors;
255
+ /**
256
+ * Forward-compatible extension bag — a later world-model / memory / tool-budget field is an
257
+ * additive key here, never a breaking change to the `Persona` shape. Opaque to the engine.
258
+ */
259
+ readonly extensions?: Readonly<Record<string, unknown>>;
260
+ /** Phantom: binds the persona to its deliverable type so `runPersonified` infers `D` from
261
+ * the persona and the chosen shape must agree. Type-only — never present at runtime. */
262
+ readonly __deliverable?: D;
263
+ }
264
+ /** The persona context blob — who the loop is acting as. Open by intent: a persona names its
265
+ * own role/audience/constraints; the framework treats it as opaque content. */
266
+ interface PersonaContext {
267
+ /** The role the loop embodies ("senior staff engineer", "equity research analyst", …). */
268
+ readonly role: string;
269
+ /** Optional freeform framing the persona's prompts/profiles consume. */
270
+ readonly notes?: string;
271
+ /** Open content bag — persona-specific fields a shape's child tasks may carry. */
272
+ readonly [key: string]: unknown;
273
+ }
274
+ /**
275
+ * How a persona supplies executor resolution. Either a pre-built registry (factories already
276
+ * closed over their seams) OR the raw seam bag the engine uses to construct a registry +
277
+ * thread the seams onto each spawn. At least one is required (the registry takes precedence when both are set) — fail loud if neither is set.
278
+ */
279
+ interface PersonaExecutors {
280
+ /** A registry whose factories already capture their seams. Highest precedence. */
281
+ readonly registry?: ExecutorRegistry;
282
+ /** Raw seams to thread onto built-in runtimes (`router`/`sandbox`/`cli` keys). */
283
+ readonly seams?: Readonly<Record<string, unknown>>;
284
+ }
285
+ /** The minimal input to build a `Persona`. Mirrors `Persona`; the builder is what runs
286
+ * the executors-supplied invariant check and freezes the record. */
287
+ interface DefinePersonaInput<D = unknown> {
288
+ readonly name: string;
289
+ readonly root: AgentSpec;
290
+ readonly directive: string;
291
+ readonly context: PersonaContext;
292
+ readonly executors: PersonaExecutors;
293
+ readonly extensions?: Readonly<Record<string, unknown>>;
294
+ /** Phantom: pins the input's deliverable type so `definePersona<D>` returns a `Persona<D>`
295
+ * the caller's shape must agree with. Type-only — never supplied at a call site. */
296
+ readonly __deliverable?: D;
297
+ }
298
+ /** Builds a frozen `Persona`, failing loud on the executors-supplied invariant (neither a
299
+ * registry nor seams = an unresolvable persona). Pure — no I/O, no engine. */
300
+ type DefinePersona = <D = unknown>(input: DefinePersonaInput<D>) => Persona<D>;
301
+ /**
302
+ * Budget knobs a shape reads to size its fanout/children WITHOUT owning the conserved pool.
303
+ * The root budget lives on `SupervisorOpts.budget`; the shape only needs the per-child
304
+ * sizing hints + the fanout width it is allowed to open. Both fields are ceilings — the pool reserves
305
+ * against them and fails closed, so an over-eager shape can never overspend.
306
+ */
307
+ interface ShapeBudget {
308
+ /** Per-child spawn budget the shape reserves for each leaf/sub-loop it opens. */
309
+ readonly perChild: Budget;
310
+ /** Max children a fanout step may open in one round (the shape's structural width). */
311
+ readonly fanout: number;
312
+ }
313
+ /**
314
+ * The construction context a `LoopShape` factory receives. Carries the persona's resolved
315
+ * executor seams + the budget knobs, plus the ONE helper a shape needs to spawn a child
316
+ * through the keystone: `spawnChild` resolves an `AgentSpec` (including a persona-derived one
317
+ * built with `childSpec`) into an `Agent` the shape hands to `scope.spawn`. The shape never touches the
318
+ * registry directly — it asks the context, keeping resolution single-sourced.
319
+ */
320
+ interface ShapeContext<D = unknown> {
321
+ readonly persona: Persona<D>;
322
+ readonly budget: ShapeBudget;
323
+ /**
324
+ * Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
325
+ * `scope.spawn(spawnChild(name, spec), task, opts)`. `name` labels the child for traces. The
326
+ * returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
327
+ * spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
328
+ */
329
+ spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
330
+ /** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
331
+ * the seam a shape uses to give a worker a narrower role/prompt than the root persona. */
332
+ childSpec(profile: AgentProfile$1, harness?: BackendType | null): AgentSpec;
333
+ }
334
+ /**
335
+ * A reusable act-body factory. Given the persona's content + seams (`ShapeContext`), it
336
+ * returns the root `Agent<Task, Outcome<D>>` whose `act` decomposes the task, fans out
337
+ * children through `scope.spawn`, verifies/selects across their settlements (selector≠judge:
338
+ * via `settledToIteration` + `defaultSelectWinner`, never re-ranking behind the driver), and
339
+ * synthesizes the terminal `Outcome<D>`. The shape is STRUCTURE; the persona is CONTENT.
340
+ */
341
+ type LoopShape<Task, D> = (ctx: ShapeContext<D>) => Agent<Task, Outcome<D>>;
342
+ /**
343
+ * The open shape registry — the extension point that makes a new loop-shape ONE file + one
344
+ * `registerShape` call with zero edits elsewhere. `resolve` returns a typed outcome (inspect
345
+ * `succeeded` before `value`); `register` fails loud on a duplicate name.
346
+ */
347
+ interface ShapeRegistry {
348
+ register<Task, D>(name: string, factory: LoopShape<Task, D>): void;
349
+ resolve<Task, D>(name: string): {
350
+ succeeded: true;
351
+ value: LoopShape<Task, D>;
352
+ } | {
353
+ succeeded: false;
354
+ error: string;
355
+ };
356
+ /** The registered shape names — for diagnostics + a fail-loud "unknown shape" message. */
357
+ names(): string[];
358
+ }
359
+ /**
360
+ * The end-to-end entrypoint. Builds the persona's root `Agent` from the chosen shape, then
361
+ * runs it through a fresh `createSupervisor` over the persona's executors + the supplied
362
+ * budget/journal/blobs. Returns the keystone's typed `SupervisedResult<Outcome<D>>` — a
363
+ * `winner` carries the synthesized `Outcome<D>`; a `no-winner` is never coerced into one.
364
+ *
365
+ * `shape` is either a resolved `LoopShape` or a registered shape NAME (resolved through the
366
+ * default registry). The journal/blobs default to in-memory impls in the engine when omitted
367
+ * (durable FS impls are passed explicitly for a persisted run).
368
+ */
369
+ interface RunPersonifiedOptions<Task, D> {
370
+ readonly persona: Persona<D>;
371
+ /** A resolved shape factory OR a registered shape name. */
372
+ readonly shape: LoopShape<Task, D> | string;
373
+ readonly task: Task;
374
+ readonly budget: Budget;
375
+ /** Per-child sizing + fanout width handed to the shape. Defaults derive from `budget`. */
376
+ readonly shapeBudget?: Partial<ShapeBudget>;
377
+ /** Trace/journal root key. Defaults to the persona name + a run discriminator in the engine. */
378
+ readonly runId?: string;
379
+ readonly journal?: SpawnJournal;
380
+ readonly blobs?: ResultBlobStore;
381
+ /** Runtime recursion-depth ceiling, paired with the conserved pool. */
382
+ readonly maxDepth?: number;
383
+ /** OTP intensity breaker bounds, forwarded to the supervisor verbatim. */
384
+ readonly maxRestarts?: number;
385
+ readonly withinMs?: number;
386
+ /** A live root handle to attach (view/signal/abort) before the run starts. */
387
+ readonly handle?: RootHandle<Outcome<D>>;
388
+ readonly now?: () => number;
389
+ readonly signal?: AbortSignal;
390
+ }
391
+ /** The composed run signature. */
392
+ type RunPersonified = <Task, D>(options: RunPersonifiedOptions<Task, D>) => Promise<SupervisedResult<Outcome<D>>>;
393
+
394
+ /**
395
+ * @experimental
396
+ *
397
+ * The RSI-wave type surface — the FROZEN contracts the wave's Core + Compose build to.
398
+ *
399
+ * The keystone (`../supervise/`) is pure execution structure: a recursive `Agent` atom in a
400
+ * budget-conserving `Scope`, run to a typed `SupervisedResult` by a `Supervisor`. The persona
401
+ * layer (`./types`, `./persona`) adds the "act like X" content seam (`Persona` = `AgentSpec` +
402
+ * `directive` + `context`, `LoopShape = (ctx) => Agent`, `Outcome<D>`). This module freezes the
403
+ * remaining four wave seams ON TOP of those — and nothing more:
404
+ *
405
+ * 1. GENERIC COMBINATORS — the content-free act-library. Five composable shapes
406
+ * (`pipeline`/`fanout`/`loopUntil`/`panel`/`verify`) plus the streaming widener (G5). Each
407
+ * is a `CombinatorShape` (a `LoopShape` whose `Agent.act` runs the combinator over `Scope`),
408
+ * so a combinator IS just a `LoopShape` — no new engine type. The SHAPE is here; the DOMAIN
409
+ * (model, prompt, role) stays on the `Persona`. There is no "research" or "code" combinator:
410
+ * a research sweep is `fanout` under a research persona; a build is `pipeline` under a coder.
411
+ * 2. ANALYST-ON-SCOPE (G1, a PORT) — `ScopeAnalyst` carries the round-synchronous driver's
412
+ * analyze→findings→steer wire (dynamic.ts) across to the reactive `Scope`, behind
413
+ * the same trace-derived firewall (`assertTraceDerivedFindings` semantics): a reactive
414
+ * combinator steers from trace FINDINGS, never a child's raw `verdict`.
415
+ * 3. CROSS-RUN CORPUS (G2) — `Corpus` is the DURABLE accreted-fact store, DISTINCT from the
416
+ * per-run `SpawnJournal`/`ResultBlobStore`. `renderCorpusToInstructions` is the read-back:
417
+ * it projects accreted facts into `AgentProfile.prompt.instructions` / `resources.instructions`
418
+ * for the next run's persona (the learning-flywheel READ side).
419
+ * 4. TRAJECTORY TRACE + COST LEDGER — `trajectoryReport(journal, blobs)` reconstructs the whole
420
+ * spawn tree with per-node + rolled-up `Spend`; `equalKOnCost` compares arms on conserved
421
+ * COST (tokens/usd), NOT raw iteration count — closing the leaf-fanout confound.
422
+ *
423
+ * Layering: imports ONLY keystone runtime types (`../supervise/types`), persona types
424
+ * (`./types`), the substrate `AnalystFinding`/`AgentProfile`, and the durable-store interfaces.
425
+ * Pure types/interfaces — this module typechecks standalone, owns no impl, invents no engine.
426
+ */
427
+
428
+ /**
429
+ * A combinator is just a `LoopShape`: a factory `(ShapeContext) => Agent` whose `Agent.act`
430
+ * runs the combinator's structure over the `Scope` (spawn children, drain `next()`, select via
431
+ * the single-sourced `settledToIteration`+`defaultSelectWinner`, synthesize an `Outcome<D>`).
432
+ * Aliased — NOT a new type — so a combinator stays a first-class shape the persona layer's
433
+ * `runPersonified`/`ShapeRegistry` resolve with zero new machinery. The SHAPE is content-free;
434
+ * the persona carries the domain.
435
+ */
436
+ type CombinatorShape<Task, D> = LoopShape<Task, D>;
437
+ /**
438
+ * `pipeline(stages)` — sequential composition: each stage's `Outcome.deliverable` feeds the next
439
+ * stage's task (via `feed`). The first `blocked` stage short-circuits the whole pipeline (its
440
+ * blockers ARE the pipeline's blockers — never coerced past a failed stage). The terminal
441
+ * stage's `done` deliverable is the pipeline's deliverable. Spawns one child per stage in order;
442
+ * a stage that the conserved pool cannot admit is a concrete blocker.
443
+ *
444
+ * No domain: "code build test" is `pipeline([plan, implement, integrate])` under a coder persona,
445
+ * not a named shape. A stage names only its label + how to derive its task from the prior output.
446
+ */
447
+ interface PipelineStage<Task, StepIn, StepOut> {
448
+ /** Trace/journal label for this stage's spawned child. */
449
+ readonly label: string;
450
+ /** Derive this stage's task from `prior` — the prior stage's deliverable (or the root task for stage 0);
451
+ * `rootTask` and the shape `ctx` are passed alongside. Pure projection — the returned task is typed `unknown`: the framework never interprets the result; the resolved leaf does. */
452
+ feed(prior: StepIn, ctx: ShapeContext<unknown>, rootTask: Task): unknown;
453
+ /** Read this stage's settled child output into the typed `Outcome<StepOut>` the next stage feeds on.
454
+ * Fail loud (return a `blocked`) when the child produced nothing usable for the next stage. */
455
+ collect(settled: Settled<Outcome<StepOut>>): Outcome<StepOut>;
456
+ }
457
+ /** `pipeline(stages)` — build the sequential combinator from an ordered stage list. The first
458
+ * stage's `StepIn` is the root `Task`; the last stage's `StepOut` is the deliverable `D`. Every stage is typed `PipelineStage<Task, unknown, unknown>` here, so that stage-to-stage chaining is a convention of the contract rather than something this signature checks. */
459
+ type Pipeline = <Task, D>(stages: ReadonlyArray<PipelineStage<Task, unknown, unknown>>) => CombinatorShape<Task, D>;
460
+ /**
461
+ * `fanout(items, { synthesize? })` — N children spawned in one round (one per item, bounded by
462
+ * the conserved pool's fail-closed admission), drained via `scope.next()`, then optionally a
463
+ * single SYNTHESIS child over the gathered results. Without `synthesize`, the combinator returns
464
+ * the best-valid child via the single-sourced selector (selector≠judge). A round that admitted
465
+ * zero children, or whose synthesis child could not be admitted, is a concrete blocker.
466
+ *
467
+ * No domain: a "research sweep over angles" is `fanout(angles, { synthesize: cite })` under a
468
+ * research persona; a "fanout-vote" is `fanout(copies)` with the default selector. The item list
469
+ * + the synthesis posture are the SHAPE's args; the prompt that turns an item into work is the
470
+ * persona's.
471
+ */
472
+ interface FanoutOptions<Item, D> {
473
+ /** Required. Builds one child task per item from `item` + the index discriminator. The persona's directive/context
474
+ * is threaded in by the combinator; this only supplies the per-item discriminator. */
475
+ itemTask(item: Item, index: number, ctx: ShapeContext<D>): unknown;
476
+ /** Optional per-item child label (defaults to `item:<index>` in the impl). */
477
+ label?(item: Item, index: number): string;
478
+ /**
479
+ * Optional synthesis over the gathered child results: when present, the combinator spawns ONE
480
+ * synthesis child whose task is built from the drained settlements, and its `done` output is
481
+ * the deliverable. When absent, the deliverable is the best-valid child via `defaultSelectWinner`.
482
+ * The synthesis child is a SEPARATE keystone agent (not a re-rank behind the driver).
483
+ */
484
+ synthesize?: FanoutSynthesis<D>;
485
+ }
486
+ /** How a fanout's synthesis child is built + read; both members are required. `synthesisTask` projects the drained child
487
+ * settlements into the synthesis child's task (typed `unknown`); `collect` reads its settled output into the
488
+ * deliverable `Outcome<D>`. */
489
+ interface FanoutSynthesis<D> {
490
+ synthesisTask(gathered: ReadonlyArray<Settled<Outcome<D>>>, ctx: ShapeContext<D>): unknown;
491
+ collect(settled: Settled<Outcome<D>>): Outcome<D>;
492
+ }
493
+ /** `fanout(items, opts)` — build the fanout combinator over a static item list. `Item` and `D` are tied to the arguments; `Task` appears only in the returned `CombinatorShape<Task, D>`. */
494
+ type Fanout = <Task, Item, D>(items: ReadonlyArray<Item>, opts: FanoutOptions<Item, D>) => CombinatorShape<Task, D>;
495
+ /**
496
+ * `loopUntil(seed, { step, fold, until })` — iterative deepening inside the conserved pool: spawn one `step`
497
+ * child per round, ask `until` whether the accumulated state satisfies the goal, and stop when it
498
+ * does OR when the pool can no longer admit a step (budget IS the loop bound — no unbounded
499
+ * while). The deployable, non-oracle stop: `until` is the satisfiability gate, read from trace
500
+ * findings + accumulated deliverables, never a fresh raw verdict the loop minted to stop itself.
501
+ *
502
+ * No domain: "refine until tests pass" is `loopUntil` with a coder persona + a `step` that edits
503
+ * and an `until` that reads the test-finding; the combinator owns only the round/stop wiring.
504
+ */
505
+ interface LoopUntilSpec<Task, State, D> {
506
+ /** Build the next step child's task (typed `unknown`) from the root task + the state accumulated so far. */
507
+ step(rootTask: Task, state: LoopUntilState<State>, ctx: ShapeContext<D>): unknown;
508
+ /** Fold one settled step into the accumulated state (the loop's running deliverable candidate), returning the next `LoopUntilState`. */
509
+ fold(prior: LoopUntilState<State>, settled: Settled<Outcome<D>>): LoopUntilState<State>;
510
+ /**
511
+ * The satisfiability gate: given the accumulated state + the round's trace findings, has the
512
+ * goal been reached? Returns the terminal `Outcome<D>` when satisfied, or `null` to keep going.
513
+ * Reads `findings` (trace-derived), NOT a raw verdict score — the deployable-stop discipline.
514
+ */
515
+ until(state: LoopUntilState<State>, findings: ReadonlyArray<AnalystFinding>): Outcome<D> | null;
516
+ /** Optional per-round step label (defaults to `step:<round>` in the impl). */
517
+ label?(round: number): string;
518
+ }
519
+ /** The accumulated state `loopUntil` threads across rounds — the running candidate (`value`) + the round
520
+ * index (`round`), so `step`/`fold`/`until` are pure functions of it (replay-safe, no wall-clock). Both fields are read-only. */
521
+ interface LoopUntilState<State> {
522
+ readonly round: number;
523
+ readonly value: State;
524
+ }
525
+ /** `loopUntil(seed, spec)` — build the iterative-deepening combinator. `seed` is the initial state (a raw `State`, not a `LoopUntilState`); `spec` supplies `step`/`fold`/`until`. */
526
+ type LoopUntil = <Task, State, D>(seed: State, spec: LoopUntilSpec<Task, State, D>) => CombinatorShape<Task, D>;
527
+ /**
528
+ * `panel(judges)` — M judges over ONE artifact, merged WRITE-ONLY (selector≠judge taken to its
529
+ * limit). The combinator spawns the M judge children over the same input artifact, drains their
530
+ * settlements, and MERGES their findings into a panel verdict via `merge` — a pure WRITE-ONLY
531
+ * fold (a judge's output is never fed back to steer another judge, and the merge never re-ranks
532
+ * the children behind the driver). The merged verdict gates the deliverable.
533
+ *
534
+ * No domain: a "code review panel" and an "essay rubric panel" are the same `panel` shape under
535
+ * different personas; the rubric lives in each judge persona's profile, not the combinator.
536
+ */
537
+ interface PanelSpec<Artifact, D> {
538
+ /** The M judge child specs: each is a persona-derived child (a narrower judge profile). The
539
+ * combinator spawns one child per entry over the SAME `artifact` and never lets one judge's
540
+ * output reach another's task (write-only). */
541
+ readonly judges: ReadonlyArray<PanelJudge>;
542
+ /** Build one judge child's task (typed `unknown`) from the shared artifact under review + the judge descriptor. */
543
+ judgeTask(artifact: Artifact, judge: PanelJudge, ctx: ShapeContext<D>): unknown;
544
+ /**
545
+ * Write-only merge: fold the M settled judge verdicts into the panel's terminal `Outcome<D>`.
546
+ * Pure over the drained settlements — it MUST NOT spawn, re-judge, or feed one verdict into
547
+ * another. A panel that reached no quorum is a concrete blocker (fail loud, never a vacuous done).
548
+ * Receives the shared `artifact` as well as the verdict list.
549
+ */
550
+ merge(verdicts: ReadonlyArray<PanelVerdict>, artifact: Artifact): Outcome<D>;
551
+ }
551
+ /** One judge in a panel — a labeled persona-derived judge child. Content (the rubric) lives in
552
+ * the judge's profile; this carries only the required `label` + the optional weight the merge may read. */
553
+ interface PanelJudge {
554
+ readonly label: string;
555
+ /** Optional merge weight (a write-only hint the `merge` fold may use; default-equal in the impl). */
556
+ readonly weight?: number;
557
+ }
558
+ /** One judge child's settled verdict, surfaced to the write-only `merge`. `down` judges carry no
559
+ * verdict (excluded from the merge `n`, like an infra-errored cell). `verdict` and `output` are optional fields; `judge` and `down` are always present. */
560
+ interface PanelVerdict {
561
+ readonly judge: PanelJudge;
562
+ readonly verdict?: DefaultVerdict;
563
+ /** The judge child's raw output — what it was asked to assess, for a merge that quotes it. */
564
+ readonly output?: unknown;
565
+ /** True when the judge child went `down` (no usable verdict — kept out of the merge denominator). */
566
+ readonly down: boolean;
567
+ }
568
+ /** `panel(spec)` — build the M-judge write-only-merge combinator. `Artifact` and `D` are tied to `spec`; `Task` appears only in the returned `CombinatorShape<Task, D>`. */
569
+ type Panel = <Task, Artifact, D>(spec: PanelSpec<Artifact, D>) => CombinatorShape<Task, D>;
570
+ /**
571
+ * `verify({ implement, verifier })` — the 2-node sequential gate: an IMPLEMENT child produces a
572
+ * candidate, then a SEPARATE VERIFIER child's verdict GATES shippability. A `valid` verifier
573
+ * verdict ships the implement deliverable; any other outcome (implement down, verifier down,
574
+ * invalid verdict) becomes a concrete blocker carrying the failure verbatim — never a coerced
575
+ * "done". The verifier is a distinct keystone agent (selector≠judge: the implement child does
576
+ * not grade itself).
577
+ *
578
+ * No domain: "write code then run the test gate" and "draft then fact-check" are the same `verify`
579
+ * shape under different personas; the gate rubric is the verifier persona's, not the combinator's.
580
+ */
581
+ interface VerifySpec<Task, Candidate, D> {
582
+ /** Build the implement child's task (typed `unknown`) from the root task. */
583
+ implement(rootTask: Task, ctx: ShapeContext<D>): unknown;
584
+ /** Build the verifier child's task (typed `unknown`) from the implement child's settled candidate. */
585
+ verifier(candidate: Settled<Outcome<Candidate>>, ctx: ShapeContext<D>): unknown;
586
+ /** Project the gated (verifier-`valid`) candidate, together with the verifier's `verdict`, into the terminal deliverable. */
587
+ collect(candidate: Settled<Outcome<Candidate>>, verdict: DefaultVerdict): Outcome<D>;
588
+ /** Optional implement / verifier child labels (default `implement` / `verify` in the impl). */
589
+ readonly implementLabel?: string;
590
+ readonly verifierLabel?: string;
591
+ }
592
+ /** `verify(spec)` — build the 2-node implement→verifier-gate combinator; returns a `CombinatorShape<Task, D>` (the `Candidate` type stays internal to `spec`). */
593
+ type Verify = <Task, Candidate, D>(spec: VerifySpec<Task, Candidate, D>) => CombinatorShape<Task, D>;
594
+ /**
595
+ * `widen({ gate })` (G5) — the STREAMING spawn-on-completion driver. Unlike the static-fanout
596
+ * combinators above, the widener REACTS to each `scope.next()`: as each child settles it consults
597
+ * the `WidenGate` and, when a lineage is `promising`, widens by AT MOST ONE child toward it under
598
+ * the remaining conserved pool. Defaults to FLAT (the gate never widens) so a gate run stays
599
+ * non-widening and the R2 selector≠judge collision is dormant. `promising` is derived from the
600
+ * round's analyst FINDINGS (via `ScopeAnalyst`, §2), NOT a child's raw `verdict` — the firewall.
601
+ *
602
+ * This is the progressive-widening (MCTS-PW) combinator: the one shape whose breadth is decided
603
+ * at runtime from the diagnosis, not fixed at spawn. It is the mechanism the diverse-strategy-vs-
604
+ * blind GATE is run with — kept FLAT by default until that gate returns positive (don't build
605
+ * mechanism ahead of the gate).
606
+ */
607
+ interface WidenSpec<Seed, D> {
608
+ /** The initial children to spawn before any widening — the seed lineages the gate widens from.
609
+ * One child task per seed (built by `seedTask`); bounded by the conserved pool's fail-closed admission. */
610
+ readonly seeds: ReadonlyArray<Seed>;
611
+ seedTask(seed: Seed, index: number, ctx: ShapeContext<D>): unknown;
612
+ /**
613
+ * The progressive-widening gate. Consulted on EVERY settled child with the round's
614
+ * trace-derived `findings`; returns a widen decision (spawn one more toward a lineage) or a
615
+ * stop. DEFAULTS to flat via `flatWidenGate` — never widens, so the firewall stays dormant.
616
+ * NOTE(review): the field is declared required here, so the flat default is presumably applied by the caller passing the flat gate (or by the impl) — confirm.
617
+ */
618
+ readonly gate: ScopeWidenGate<D>;
619
+ /** Build the widened child's task (typed `unknown`) from the lineage the gate chose to extend. */
620
+ widenTask(toward: WidenLineage<D>, ctx: ShapeContext<D>): unknown;
621
+ /** Synthesize the terminal deliverable from every settled lineage (selector≠judge: the
622
+ * single-sourced selector over the gathered children, never a re-judge). */
623
+ synthesize(gathered: ReadonlyArray<Settled<Outcome<D>>>, ctx: ShapeContext<D>): Outcome<D>;
624
+ }
624
+ /**
625
+ * The runtime widening gate (the reactive analogue of the keystone's `WidenGate`, lifted to read
626
+ * trace FINDINGS instead of a raw verdict). `decide` is consulted per settled child; it MUST
627
+ * derive `promising` from `findings`, never from `settled.verdict`, unless `judgeExempt` is
628
+ * explicitly argued (the documented off-by-default escape hatch). Flat default never widens.
629
+ */
630
+ interface ScopeWidenGate<D> {
631
+ decide(settled: Settled<Outcome<D>>, findings: ReadonlyArray<AnalystFinding>, budget: Scope<Outcome<D>>['budget']): WidenDecision<D>;
632
+ /** Optional. When true, `decide` may read `settled.verdict` directly — collides with the steer firewall,
633
+ * so it must be argued per cell, never defaulted on (mirrors the keystone `WidenGate`). */
634
+ readonly judgeExempt?: boolean;
635
+ }
636
+ /** A widening decision: extend one lineage by one child (`kind: 'widen'`, naming the lineage in `toward`), or stop widening (`kind: 'stop'`, with an optional `rationale`). `flatWidenGate`
637
+ * always returns `{ kind: 'stop' }`. */
638
+ type WidenDecision<D> = {
639
+ kind: 'widen';
640
+ toward: WidenLineage<D>;
641
+ } | {
642
+ kind: 'stop';
643
+ rationale?: string;
644
+ };
645
+ /** A lineage the gate may widen toward — the settled child that looked promising + the findings
646
+ * that justified it (the trace-derived provenance the firewall requires). `settled` is narrowed to the `kind: 'done'` settlement variant. */
647
+ interface WidenLineage<D> {
648
+ readonly settled: Extract<Settled<Outcome<D>>, {
649
+ kind: 'done';
650
+ }>;
651
+ readonly findings: ReadonlyArray<AnalystFinding>;
652
+ }
653
+ /** `widen(spec)` — build the streaming progressive-widening combinator. `Seed` and `D` are tied to `spec`; `Task` appears only in the returned `CombinatorShape<Task, D>`. */
654
+ type Widen = <Task, Seed, D>(spec: WidenSpec<Seed, D>) => CombinatorShape<Task, D>;
655
+ /** The flat default `ScopeWidenGate` factory contract — a zero-argument factory whose gate never widens, keeping the R2 firewall
656
+ * conflict dormant. Exported so a gate run can pass it explicitly and a test can assert the
657
+ * default is flat. */
658
+ type FlatWidenGate = <D>() => ScopeWidenGate<D>;
659
+ /**
660
+ * The reactive analyst seam — the PORT of the round-synchronous driver's `analyze` hook
661
+ * (dynamic.ts) onto the reactive `Scope`. The old driver wired the analyst at round
662
+ * boundaries (`plan` ran the analyst over `history` BEFORE the planner); the reactive `Scope` has
663
+ * no rounds, so this carries the wire across: a combinator's `act` asks the `ScopeAnalyst` to turn
664
+ * the settled children SO FAR into `AnalystFinding[]`, and steers from THOSE findings.
665
+ *
666
+ * The firewall is preserved (selector≠judge): `analyze` runs the trace-derived analyst and the
667
+ * impl asserts `assertTraceDerivedFindings` semantics — a finding citing judge/verdict/score
668
+ * `metric` evidence aborts the round. The steer decision reads `findings`, NEVER the children's
669
+ * raw `verdict`. Fail loud — a throwing or non-array analyst aborts (no silent empty findings).
670
+ */
671
+ interface ScopeAnalyst<D> {
672
+ /**
673
+ * Turn the children settled so far into trace-derived findings. `settledSoFar` is the cursor-
674
+ * ordered settlement list a combinator has drained (the reactive analogue of the old driver's
675
+ * `history`). The impl runs the analyst, then enforces the trace-derived firewall before
676
+ * returning — a judge-derived finding is rejected, not filtered. Async: resolves to a read-only array of `AnalystFinding`.
677
+ */
678
+ analyze(input: ScopeAnalyzeInput<D>): Promise<ReadonlyArray<AnalystFinding>>;
679
+ }
680
+ /** Input to a `ScopeAnalyst.analyze` — the root task framing + the children settled so far. The
681
+ * reactive analogue of the old `AnalyzeInput { task, history }`. */
682
+ interface ScopeAnalyzeInput<D> {
683
+ /** Opaque root-task framing (whatever the combinator was invoked with); typed `unknown`. */
684
+ readonly task: unknown;
685
+ /** The children this combinator has drained off `scope.next()`, in cursor order. */
686
+ readonly settledSoFar: ReadonlyArray<Settled<Outcome<D>>>;
687
+ /** This combinator's scope id (the trace-correlation root for the analyst). */
688
+ readonly nodeId: NodeId;
689
+ }
690
+ /**
691
+ * How a combinator's `act` consumes findings to steer — the SINGLE firewalled steer surface a
692
+ * reactive combinator reads. `loopUntil.until`, `widen` gate, and any future steer all funnel
693
+ * through a `SteerContext` so the firewall is enforced in one place: `findings` is trace-derived
694
+ * (the analyst already asserted it), and a combinator MUST NOT reach back to `settled.verdict`
695
+ * for the steer decision. `lastValidScore` is provided for OBSERVABILITY only (rendering/traces),
696
+ * explicitly NOT for steering — reading it to steer is the coupling the architecture forbids.
697
+ */
698
+ interface SteerContext<D> {
699
+ readonly findings: ReadonlyArray<AnalystFinding>;
700
+ readonly settledSoFar: ReadonlyArray<Settled<Outcome<D>>>;
701
+ /** Optional, observability-only: the best valid score seen so far. Rendering/trace use ONLY — steering
702
+ * off this re-introduces selector=judge. Marked so a reviewer catches a misuse. */
703
+ readonly lastValidScore?: number;
704
+ }
705
+ /**
706
+ * The firewall assertion contract, re-stated for the reactive seam (PORT of
707
+ * `assertTraceDerivedFindings`). A PROVENANCE check, not a content check: span/event/artifact/
708
+ * finding refs and empty-evidence findings pass; only a `metric` ref whose uri is a
709
+ * judge/verdict/score scheme is rejected. Fail loud — a tainted finding aborts. The impl lives in
710
+ * `analyst.ts`; this type pins its signature so callers depend on the contract, not the impl.
711
+ * The signature returns `void`: a passing check yields no value.
712
+ */
713
+ type AssertTraceDerivedFindings = (findings: ReadonlyArray<AnalystFinding>) => void;
713
+ /**
714
+ * One accreted fact in the cross-run corpus — the learning-flywheel's durable unit. DISTINCT from
715
+ * a `SpawnEvent` (a per-run decision record): a `CorpusRecord` is a fact a run LEARNED that a
716
+ * FUTURE run should read back (the world-model for story 5). It is content the next persona reads,
717
+ * not a replay input. Tagged + scored so `query`/`renderCorpusToInstructions` can project the
718
+ * relevant, high-confidence subset.
719
+ */
720
+ interface CorpusRecord {
721
+ readonly schemaVersion: '1.0.0';
722
+ /** Stable id over identity-defining fields (claim + tags) so a re-learned fact dedups. */
723
+ readonly id: string;
724
+ /** The run that produced this fact (the journal `runId`/`root`) — provenance back to the trace. */
725
+ readonly runId: NodeId;
726
+ readonly producedAt: string;
727
+ /** Coarse classification the query/render filters on (free-form, mirrors `AnalystFinding.area`). */
728
+ readonly area: string;
729
+ /** The accreted fact — the instruction-shaped statement the next run reads back. */
730
+ readonly claim: string;
731
+ /** Optional supporting detail the renderer may include under the claim. */
732
+ readonly rationale?: string;
733
+ /** Free-form tags for `query` filtering (domain, persona, surface). */
734
+ readonly tags: ReadonlyArray<string>;
735
+ /** 0..1 — the producing run's confidence in this fact (the render threshold reads it). */
736
+ readonly confidence: number;
737
+ /** Optional provenance back into the run that learned it (a finding id / outRef / span), as a list of `{ kind, uri }` string pairs. */
738
+ readonly evidence?: ReadonlyArray<{
739
+ readonly kind: string;
740
+ readonly uri: string;
741
+ }>;
742
+ }
743
+ /** A corpus query filter — every field is an AND-narrowing; an omitted field does not constrain. */
744
+ interface CorpusFilter {
745
+ readonly area?: string;
746
+ /** Optional. Match records carrying ALL of these tags. */
747
+ readonly tags?: ReadonlyArray<string>;
748
+ /** Optional minimum confidence a record must clear to be returned (the render gate). */
749
+ readonly minConfidence?: number;
750
+ /** Optional. Only records from this run (rare — usually a cross-run read). */
751
+ readonly runId?: NodeId;
752
+ /** Optional cap on the result count (most-confident first in the impl). */
753
+ readonly limit?: number;
754
+ }
755
+ /**
756
+ * The durable cross-run corpus — the learning-flywheel store. DISTINCT from `SpawnJournal`
757
+ * (per-run decisions, replay) and `ResultBlobStore` (per-run payloads): `Corpus` holds accreted
758
+ * FACTS across runs that the next run reads back. `InMemoryCorpus` + `FileCorpus` (JSONL) impls
759
+ * live in `corpus.ts` and MAY share a storage spine with the JSONL journal, but the INTERFACE is
760
+ * separate so a consumer never confuses a replay record with a learned fact.
761
+ *
762
+ * Fail-loud, typed-outcome boundary: `append` is idempotent on an identical record (same `id` +
763
+ * `claim`); a conflicting re-append under the same `id` is a typed error, never a silent overwrite.
764
+ */
765
+ interface Corpus {
766
+ /** Append one accreted fact. Idempotent on an identical record; returns a typed outcome —
767
+ * inspect `succeeded` before treating it as durable (no silent write-through on conflict). The `succeeded: false` arm carries an `error` string. */
768
+ append(record: CorpusRecord): Promise<{
769
+ succeeded: true;
770
+ } | {
771
+ succeeded: false;
772
+ error: string;
773
+ }>;
774
+ /** Query accreted facts by filter — most-confident first. Resolves to the matching records (an
775
+ * empty array when none match is a valid result, NOT an error). */
776
+ query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
777
+ }
778
+ /**
779
+ * Project accreted corpus facts into an `AgentProfile`'s instruction seams — the learning-flywheel
780
+ * READ side. Reads the corpus through `filter`, renders the matching facts into instruction lines,
781
+ * and returns a NEW profile with them merged into `prompt.instructions` (the append-line seam) so
782
+ * the next run's persona reads the accreted world-model. Pure projection over the queried records;
783
+ * never mutates the input profile (returns a fresh one). The impl lives in `corpus.ts`.
784
+ *
785
+ * `resources.instructions` is `string | AgentProfileResourceRef`; `prompt.instructions` is
786
+ * `string[]`. The render targets `prompt.instructions` (additive lines) by default; a caller that
787
+ * wants the single-blob `resources.instructions` form passes `target: 'resources'`.
788
+ */
789
+ interface RenderCorpusToInstructionsOptions {
790
+ readonly corpus: Corpus;
791
+ readonly filter: CorpusFilter;
792
+ /** The profile to project the facts into. The result is a fresh profile — the input is unchanged. */
793
+ readonly profile: AgentProfile$1;
794
+ /** Optional. Where the rendered facts land: appended to `prompt.instructions[]` (default) or folded into
795
+ * the single-blob `resources.instructions` string. */
796
+ readonly target?: 'prompt' | 'resources';
797
+ /** Optional cap on rendered lines (most-confident first), independent of the query `limit`. */
798
+ readonly maxLines?: number;
799
+ }
800
+ /** `renderCorpusToInstructions(opts)` — the flywheel read-back projection. Async (queries the
801
+ * durable corpus); resolves to a fresh `AgentProfile` with the accreted facts merged in. */
802
+ type RenderCorpusToInstructions = (opts: RenderCorpusToInstructionsOptions) => Promise<AgentProfile$1>;
803
+ /**
804
+ * One node in the reconstructed trajectory tree — a driver OR a leaf, with its OWN spend and the
805
+ * spend ROLLED UP over its subtree. Reconstructed from the `SpawnJournal` (structure + per-node
806
+ * `Spend`) + the `ResultBlobStore` (the `out` artifact, rehydrated by `outRef`). The realized tree
807
+ * shape: `parent`/`children` are the actual spawn edges the run took, not a planned topology.
808
+ */
809
+ interface TrajectoryNode {
810
+ readonly id: NodeId;
811
+ readonly parent?: NodeId;
812
+ readonly children: ReadonlyArray<NodeId>;
813
+ readonly label: string;
814
+ readonly runtime: string;
815
+ /** Terminal status the journal recorded for this node. NOTE(review): the union also includes `pending`, which is not terminal — presumably a node with no settlement journaled; confirm against the impl. */
816
+ readonly status: 'done' | 'failed' | 'cancelled' | 'pending';
817
+ /** This node's OWN conserved spend (from its `settled` event). */
818
+ readonly ownSpend: Spend;
819
+ /** This node's spend PLUS every descendant's — the rolled-up subtree cost. The cost a parent
820
+ * "really" consumed inclusive of its children's fanout (the equal-k-on-cost basis). */
821
+ readonly rolledUpSpend: Spend;
822
+ /** The node's verdict, when its settlement carried one (observability — NOT a steer input). */
823
+ readonly verdict?: DefaultVerdict;
824
+ /** The rehydrated output artifact, when `withOutputs` was requested + the blob resolved; typed `unknown`. */
825
+ readonly output?: unknown;
826
+ readonly outRef?: string;
827
+ }
828
+ /** The whole reconstructed trajectory — the realized tree + its root-rolled-up total. The
829
+ * per-node + rolled-up `Spend` is the evidence both the trace viewer and `equalKOnCost` read. */
830
+ interface TrajectoryReport {
831
+ readonly root: NodeId;
832
+ /** Every node, in cursor/spawn order — the realized tree (`parent`/`children` are the real edges). */
833
+ readonly nodes: ReadonlyArray<TrajectoryNode>;
834
+ /** The root's rolled-up spend — the whole run's conserved total (tokens + usd + iterations + ms). */
835
+ readonly total: Spend;
836
+ /** Count of nodes by status — a quick "how did the tree end" readout. Keyed by every `TrajectoryNode['status']` value, so it includes a `pending` bucket alongside the terminal ones. */
837
+ readonly statusCounts: Readonly<Record<TrajectoryNode['status'], number>>;
838
+ }
839
+ /**
840
+ * `trajectoryReport(journal, blobs, root, { withOutputs? })` — reconstruct the whole tree with
841
+ * per-node + rolled-up `Spend`. Reads the journal for structure + spend and (when `withOutputs`)
842
+ * the blob store for each `done` node's artifact. Fail loud on a tree that was never journaled or
843
+ * a `done` node whose blob the store cannot rehydrate (a silent gap would mis-cost the tree). The
844
+ * impl lives in `trajectory.ts`.
845
+ */
846
+ interface TrajectoryReportOptions {
847
+ /** Optional. Rehydrate each `done` node's `output` from the blob store. Off by default (cost-only report). */
848
+ readonly withOutputs?: boolean;
849
+ }
850
+ /** `trajectoryReport(journal, blobs, root, options?)` — the tree+cost reconstructor. Async (reads journal + optionally blobs); resolves to a `TrajectoryReport`. */
851
+ type TrajectoryReportFn = (journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId, options?: TrajectoryReportOptions) => Promise<TrajectoryReport>;
852
+ /**
853
+ * One arm of an equal-k comparison — a `label` plus its trajectory `report` (a `TrajectoryReport` is one arm's whole
854
+ * run). The arm's conserved COST is `report.total` (tokens + usd), which the sandbox executor
855
+ * already reports INCLUSIVE of a leaf's internal sub-agent fanout — so comparing arms on this cost
856
+ * (not raw `iterations`) closes the leaf-fanout confound: a treatment arm whose leaf fanned out
857
+ * internally is charged for that fanout in `total.tokens`/`total.usd`, not hidden behind one
858
+ * iteration count.
859
+ */
860
+ interface EqualKArm {
861
+ readonly label: string;
862
+ readonly report: TrajectoryReport;
863
+ }
864
+ /**
865
+ * The equal-k-on-cost verdict: whether every arm spent within `tolerance` of the others on the
866
+ * CONSERVED cost channels (tokens + usd), so a downstream metric comparison is "at equal k". Per-
867
+ * arm cost is surfaced so a caller can see HOW close. `withinTolerance: false` means the arms are
868
+ * NOT comparable at equal compute — a confound to report, not a result to publish.
869
+ */
870
+ interface EqualKVerdict {
871
+ readonly withinTolerance: boolean;
872
+ /** Per-arm conserved cost (the basis: tokens total + usd). Each entry also carries `iterations`; note that `spread` below covers only tokens and usd. */
873
+ readonly arms: ReadonlyArray<{
874
+ readonly label: string;
875
+ readonly tokens: number;
876
+ readonly usd: number;
877
+ readonly iterations: number;
878
+ }>;
879
+ /** The realized spread on each channel (max − min across arms), for the report. */
880
+ readonly spread: {
881
+ readonly tokens: number;
882
+ readonly usd: number;
883
+ };
884
+ /** The fractional tolerance the check used (spread / median ≤ tolerance per channel). */
885
+ readonly tolerance: number;
886
+ }
887
+ /**
888
+ * `equalKOnCost(arms, { tolerance? })` — assert arms are comparable at EQUAL conserved COST
889
+ * (tokens + usd), NOT raw iteration count. The conserved-pool guarantees `Σk` equal by
890
+ * construction WITHIN one supervised run; this checks it ACROSS arms (separate runs) where the
891
+ * pool cannot, so a cross-arm gate comparison can prove equal compute before claiming a win. The
892
+ * impl lives in `trajectory.ts`. Pure over the reports — no I/O.
893
+ */
894
+ interface EqualKOnCostOptions {
895
+ /** Optional max fractional spread (spread/median) per channel for arms to count as equal-k. Default in
896
+ * the impl (e.g. 0.05). A tighter tolerance = a stricter equal-compute claim. */
897
+ readonly tolerance?: number;
898
+ }
899
+ /** `equalKOnCost(arms, options?)` — the cross-arm equal-compute check on conserved cost. Synchronous: returns the `EqualKVerdict` directly rather than a promise. */
900
+ type EqualKOnCost = (arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions) => EqualKVerdict;
901
+
902
+ /**
903
+ * @experimental
904
+ *
905
+ * Analyst-on-scope (G1) — the PORT of the round-synchronous driver's analyze→findings→steer
906
+ * wire (`dynamic.ts`) onto the reactive `Scope`.
907
+ *
908
+ * The old dynamic driver wired the analyst at round boundaries: `plan` ran the analyst over
909
+ * `history` BEFORE the planner and handed the findings forward via `PlannerContext.analyses`,
910
+ * behind a provenance firewall (`assertTraceDerivedFindings`) that keeps the external write-only
911
+ * judge out of the steer decision (selector ≠ judge). The reactive `Scope` has no rounds, so this
912
+ * module carries the same wire across: a combinator's `act` asks a `ScopeAnalyst` to turn the
913
+ * children it has drained off `scope.next()` SO FAR into `AnalystFinding[]`, and steers from THOSE
914
+ * findings through a single `SteerContext`.
915
+ *
916
+ * The analyst itself is not a new type — it is "just an `Agent<unknown, AnalystFinding[]>`" the
917
+ * combinator spawns over a child's trace (harness `null`/`cli`). `createScopeAnalyst` spawns that
918
+ * agent through `Scope.spawn` (so its compute is metered by the conserved pool like any child),
919
+ * drains its settlement, then enforces the firewall on the way out — a judge-derived finding
920
+ * ABORTS, it is never filtered. Fail loud: a down analyst, a non-array result, or a tainted finding
921
+ * throws; there is no silent empty-findings path that would let a combinator steer on nothing.
922
+ */
923
+
924
+ declare const assertTraceDerivedFindings: AssertTraceDerivedFindings; // the firewall assertion, declared against the `AssertTraceDerivedFindings` signature
925
+ /**
926
+ * The analyst run an `Agent<unknown, AnalystFinding[]>` performs over the children settled so far.
927
+ * The combinator supplies the analyst's task projection (how to frame the drained settlements as
928
+ * the analyst's input) — the analyst's `act` reads the trace and returns its raw findings; the
929
+ * firewall is enforced afterwards by `createScopeAnalyst`, not by the analyst itself.
930
+ */
931
+ interface CreateScopeAnalystOptions<D> {
932
+ /** The analyst agent the combinator spawns over the trace. `harness` is the persona's choice
933
+ * (`null` for an inline router analyst, a `BackendType` for a sandboxed one). Its `act` returns
934
+ * the RAW findings; this module asserts the firewall on them before returning. */
935
+ readonly analyst: Agent<unknown, ReadonlyArray<AnalystFinding>>;
936
+ /** Build the analyst agent's task (typed `unknown`) from the analyze input (the root-task framing + the children
937
+ * drained so far). Pure projection — the analyst interprets it, this never reads it. */
938
+ buildTask(input: ScopeAnalyzeInput<D>): unknown;
939
+ /** The conserved budget reserved for one analyst spawn. The pool reserves against it and fails
940
+ * closed; an analyst that cannot be admitted is a fail-loud abort, never silent empty findings. */
941
+ readonly budget: Budget;
942
+ /** Optional trace/journal label for the spawned analyst child. Default `'analyst'`. */
943
+ readonly label?: string;
944
+ }
945
+ /**
946
+ * Build a `ScopeAnalyst` that spawns the analyst agent through `Scope.spawn` (so its compute is
947
+ * metered by the conserved pool), drains its single settlement, and enforces the trace-derived
948
+ * firewall before returning. The `scope` is the SAME scope the combinator is draining its children
949
+ * from — the analyst is spawned as a sibling and its result is read off `scope.next()` in cursor
950
+ * order, replay-safe like any other child.
951
+ *
952
+ * Fail loud (no silent empty findings):
953
+ * - the pool refuses the analyst spawn → `AnalystError` (the steer would otherwise run on nothing)
954
+ * - the analyst settles `down` → `AnalystError` (a broken capture path, not a verdict)
955
+ * - the analyst returns a non-array → `PlannerError`
956
+ * - any finding cites judge-derived metric evidence → `PlannerError` via the firewall
957
+ */
958
+ declare function createScopeAnalyst<D>(scope: Scope<Outcome<D>>, options: CreateScopeAnalystOptions<D>): ScopeAnalyst<D>;
959
+ /**
960
+ * Build the `SteerContext` a combinator reads to steer (its `loopUntil.until`, `widen` gate, any
961
+ * future steer). One place enforces the firewall: `findings` is asserted trace-derived before it is
962
+ * surfaced, and `lastValidScore` is provided for OBSERVABILITY only — a combinator that steers off
963
+ * it re-introduces selector = judge, the coupling the architecture forbids.
964
+ *
965
+ * `findings` is re-asserted here even when it came from `createScopeAnalyst` (which already asserted
966
+ * it): the assertion is cheap and idempotent, and a `SteerContext` may be built from findings that
967
+ * arrived by another path (a caller-supplied diagnosis). Belt-and-suspenders on the one coupling
968
+ * that must never leak.
969
+ */
970
+ declare function buildSteerContext<D>(findings: ReadonlyArray<AnalystFinding>, settledSoFar: ReadonlyArray<Settled<Outcome<D>>>): SteerContext<D>;
971
+
972
+ /**
973
+ * @experimental
974
+ *
975
+ * The generic combinator library — the content-free act-bodies the wave's §1 contract froze.
976
+ *
977
+ * Each export is a `CombinatorShape<Task, D>` (an alias of `LoopShape<Task, D>`): a factory
978
+ * `(ShapeContext) => Agent<Task, Outcome<D>>` whose `act` runs ONE composition shape over the
979
+ * keystone `Scope` — spawn children through `ctx.spawnChild` + `scope.spawn`, drain settlements
980
+ * via `scope.next()`, select across `done` children with the SINGLE-SOURCED `settledToIteration`
981
+ * + `defaultSelectWinner` (selector≠judge — never a re-rank behind the driver), and synthesize a
982
+ * terminal `Outcome<D>`.
983
+ *
984
+ * The shapes carry NO domain: a "research sweep over angles" is `fanout(angles, { synthesize })`
985
+ * under a research persona; a "code build test" is `pipeline([plan, implement, integrate])` under
986
+ * a coder persona. The SHAPE is here; the model/prompt/role/goal live on the `Persona` + task,
987
+ * threaded to each child verbatim by the spec-objects the builders take. No model name, prompt,
988
+ * role, or domain noun appears below.
989
+ *
990
+ * Two fail-loud invariants every combinator honors: a child the conserved pool cannot admit is a
991
+ * CONCRETE blocker (never an eager over-fan, never a silent drop), and a `blocked` outcome always
992
+ * names at least one blocker (a shape that cannot finish MUST say why — `blocked([])` throws).
993
+ */
994
+
995
+ /**
996
+ * `pipeline(stages)` — run the stages in order, feeding each stage's `done` deliverable into the
997
+ * next stage's task. The first stage that ends `blocked` (a child that went down, a child the
998
+ * pool would not admit, or a stage whose `collect` chose to block) short-circuits — its blockers
999
+ * ARE the pipeline's blockers, never coerced past a failed stage. The terminal stage's `done`
1000
+ * deliverable is the pipeline's deliverable.
1001
+ */
1002
+ declare function pipeline<Task, D>(stages: ReadonlyArray<PipelineStage<Task, unknown, unknown>>): CombinatorShape<Task, D>;
1003
+ /**
1004
+ * `fanout(items, opts)` — spawn one child per item in a single round (bounded by the conserved
1005
+ * pool's fail-closed admission), drain via `scope.next()`, then either synthesize over the
1006
+ * gathered settlements (one SEPARATE synthesis child) or return the best-valid child via the
1007
+ * single-sourced selector. A round that admitted zero children, or whose synthesis child could
1008
+ * not be admitted, is a concrete blocker.
1009
+ */
1010
+ declare function fanout<Task, Item, D>(items: ReadonlyArray<Item>, opts: FanoutOptions<Item, D>): CombinatorShape<Task, D>;
1011
+ /**
1012
+ * `loopUntil(seed, spec)` — one `step` child per round; `fold` accumulates each settlement into
1013
+ * the running state; `until` (reading the round's trace findings, NOT a fresh raw verdict) is
1014
+ * the deployable stop. The conserved pool IS the loop bound: once `spawn` fails closed the loop
1015
+ * stops. A loop that exhausted the pool without `until` ever satisfying is a concrete blocker.
1016
+ *
1017
+ * Findings are threaded through the `SteerContext` firewall in the analyst seam (`analyst.ts`);
1018
+ * absent a wired analyst on this surface the firewall stays dormant and `until` is consulted with
1019
+ * an empty findings array — never a fabricated finding (fail-loud honesty over a silent default).
1020
+ */
1021
+ declare function loopUntil<Task, State, D>(seed: State, spec: LoopUntilSpec<Task, State, D>): CombinatorShape<Task, D>;
1022
+ /**
1023
+ * `panel(spec)` — spawn the M judge children over the SAME artifact, drain their settlements,
1024
+ * and fold them into a panel verdict via the pure WRITE-ONLY `merge` (a judge's output never
1025
+ * reaches another judge's task; the merge never spawns or re-ranks). A `down` judge carries no
1026
+ * verdict and is excluded from the merge denominator. A panel that admitted no judge is a
1027
+ * concrete blocker before `merge` is consulted.
1028
+ */
1029
+ declare function panel<Task, Artifact, D>(spec: PanelSpec<Artifact, D>): CombinatorShape<Task, D>;
1030
+ /**
1031
+ * `verify(spec)` — an IMPLEMENT child produces a candidate, then a SEPARATE VERIFIER child grades
1032
+ * it; only a `valid` verifier verdict ships. Any other outcome (implement down, verifier down,
1033
+ * verifier verdict absent or not `valid`) is a concrete blocker carrying the failure verbatim —
1034
+ * never a coerced "done". The implement child does not grade itself.
1035
+ */
1036
+ declare function verify<Task, Candidate, D>(spec: VerifySpec<Task, Candidate, D>): CombinatorShape<Task, D>;
1037
+ /**
1038
+ * `widen(spec)` — the streaming spawn-on-completion driver. Spawns the seed lineages, then REACTS
1039
+ * to each `scope.next()`: on every settled child it consults `spec.gate.decide` and, when the gate
1040
+ * returns `widen`, spawns AT MOST ONE more child toward the chosen lineage under the remaining
1041
+ * conserved pool. `promising` is derived from the round's trace findings (the analyst seam),
1042
+ * never a child's raw `verdict` — and the default gate (`flatWidenGate`) never widens, so the R2
1043
+ * firewall stays dormant. Terminal selection is `spec.synthesize` over every settled lineage.
1044
+ *
1045
+ * No analyst is wired on this frozen surface, so `decide` is consulted with an empty findings
1046
+ * array; a flat gate ignores it. A non-flat gate that wants findings reads them through the
1047
+ * `SteerContext` firewall the analyst seam owns — never fabricated here.
1048
+ */
1049
+ declare function widen<Task, Seed, D>(spec: WidenSpec<Seed, D>): CombinatorShape<Task, D>;
1050
+ /**
1051
+ * The flat default `ScopeWidenGate` — never widens, keeping the R2 selector≠judge collision
1052
+ * dormant. A gate run passes this explicitly; a test asserts the default is flat.
1053
+ */
1054
+ declare function flatWidenGate<D>(): ScopeWidenGate<D>;
1055
+
1056
+ /**
1057
+ * @experimental
1058
+ *
1059
+ * The cross-run corpus (G2) — the learning-flywheel's durable accreted-fact store.
1060
+ *
1061
+ * `Corpus` is DISTINCT from the per-run `SpawnJournal` (decisions/replay) and `ResultBlobStore`
1062
+ * (payloads): a `CorpusRecord` is a FACT one run LEARNED that a FUTURE run reads back (the
1063
+ * world-model), not a replay input. This module owns the two impls the wave surface pins —
1064
+ * `InMemoryCorpus` and `FileCorpus` (JSONL, append-only) — plus `renderCorpusToInstructions`,
1065
+ * the READ side that projects accreted facts into a fresh `AgentProfile`'s instruction seams.
1066
+ *
1067
+ * The boundary is fail-loud, typed-outcome: `append` is idempotent on an identical record and
1068
+ * returns a typed error (never throws, never a silent overwrite) on a conflicting re-append under
1069
+ * the same `id`. Malformed records — a structurally-invalid `CorpusRecord` from disk or a caller —
1070
+ * fail loud (the validator throws), since a corpus that silently accepts garbage would poison
1071
+ * every downstream run that reads it back.
1072
+ */
1073
+
1074
+ /**
1075
+ * In-memory `Corpus`. Keyed by record `id`; `append` validates the record, is idempotent on an
1076
+ * identical re-append, and returns a typed `{ succeeded: false }` on a conflicting re-append under
1077
+ * the same `id` (never overwrites). `query` routes through the single-sourced `applyFilter`.
1078
+ */
1079
+ declare class InMemoryCorpus implements Corpus {
1080
+ private readonly byId;
1081
+ append(record: CorpusRecord): Promise<{
1082
+ succeeded: true;
1083
+ } | {
1084
+ succeeded: false;
1085
+ error: string;
1086
+ }>;
1087
+ query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
1088
+ }
1089
+ /**
1090
+ * JSONL on disk — one validated `CorpusRecord` per line, append-only. `query` replays the whole
1091
+ * file, validating every line (a malformed line fails loud — a corrupted corpus must never read
1092
+ * back silently) and folding by `id`: a later identical line dedups, a later conflicting line
1093
+ * under the same `id` is a corruption (fail loud). `append` first replays to enforce the same
1094
+ * idempotence/conflict contract as the in-mem impl, then fsyncs the new line so a crash between
1095
+ * writes never loses an acknowledged fact. Shares the JSONL append-line spine with the spawn
1096
+ * journal, but the interface stays separate (a learned fact is not a replay record).
1097
+ */
1098
+ declare class FileCorpus implements Corpus {
1099
+ private readonly path;
1100
+ constructor(path: string);
1101
+ append(record: CorpusRecord): Promise<{
1102
+ succeeded: true;
1103
+ } | {
1104
+ succeeded: false;
1105
+ error: string;
1106
+ }>;
1107
+ query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
1108
+ private load;
1109
+ private appendLine;
1110
+ }
1111
+ /**
1112
+ * The learning-flywheel READ side. Queries the corpus through `filter`, renders the matching facts
1113
+ * (most-confident first, capped by `maxLines`) into instruction lines, and returns a FRESH
1114
+ * `AgentProfile` with them merged in — never mutates the input profile. Default `target: 'prompt'`
1115
+ * appends the lines to `prompt.instructions[]` (the additive append-line seam); `target:
1116
+ * 'resources'` folds them into the single-blob `resources.instructions` string (preserving any
1117
+ * existing blob, but failing loud on a non-string existing blob — a `resources.instructions` that
1118
+ * was already an `AgentProfileResourceRef` cannot be string-appended without dropping it).
1119
+ *
1120
+ * An empty query result returns a fresh COPY of the profile with no instruction change (a valid
1121
+ * "nothing learned yet" read, not an error).
1122
+ */
1123
+ declare function renderCorpusToInstructions(opts: RenderCorpusToInstructionsOptions): Promise<AgentProfile$1>;
1124
+
1125
+ /**
1126
+ * @experimental
1127
+ *
1128
+ * The personify layer impl — `definePersona` (the thin builder) + `runPersonified` (composes
1129
+ * the persona + chosen shape onto the keystone `Supervisor`), plus `createShapeContext`, the
1130
+ * seam that hands a shape its spawn helpers without it touching the registry.
1131
+ *
1132
+ * This file adds NO engine: `runPersonified` is `createSupervisor().run(rootAgent, task, …)`
1133
+ * where `rootAgent` is the persona's chosen `LoopShape` applied to a `ShapeContext`. All the
1134
+ * conserved-budget / journal / abort / typed-result machinery is the keystone's; this layer
1135
+ * only wires the persona's CONTENT (root spec + directive + context + seams) into it.
1136
+ *
1137
+ * One non-obvious invariant it must honor: `createSupervisor().run` builds the root `Scope`
1138
+ * with an EMPTY seam bag (`seams: {}`), so the built-in metered runtimes (router/sandbox/cli)
1139
+ * cannot read their seams off `ExecutorContext` through the default supervisor path. A persona
1140
+ * that supplies raw `seams` is therefore wrapped here into a registry whose resolved factories
1141
+ * receive a ctx with the persona seams merged in — so a persona never has to pre-close its
1142
+ * factories by hand. A persona may instead supply a fully-built `registry` and skip the wrap.
1143
+ */
1144
+
1145
+ /**
1146
+ * Build a frozen `Persona`. Fails loud on the executors-supplied invariant: a persona with
1147
+ * neither a pre-built registry nor a seam bag cannot resolve its built-in runtimes, so it is
1148
+ * unrunnable — refuse it at definition time, not at the first spawn. Pure; no I/O.
1149
+ */
1150
+ declare function definePersona<D = unknown>(input: DefinePersonaInput<D>): Persona<D>;
1151
+ /**
1152
+ * Compose the persona + chosen shape onto a fresh keystone `Supervisor`. Resolves the shape
1153
+ * (a factory verbatim, or a registered name through `builtinShapes`), applies it to a
1154
+ * `ShapeContext`, and runs the resulting root `Agent` to a typed `SupervisedResult<Outcome>`.
1155
+ * Fail loud on an unknown shape name or an unresolvable persona registry — never a silent
1156
+ * default-shape fallback.
1157
+ */
1158
+ declare function runPersonified<Task, D>(options: RunPersonifiedOptions<Task, D>): Promise<SupervisedResult<Outcome<D>>>;
1159
+
1160
+ /**
1161
+ * @experimental
1162
+ *
1163
+ * The loop-shape registry — the OPEN, content-free extension point for the personify layer.
1164
+ *
1165
+ * A `LoopShape` is reusable STRUCTURE (how to decompose / fan out / verify / synthesize),
1166
+ * parameterized by a persona's CONTENT. The registry lets a caller resolve a composed shape by
1167
+ * NAME: register a factory once, then `runPersonified({ shape: '<name>' })` resolves it with zero
1168
+ * edits elsewhere. `register` fails loud on a duplicate; `resolve` returns a typed outcome so an
1169
+ * unknown name is a named error, never a silent default.
1170
+ *
1171
+ * No shape is pre-registered: the generic combinators (`pipeline`/`fanout`/`loopUntil`/`panel`/
1172
+ * `verify`/`widen`) take spec arguments, so they are not bare zero-arg factories — a caller that
1173
+ * wants name-resolution registers its own COMPOSED shape (a combinator already applied to its
1174
+ * spec) on a registry instance. The registry carries SHAPE only; the domain lives on the persona.
1175
+ */
1176
+
1177
+ /**
1178
+ * Build a fresh open `ShapeRegistry`. A factory is stored type-erased and re-cast on resolve — the
1179
+ * caller asserts the `<Task, D>` it expects, exactly as the executor registry stores its factories.
1180
+ */
1181
+ declare function createShapeRegistry(): ShapeRegistry;
1182
+ /** The default registry `runPersonified` resolves a shape name against. Empty by construction —
1183
+ * a caller registers its own composed shapes; the engine ships no domain shape. */
1184
+ declare const builtinShapes: ShapeRegistry;
1185
+ /** Register a composed shape on the default `builtinShapes` registry — the one-call extension
1186
+ * point a caller invokes so its shape is resolvable by name with zero edits to the engine. */
1187
+ declare function registerShape<Task, D>(name: string, factory: LoopShape<Task, D>): void;
1188
+
1189
+ /**
1190
+ * @experimental
1191
+ *
1192
+ * Trajectory trace + cost ledger — the post-hoc tree reconstructor (§4 of `wave-types`).
1193
+ *
1194
+ * `trajectoryReport` rebuilds the WHOLE realized spawn tree from the durable
1195
+ * `SpawnJournal` (+ optionally the `ResultBlobStore` for `done` artifacts): every node
1196
+ * (driver AND leaf), the real parent/child edges, each node's terminal status, its OWN
1197
+ * conserved `Spend`, and the `Spend` ROLLED UP over its subtree. Roll-up is a post-order
1198
+ * fold over the parent edges: a node's `rolledUpSpend` is its own spend plus every
1199
+ * descendant's, so a driver is charged for the fanout it caused — the root's roll-up is
1200
+ * the whole run's conserved total (tokens + usd + iterations + ms).
1201
+ *
1202
+ * `equalKOnCost` compares separate runs (arms) on that conserved COST, not on raw
1203
+ * iteration COUNT. The sandbox executor reports tokens/usd INCLUSIVE of a leaf's internal
1204
+ * sub-agent fanout, so charging an arm by `total.tokens`/`total.usd` (not by how many
1205
+ * `next()` cursors it logged) closes the leaf-fanout confound: a treatment leaf that fanned
1206
+ * out internally pays for it in cost, where a per-iteration count would hide it. The
1207
+ * within-run conserved pool already guarantees `Σk` equal by construction; this check is the
1208
+ * CROSS-run analogue the pool cannot reach — proving equal compute before any win is claimed.
1209
+ *
1210
+ * Pure over the journal/blobs — no live agent calls; safe to run on a finished run's log.
1211
+ */
1212
+
1213
+ /**
1214
+ * Reconstruct the whole spawn tree for `root` with per-node + rolled-up `Spend`. Reads the
1215
+ * journal for structure + spend and, when `withOutputs`, the blob store for each `done`
1216
+ * node's artifact. Fail loud on a tree that was never journaled, a settle/cancel for an
1217
+ * un-spawned node (a corrupted log), or — under `withOutputs` — a `done` node whose blob the
1218
+ * store cannot rehydrate (a silent gap would mis-cost or mis-evidence the tree).
1219
+ */
1220
+ declare function trajectoryReport(journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId, options?: TrajectoryReportOptions): Promise<TrajectoryReport>;
1221
+ /**
1222
+ * Assert the arms are comparable at EQUAL conserved COST (tokens + usd), NOT raw iteration
1223
+ * count. Compares each arm's root-rolled-up `total` on the two conserved channels: the arms are
1224
+ * within tolerance when each channel's spread (max − min across arms) divided by the median is
1225
+ * `≤ tolerance`. Pure over the reports — no I/O. Fails loud on an empty arm list (nothing to
1226
+ * compare) so a vacuous "equal" is never returned.
1227
+ */
1228
+ declare function equalKOnCost(arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions): EqualKVerdict;
1229
+
1230
+ /**
1231
+ * Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
1232
+ * dispatch.
1233
+ *
1234
+ * `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
1235
+ * the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
1236
+ * `runLoop` must forward the loop's cost AND token usage, or the guard reads
1237
+ * the run as a stub and throws. `reportLoopUsage` is that one line:
1238
+ *
1239
+ * const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
1240
+ * const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
1241
+ * reportLoopUsage(ctx.cost, result)
1242
+ * return result.winner?.output as A
1243
+ * }
1244
+ *
1245
+ * Typed structurally against the campaign `DispatchContext.cost` so this module
1246
+ * stays free of an agent-eval import — it works with any cost meter exposing
1247
+ * `observe` + `observeTokens`.
1248
+ */
1249
+
1250
+ /** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
1251
+ interface UsageSink {
1252
+ observe(amountUsd: number, source: string): void;
1253
+ observeTokens(usage: LoopTokenUsage): void;
1254
+ }
1255
+ /**
1256
+ * Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
1257
+ * meter so the backend-integrity guard sees real LLM activity. `source`
1258
+ * defaults to `'loop'`.
1259
+ */
1260
+ declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
1261
+
1262
+ /**
1263
+ * @experimental
1264
+ *
1265
+ * `acquireSandbox` — cold-start-resilient sandbox acquisition. Eliminates the
1266
+ * "create timed out at the proxy" failure mode conceptually by DECOUPLING "the
1267
+ * create HTTP call returned" from "the sandbox is ready":
1268
+ *
1269
+ * - Create is initiated with a known `name`.
1270
+ * - Readiness is observed from the sandbox's own `status` (`refresh()` polls
1271
+ * true state), NOT from whether the create call returned in time.
1272
+ * - If the create call itself times out at a gateway (502/503/504/522/524 or
1273
+ * a transport timeout), provisioning is still running server-side — so we
1274
+ * find the named sandbox via `list()` and wait for it to reach `running`.
1275
+ *
1276
+ * Result: a scale-from-zero cold start (node boot + host-agent registration,
1277
+ * minutes) can no longer surface as a create failure behind a ~100s proxy
1278
+ * limit. The loop becomes indifferent to whether the host pool is warm or cold.
1279
+ *
1280
+ * Invariant: an instance reporting no `status` (the minimal test fakes) is
1281
+ * treated as ready; only an explicit `pending`/`provisioning` status triggers
1282
+ * waiting, and only a retryable THROW triggers the find-by-name path. Real
1283
+ * errors (auth, validation, budget) fail loud. A box that is created (or found)
1284
+ * but never reaches `running` (abort, terminal status, budget) is torn down
1285
+ * before the failure propagates, so an abort storm during cold start does not
1286
+ * leak live sandboxes.
1287
+ */
1288
+
1289
+ /** @experimental */
1290
+ interface AcquireOptions {
1291
+ /**
1292
+ * Total budget for the sandbox to reach `running`, covering on-demand node
1293
+ * cold-start. Default 600_000ms — matches the orchestrator's pending-host
1294
+ * registration window so we never give up before the platform itself would.
1295
+ */
1296
+ readyTimeoutMs?: number;
1297
+ /** Poll interval while waiting for `running` / for the named sandbox to appear. */
1298
+ pollIntervalMs?: number;
1299
+ /** Cancellation (user abort). Distinct from create-call timeouts. */
1300
+ signal?: AbortSignal;
1301
+ /** Stamp a name so a timed-out create is recoverable by lookup. Auto-generated if absent. */
1302
+ name?: string;
1303
+ /** Clock override for deterministic tests. */
1304
+ now?: () => number;
1305
+ /** Sleep override for deterministic tests. */
1306
+ sleep?: (ms: number) => Promise<void>;
1307
+ }
1308
+ /** @experimental */
1309
+ declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
1310
+
1311
+ /**
1312
+ * @experimental
1313
+ *
1314
+ * Capability probe for the loop kernel's backend-blind lineage seams. The
1315
+ * kernel must NEVER ask "is this Docker or Firecracker?"; it asks "can this
1316
+ * platform fork a checkpoint?" via `client.criuStatus()` and degrades to fresh
1317
+ * boxes when the answer is no. CRIU availability is a per-platform fact, so the
1318
+ * probe is memoized per client — one network round-trip, reused across every
1319
+ * fanout in the run.
1320
+ *
1321
+ * Invariant: a client with no `criuStatus` method (the loop's test fakes, the
1322
+ * raw SDK before it grew the probe) reports `canFork = false`. The seam is
1323
+ * fail-CLOSED — never assume forking works, only enable it on a positive probe.
1324
+ */
1325
+
1326
+ /**
1327
+ * What the loop kernel is allowed to know about a sandbox backend: a single
1328
+ * capability bit, never the backend's identity. `canFork` gates the
1329
+ * checkpoint+fork fanout path; everything else (session continuation) is a
1330
+ * universal SDK feature that needs no probe.
1331
+ *
1332
+ * @experimental
1333
+ */
1334
+ interface SandboxCapabilities {
1335
+ /**
1336
+ * True only when `client.criuStatus()` returned `{ available: true }`. When
1337
+ * false, a fork-enabled fanout degrades to independent fresh boxes — same
1338
+ * result, no shared context prefix.
1339
+ */
1340
+ canFork: boolean;
1341
+ }
1342
+ /**
1343
+ * Probe (and memoize per client) what the loop may rely on. A client without a
1344
+ * `criuStatus` method, or whose probe rejects, yields `canFork = false` — a
1345
+ * failed probe must never claim a capability the platform may not have. The
1346
+ * promise is cached so concurrent fanout branches share one round-trip.
1347
+ *
1348
+ * @experimental
1349
+ */
1350
+ declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
1351
+ /**
1352
+ * Structural view of the optional CRIU probe. The loop-side `SandboxClient`
1353
+ * does not require `criuStatus`; this declares it as an optional member so the probe can be
1354
+ * read without importing sandbox-backend specifics. @experimental
1355
+ */
1356
+ interface CriuCapableClient {
1357
+ criuStatus?: () => Promise<{
1358
+ available: boolean;
1359
+ criuVersion?: string;
1360
+ reason?: string;
1361
+ }>;
1362
+ }
1363
+
1364
+ /**
1365
+ * Sandbox-event → runtime-event mapping.
1366
+ *
1367
+ * The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`
1368
+ * whose `type` vocabulary is backend-determined (opencode, etc.) rather than
1369
+ * enumerated by the SDK. Two consumers project it:
1370
+ * - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off
1371
+ * every cost-bearing event, regardless of stream shape;
1372
+ * - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects
1373
+ * incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.
1374
+ *
1375
+ * Both live here so the empirically-observed `type` vocabulary has one home.
1376
+ */
1377
+
1378
+ /**
1379
+ * Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
1380
+ * the event carries usage/cost data. Returns `undefined` for non-cost events
1381
+ * so the kernel can iterate the full stream without branching.
1382
+ *
1383
+ * Canonical cost-carrying types observed in the wild:
1384
+ * - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`
1385
+ * - `message.completed` / `result` — `data: { usage: { inputTokens,
1386
+ * outputTokens, totalCostUsd? } }`
1387
+ * - `cost.usage` / `usage` — same shape under a dedicated type
1388
+ *
1389
+ * Numeric coercion is strict: `Number.isFinite` gates every accumulator write
1390
+ * so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.
1391
+ */
1392
+ declare function extractLlmCallEvent(event: SandboxEvent, agentRunName: string): (RuntimeStreamEvent & {
1393
+ type: 'llm_call';
1394
+ }) | undefined;
1395
+ /**
1396
+ * Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
1397
+ * for runtimes that bridge a sandbox `streamPrompt` into the
1398
+ * `AgentRuntime.act` streaming contract. Returns `undefined` for events that
1399
+ * have no faithful projection — the raw stream is preserved separately for the
1400
+ * `OutputAdapter`, so an unmapped event never loses data.
1401
+ *
1402
+ * Mapped (the task-optional incremental variants — no synthesized task
1403
+ * lifecycle, no guessed tool-part shapes):
1404
+ * - `message.part.updated` text part → `text_delta`
1405
+ * - `message.part.updated` reasoning/thinking part → `reasoning_delta`
1406
+ * - cost-bearing events → `llm_call` (shared with the ledger extractor)
1407
+ *
1408
+ * The opencode backend emits incremental text as
1409
+ * `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;
1410
+ * `delta` is the increment, `part.text` the running accumulation.
1411
+ */
1412
+ declare function mapSandboxEvent(event: SandboxEvent, opts?: {
1413
+ agentRunName?: string;
1414
+ }): RuntimeStreamEvent | undefined;
1415
+
1416
+ /**
1417
+ * @experimental
1418
+ *
1419
+ * `SandboxLineage` — the backend-blind owner of box + session handles for a
1420
+ * single `runLoop` invocation. It exists so `run-loop.ts` never references a
1421
+ * backend (Docker / Firecracker): the lineage turns "continue this session" and
1422
+ * "fork this branch" into capability-gated sandbox-SDK calls and degrades to
1423
+ * fresh boxes when a capability is absent.
1424
+ *
1425
+ * Three operations, mirroring the kernel's per-iteration choices:
1426
+ * - `start(spec, prompt)` → a fresh box; the FIRST `streamPrompt` carries a
1427
+ * minted `sessionId` so later `continue` calls reuse the same server-side
1428
+ * conversation instead of re-injecting prior context as prompt text.
1429
+ * - `continue(handle, prompt)` → the SAME box, `streamPrompt({ sessionId })`.
1430
+ * The context lives in the sandbox; the prompt is only the new turn. Before
1431
+ * streaming it ASSERTS the session is still live server-side (via
1432
+ * `box.session(id).status()`): if the platform never honored the
1433
+ * client-minted id (or reaped it), `status()` is `null` and `continue`
1434
+ * fails loud rather than silently re-running the turn without prior context.
1435
+ * - `fork(handle, prompts, specs)` → when `canFork`, `checkpoint({ leaveRunning })` on
1436
+ * the parent then `fork(checkpointId)` × N so N branches inherit a shared
1437
+ * context prefix; otherwise N independent fresh boxes (same result, no
1438
+ * prefix). Either way each branch streams its own turn. Child-box creation
1439
+ * is bounded by the lineage's `maxConcurrency` — a 20-way fanout under a
1440
+ * concurrency cap of 2 provisions boxes in bounded waves, not all at once.
1441
+ *
1442
+ * Invariant: the lineage OWNS every box it starts or forks and tears them all
1443
+ * down on `teardown()` (or earlier via `prune`). It never tears down a box
1444
+ * mid-flight — the kernel decides when a handle is done. Streaming itself stays
1445
+ * in `run-loop.ts`; the lineage only hands back the live `streamPrompt` iterable
1446
+ * so the kernel keeps ownership of event collection, cost accounting, and trace
1447
+ * emission.
1448
+ */
1449
+
1450
+ /**
1451
+ * A live box plus the session that threads its iterations together. Handed back
1452
+ * by `start`/`fork`, passed into `continue`/`fork` to descend from. Opaque to
1453
+ * the kernel beyond `box` (for placement/teardown) and `sessionId` (trace).
1454
+ *
1455
+ * @experimental
1456
+ */
1457
+ interface SandboxLineageHandle {
1458
+ /** The owned, running sandbox this handle drives. */
1459
+ box: SandboxInstance;
1460
+ /**
1461
+ * Stable session id threaded through this box's `streamPrompt` calls. Minted
1462
+ * by the lineage on `start`; reused on `continue` so the server continues the
1463
+ * same conversation. A forked handle starts a fresh session on its new box —
1464
+ * the shared context comes from the checkpoint, not a shared session id.
1465
+ */
1466
+ sessionId: string;
1467
+ }
1468
+ /**
1469
+ * Owns box + session handles for one loop run and offers the three
1470
+ * capability-gated lifecycle moves. Construct via `createSandboxLineage`.
1471
+ *
1472
+ * @experimental
1473
+ */
1474
+ interface SandboxLineage {
1475
+ /**
1476
+ * Acquire a fresh box and begin a new session on it. Returns the handle and
1477
+ * the live `streamPrompt` iterable for the first turn (caller drains it).
1478
+ */
1479
+ start(spec: AgentRunSpec<unknown>, prompt: string, signal: AbortSignal): Promise<{
1480
+ handle: SandboxLineageHandle;
1481
+ events: AsyncIterable<SandboxEvent>;
1482
+ }>;
1483
+ /**
1484
+ * Continue an existing handle's session with one more turn on the SAME box.
1485
+ * The prior context is server-side; `prompt` is only the new turn. Asserts the
1486
+ * session is still known to the sandbox first (fail-loud) so a platform that
1487
+ * silently dropped the client-minted session id surfaces as an error instead
1488
+ * of a contextless turn the caller mistakes for a real continuation.
1489
+ */
1490
+ continue(handle: SandboxLineageHandle, prompt: string, signal: AbortSignal): Promise<AsyncIterable<SandboxEvent>>;
1491
+ /**
1492
+ * Branch one child per `prompts[i]` from `parent`. When the platform can fork, each
1493
+ * child inherits `parent`'s checkpoint — and therefore the parent's IMAGE and
1494
+ * PROFILE: under a real fork `specs[i]` does NOT re-select a per-branch
1495
+ * profile (the SDK forks the running box, it can't swap the image). `specs[i]`
1496
+ * picks the per-branch profile ONLY on the degraded fresh-box path (no CRIU).
1497
+ * A heterogeneous-profile fanout therefore homogenizes to the parent's profile
1498
+ * when fork is available — pass a single shared spec for forked fanouts, or
1499
+ * use `random@k` (no fork) when branches must differ. Each child's first turn
1500
+ * streams `prompts[i]`. Child-box creation is bounded by `maxConcurrency`.
1501
+ */
1502
+ fork(parent: SandboxLineageHandle, prompts: string[], specs: AgentRunSpec<unknown>[], signal: AbortSignal): Promise<{
1503
+ handle: SandboxLineageHandle;
1504
+ events: AsyncIterable<SandboxEvent>;
1505
+ }[]>;
1506
+ /**
1507
+ * Destroy every owned box whose handle is NOT in `keep`, freeing it before
1508
+ * loop end. The kernel calls this after a round when it can prove no future
1509
+ * round will descend from the pruned boxes (deterministic, monotonic branch
1510
+ * selection); boxes still reachable as a future branch source are retained.
1511
+ * Best-effort, bounded, parallel — a failed delete never throws.
1512
+ */
1513
+ prune(keep: Iterable<SandboxLineageHandle>): Promise<void>;
1514
+ /** Destroy every box this lineage owns. Best-effort, bounded, parallel. */
1515
+ teardown(): Promise<void>;
1516
+ }
1517
+ /**
1518
+ * Build a lineage bound to one client + its probed capabilities. The
1519
+ * capabilities are passed in (not re-probed) so the kernel probes once per run
1520
+ * and the lineage stays a pure function of "what this platform can do".
1521
+ *
1522
+ * @experimental
1523
+ */
1524
+ declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
1525
+ maxConcurrency?: number;
1526
+ streaming?: 'sse' | 'poll';
1527
+ }): SandboxLineage;
1528
+ /**
1529
+ * Loop-side widening of the box's optional checkpoint method. The
1530
+ * `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
1531
+ * require checkpointing; this reads it optionally so the lineage can probe-gate
1532
+ * without importing sandbox-backend specifics. @experimental
1533
+ */
1534
+ interface CheckpointCapableBox {
1535
+ checkpoint?: (options?: {
1536
+ leaveRunning?: boolean;
1537
+ tags?: string[];
1538
+ }) => Promise<{
1539
+ checkpointId: string;
1540
+ }>;
1541
+ }
1542
+ /** Loop-side widening of the box's optional fork method. @experimental */
1543
+ interface ForkCapableBox {
1544
+ fork?: (checkpointId: string, options?: {
1545
+ name?: string;
1546
+ }) => Promise<SandboxInstance>;
1547
+ }
1548
+ /**
1549
+ * Loop-side widening of the box's optional session accessor. The real
1550
+ * `SandboxInstance` exposes `session(id).status()`; the loop reads it optionally
1551
+ * so `continue` can assert session liveness without requiring it of the test
1552
+ * fakes. `status()` resolves `null` when the id is unknown to the sandbox.
1553
+ * @experimental
1554
+ */
1555
+ interface SessionCapableBox {
1556
+ session?: (id: string) => {
1557
+ status: () => Promise<unknown | null>;
1558
+ };
1559
+ }
1560
+
1561
+ /**
1562
+ * `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
1563
+ * sandbox over a persistent artifact: run it, stream it, RESUME the same session
1564
+ * across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
1565
+ * agent — all flow through this; the domain lives only in the `Deliverable<Out>`
1566
+ * the caller supplies, never in a per-domain copy of this function.
1567
+ *
1568
+ * It is a thin facade (NOT a new layer) over code that already exists and is
1569
+ * already hardened:
1570
+ * - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
1571
+ * - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
1572
+ * claude-code / kimi-code / hermes / pi); the only "which agent" knob,
1573
+ * - `createSandboxLineage` — `start` mints a session; `resume` continues the
1574
+ * SAME server-side session with a fail-loud `assertSessionLive`.
1575
+ *
1576
+ * The one genuinely-new piece is {@link Deliverable}: it widens the pure
1577
+ * `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
1578
+ * the structural gap that made the bench gates hand-roll `box.fs.read`, because a
1579
+ * large produced file (a git diff, a generated document) truncates in the chat
1580
+ * stream and a pure events-parser cannot reach the workspace. Per the SDK, a
1581
+ * RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
1582
+ * (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
1583
+ * valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
1584
+ *
1585
+ * What this deliberately does NOT do (so it stays a facade, not slop): no custom
1586
+ * reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
1587
+ * no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
1588
+ * `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
1589
+ * It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
1590
+ * kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
1591
+ * artifact-or-events deliverable + resume over ONE persistent box.
1592
+ */
1593
+
1594
+ /**
1595
+ * @experimental
1596
+ * How a typed deliverable `Out` is materialized from a finished turn.
1597
+ * - `events` — pure parse over the event array (identical to `OutputAdapter`).
1598
+ * - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
1599
+ * events). For diffs/codebases/documents that don't fit the chat
1600
+ * stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
1601
+ */
1602
+ type Deliverable<Out> = {
1603
+ kind: 'events';
1604
+ fromEvents: (events: SandboxEvent[]) => Out;
1605
+ } | {
1606
+ kind: 'artifact';
1607
+ path: string;
1608
+ fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
1609
+ };
1610
+ /**
1611
+ * @experimental
1612
+ * One finished turn over the artifact. A failed FS read is surfaced in `readError`
1613
+ * (never masked as an empty deliverable) so a caller distinguishes "agent produced
1614
+ * nothing" from a transport/FS fault.
1615
+ */
1616
+ interface TurnResult<Out> {
1617
+ out: Out;
1618
+ events: SandboxEvent[];
1619
+ readError?: string;
1620
+ }
1621
+ /** @experimental A live run over ONE persistent artifact (box + session). Close it
1622
+ * when done — `close()` tears the box down. */
1623
+ interface SandboxRun<Out> {
1624
+ readonly box: SandboxInstance;
1625
+ readonly sessionId: string;
1626
+ /** First turn over the fresh box (mints the session). Throws if already started. */
1627
+ start(prompt: string): Promise<TurnResult<Out>>;
1628
+ /** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
1629
+ resume(prompt: string): Promise<TurnResult<Out>>;
1630
+ close(): Promise<void>;
1631
+ }
1632
+ /** @experimental */
1633
+ interface OpenSandboxRunOptions {
1634
+ /** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
1635
+ agentRun: AgentRunSpec<string>;
1636
+ signal: AbortSignal;
1637
+ /** Optional execution-scoped observers. Hook failures never fail the run. */
1638
+ hooks?: RuntimeHooks;
1639
+ /** Stable run id for trace joins. Defaults to a short runtime-minted id. */
1640
+ runId?: string;
1641
+ /** Optional benchmark/scenario id carried into emitted hook events. */
1642
+ scenarioId?: string;
1643
+ /** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
1644
+ now?: () => number;
1645
+ /** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
1646
+ maxConcurrency?: number;
1647
+ /** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
1648
+ * retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
1649
+ readRetryDelayMs?: number;
1650
+ }
1651
+ /**
1652
+ * @experimental
1653
+ * Open a sandbox run. Harness-agnostic: the harness lives in
1654
+ * `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
1655
+ * kimi-code all flow through this one entrypoint with identical env/auth wiring.
1656
+ */
1657
+ declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
1658
+
1659
+ /**
1660
+ * @experimental
1661
+ *
1662
+ * The conserved budget reservation pool — the invariant the whole instrument
1663
+ * rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
1664
+ * quantities (tokens, usd, iterations) plus an absolute deadline. Children RESERVE
1665
+ * atomically at spawn and RECONCILE at settle:
1666
+ *
1667
+ * total ≡ free + reserved + committed (invariant, always)
1668
+ *
1669
+ * `reserve` moves a child's whole ceiling from `free` → `reserved` and FAILS CLOSED
1670
+ * when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
1671
+ * Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
1672
+ * spend, and refunds the unspent remainder to `free`. Tokens and usd are SEPARATE
1673
+ * channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
1674
+ *
1675
+ * Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
1676
+ * RNG read. A `reserve`/`reconcile` ticket is single-use (fail-loud on double or
1677
+ * unknown reconcile) so a child can never refund twice.
1678
+ */
1679
+
1680
+ /** Opaque, single-use reservation handle returned by `reserve` and consumed by
1681
+ * `reconcile`. Carries the reserved ceilings so reconciliation needs no lookup. */
1682
+ interface ReservationTicket {
1683
+ readonly id: number;
1684
+ readonly reserved: {
1685
+ readonly tokens: number;
1686
+ readonly usd: number;
1687
+ readonly iterations: number;
1688
+ };
1689
+ }
1690
+ /** Post-reservation pool readout — the shape `Scope.budget` exposes. `tokensLeft`,
1691
+ * `usdLeft`, and `reservedTokens` reflect committed-but-unsettled reservations;
1692
+ * `deadlineMs` is the ABSOLUTE wall-clock deadline (0 when the root set none). */
1693
+ type BudgetReadout = Readonly<{
1694
+ tokensLeft: number;
1695
+ usdLeft: number;
1696
+ deadlineMs: number;
1697
+ reservedTokens: number;
1698
+ }>;
1699
+ interface BudgetPool {
1700
+ /**
1701
+ * Atomically reserve a child's full ceiling from the free balance. Fails closed
1702
+ * ({ ok: false }) when the pool can't cover tokens, usd, or iterations — the
1703
+ * caller inspects `ok` before `ticket`.
1704
+ */
1705
+ reserve(b: Budget): {
1706
+ ok: true;
1707
+ ticket: ReservationTicket;
1708
+ } | {
1709
+ ok: false;
1710
+ reason: 'budget-exhausted';
1711
+ };
1712
+ /**
1713
+ * Release a reservation: commit the actual `spent`, refund the unspent remainder
1714
+ * to the free pool. Throws on an unknown or already-reconciled ticket (fail loud —
1715
+ * a double refund would silently break conservation).
1716
+ */
1717
+ reconcile(ticket: ReservationTicket, spent: Spend): void;
1718
+ /** Fold a normalized `UsageEvent` stream (or array) into a `Spend`. Tokens via
1719
+ * `addTokenUsage`, usd on its own channel, iterations from `'iteration'` events.
1720
+ * `ms` is left zero — wall-clock duration is the caller's to record, not the pool's. */
1721
+ spendFrom(events: AsyncIterable<UsageEvent> | UsageEvent[]): Promise<Spend>;
1722
+ /** The current readout, reflecting all outstanding reservations. */
1723
+ readout(): BudgetReadout;
1724
+ /** Fail loud if any reservation is still open — the conserved-pool leak detector. Called at the
1725
+ * supervisor's join barrier: once every child has settled, no ticket may remain (a leaked
1726
+ * reservation would silently break `total ≡ free + reserved + committed`). */
1727
+ assertNoOpenTickets(): void;
1728
+ }
1729
+ /** Fold a normalized `UsageEvent` array into a `Spend`. Tokens and usd are separate
1730
+ * channels; iterations come from `'iteration'` events. Pure; `ms` stays zero (the
1731
+ * pool does not read wall-clock). */
1732
+ declare function spendFromUsageEvents(events: UsageEvent[]): Spend;
1733
+ /**
1734
+ * Create a conserved reservation pool from a root `Budget`. `now()` is injected so the
1735
+ * deadline readout is deterministic; defaults to `Date.now` for non-test callers. The
1736
+ * absolute deadline is fixed at construction (`now() + budget.deadlineMs`) so the
1737
+ * readout's `deadlineMs` is a stable wall-clock instant, not a shrinking remainder.
1738
+ */
1739
+ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
1740
+
1741
+ /**
1742
+ * @experimental
1743
+ *
1744
+ * The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
1745
+ * open interface frozen in `./types`, plus the open resolver/registry that maps
1746
+ * an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
1747
+ *
1748
+ * The interface is the extension point, not a closed `inline|sandbox|cli` union:
1749
+ * - router/inline : a direct OpenAI-compatible Router call, no box (one-shot).
1750
+ * - sandbox : COMPOSES the existing `runLoop` kernel as a single-task
1751
+ * leaf and surfaces its token/cost usage as `UsageEvent`s;
1752
+ * forwards PR #150's optional `lineage` passthrough WITHOUT
1753
+ * reinventing checkpoint/fork (streaming).
1754
+ * - cli : a Halo/RLM subprocess; `budgetExempt` (no token accounting),
1755
+ * excluded from the equal-k arms by construction (streaming).
1756
+ * Every metered runtime reports through the SAME normalized `UsageEvent` channel
1757
+ * so the conserved budget pool meters them identically. A user's own agent is
1758
+ * first-class the moment it implements `Executor` — register it by name or
1759
+ * pass it as `AgentSpec.executor`.
1760
+ *
1761
+ * Layering: `estimateCost`/`isModelPriced` are substrate primitives from
1762
+ * `@tangle-network/agent-eval`; `runLoop`/`acquireSandbox` are runtime kernels
1763
+ * from this package. No per-vendor adapters live here.
1764
+ */
1765
+
1766
+ /**
1767
+ * Router/inline connection seam. A direct OpenAI-compatible Router endpoint —
1768
+ * the cheapest leaf, no box, no tools. `model` overrides the profile's model
1769
+ * hint when present; otherwise the profile's `model.default` is required.
1770
+ */
1771
+ interface RouterSeam {
1772
+ routerBaseUrl: string;
1773
+ routerKey: string;
1774
+ model?: string;
1775
+ }
1776
+ /**
1777
+ * Sandbox executor seam. The `sandboxClient` the composed `runLoop` creates
1778
+ * boxes through, plus the optional trace/run/lineage wiring forwarded into the
1779
+ * loop. `lineage` is opaque here (PR #150's `RunLoopOptions.lineage`): forwarded
1780
+ * as-is for forward compatibility, never inspected — this executor does NOT reinvent
1781
+ * checkpoint/fork.
1782
+ */
1783
+ interface SandboxSeam {
1784
+ sandboxClient: SandboxClient;
1785
+ /** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
1786
+ loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
1787
+ /** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
1788
+ lineage?: unknown;
1789
+ /** Hard cap on the composed loop's iterations. The budget pool reserves against
1790
+ * the spawn `Budget.maxIterations`; this is the leaf's own ceiling. Default 1. */
1791
+ maxIterations?: number;
1792
+ }
1793
+ /** CLI subprocess seam. `bin` + `args` describe the Halo/RLM process to spawn. */
1794
+ interface CliSeam {
1795
+ bin: string;
1796
+ args?: string[];
1797
+ /** Extra environment for the subprocess (merged over `process.env`). */
1798
+ env?: Record<string, string>;
1799
+ /** Working directory for the subprocess. */
1800
+ cwd?: string;
1801
+ }
1802
+ /**
1803
+ * cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
1804
+ * (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
1805
+ * as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
1806
+ * `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
1807
+ * forwarded verbatim per request — how an arm disables native tools or injects
1808
+ * a provider search MCP.
1809
+ */
1810
+ interface BridgeSeam {
1811
+ bridgeUrl: string;
1812
+ bridgeBearer: string;
1813
+ model: string;
1814
+ agentProfile?: Record<string, unknown>;
1815
+ timeoutMs?: number;
1816
+ }
1817
+ /**
1818
+ * The single built-in executor entrypoint. The backend is DATA — the cost dial a
1819
+ * profile, an experiment config, or a replay journal can name — not an import
1820
+ * choice. Injects the matching seam and delegates to the built-in implementation;
1821
+ * the port stays OPEN: bring-your-own agents implement `Executor` directly and
1822
+ * never pass through here.
1823
+ */
1824
+ type ExecutorConfig = ({
1825
+ backend: 'router';
1826
+ } & RouterSeam) | ({
1827
+ backend: 'bridge';
1828
+ } & BridgeSeam) | ({
1829
+ backend: 'cli';
1830
+ } & CliSeam) | ({
1831
+ backend: 'sandbox';
1832
+ harness?: BackendType;
1833
+ } & SandboxSeam);
1834
+ declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
1835
+ /**
1836
+ * The open resolver/registry. Pre-registers the three built-ins under their
1837
+ * runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
1838
+ * factory)` for any additional runtime — and a BYO `AgentSpec.executor` resolves
1839
+ * without touching the registry at all. NOT a closed switch; registration + BYO
1840
+ * ARE the extension points.
1841
+ *
1842
+ * `resolve` precedence (frozen in `ExecutorRegistry`): a BYO `spec.executor` →
1843
+ * `harness === null` → the `'router'` factory; else a registered factory for the
1844
+ * harness-derived runtime (`'sandbox'` for any `BackendType`); else fail loud.
1845
+ */
1846
+ declare function createExecutorRegistry(): ExecutorRegistry;
1847
+
1848
+ /**
1849
+ * @experimental
1850
+ *
1851
+ * The reactive `Scope` impl (KEYSTONE, build step 4 + the step-8 adapter).
1852
+ *
1853
+ * An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
1854
+ * them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
1855
+ * and is the single place that drives a child's lifecycle: reserve budget atomically,
1856
+ * resolve an `Executor` through the open registry, run it (one-shot OR streaming),
1857
+ * fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
1858
+ * (refunding the unspent remainder), persist the result blob + journal records, and
1859
+ * deliver the `Settled` through the `next()` cursor.
1860
+ *
1861
+ * Three invariants this impl enforces by construction:
1862
+ * - `next()` is a ray.wait n=1 cursor over THIS scope's live set; it assigns the
1863
+ * monotonic `seq` (the recorded cursor order) at the moment it yields a settlement, so
1864
+ * replay re-delivers in the identical order — `seq` is never wall-clock.
1865
+ * - Budget is reserved at spawn and reconciled at settle through the shared `BudgetPool`,
1866
+ * so `spawn` fails CLOSED on an exhausted pool and total ≡ free + reserved + committed.
1867
+ * - `view` reads the in-memory nursery, never the journal — O(live), synchronous.
1868
+ *
1869
+ * The settle path is the only writer of journal `settled` events; the spawn path the only
1870
+ * writer of `spawned` events. The result blob is `put` BEFORE the journal `settled` record
1871
+ * references its `outRef`, so a crash can never leave a journaled ref with no blob.
1872
+ */
1873
+
1874
+ /** Construction args for `createScope`. The supervisor threads the shared pool, journal,
1875
+ * blob store, and executor registry through; `depth`/`maxDepth` pair the runtime
1876
+ * recursion ceiling with the conserved pool (R3). */
1877
+ interface ScopeArgs {
1878
+ /** This scope's owning node id — children get `${parentId}:s${seq}` ids. */
1879
+ readonly parentId: NodeId;
1880
+ /** Journal/blob root key the supervisor `beginTree`'d. */
1881
+ readonly root: NodeId;
1882
+ /** The shared conserved reservation pool (one per supervised run). */
1883
+ readonly pool: BudgetPool;
1884
+ /** Append-only spawn journal; this scope writes `spawned` + `settled` records. */
1885
+ readonly journal: SpawnJournal;
1886
+ /** Content-addressed result store backing `outRef` rehydration. */
1887
+ readonly blobs: ResultBlobStore;
1888
+ /** The open executor resolver (BYO → router/inline → registered harness factory). */
1889
+ readonly executors: ExecutorRegistry;
1890
+ /** Per-spawn executor-construction seams (sandbox client, router config, cli bin). */
1891
+ readonly seams: Readonly<Record<string, unknown>>;
1892
+ /** This scope's recursion depth (root = 0). */
1893
+ readonly depth: number;
1894
+ /** Runtime recursion-depth ceiling — a spawn past it fails closed `depth-exceeded`. */
1895
+ readonly maxDepth?: number;
1896
+ /** Abort signal for this scope; an abort cascades into every live child's executor. */
1897
+ readonly signal: AbortSignal;
1898
+ /** Injected clock — keeps the journal `at` timestamp deterministic in tests. */
1899
+ readonly now?: () => number;
1900
+ /** Lifecycle stream sink. `spawn` emits `agent.spawn`, `next` emits `agent.child` — the
1901
+ * SAME stream `runLoop`/`tool-loop` feed, so the recursive tree is ONE observable stream
1902
+ * (the topology viewer reads it). Undefined ⇒ the journal stays the only record. */
1903
+ readonly hooks?: RuntimeHooks;
1904
+ }
1905
+ declare function createScope<Out>(args: ScopeArgs): Scope<Out>;
1906
+ /**
1907
+ * The step-8 merge-boundary adapter (M4): rehydrate a `Settled.done` into the kernel's
1908
+ * `Iteration` shape so `defaultSelectWinner` stays single-sourced — the supervisor selects
1909
+ * across settled children with the SAME argmax the loop kernel uses, not a forked copy.
1910
+ *
1911
+ * `index` is the cursor `seq` (the recorded, replay-stable order); `output`/`verdict`/
1912
+ * `tokenUsage`/`costUsd` are read straight off the settlement (already rehydrated from the
1913
+ * `outRef` blob by `next()`). Events are empty — a settled child is an opaque leaf result,
1914
+ * not a sandbox event stream — and the timing/cost fields project its conserved `Spend`.
1915
+ * Fail loud on a `down` settlement: only a `done` child is an iteration.
1916
+ */
1917
+ declare function settledToIteration<Out>(settled: Settled<Out>): Iteration<unknown, Out>;
1918
+
1919
+ /**
1920
+ * @experimental
1921
+ *
1922
+ * The `Supervisor` impl (KEYSTONE, build step 5).
1923
+ *
1924
+ * Owns the four things a free-running recursive `act` cannot own itself: the GLOBAL
1925
+ * conserved budget pool, the event-sourced spawn log, the abort cascade over the whole
1926
+ * live tree, and the OTP intensity breaker. `run` builds the root `Scope` over those,
1927
+ * runs the root `Agent.act`, and returns a TYPED `SupervisedResult` — a no-winner is
1928
+ * never coerced into a best-effort `Out`.
1929
+ *
1930
+ * Three lifecycle invariants this impl enforces by construction:
1931
+ * - Join barrier: when `act()` settles (resolve OR reject), every still-live child is
1932
+ * torn down before `run` returns — the generalization of the kernel's
1933
+ * `finally{ Promise.allSettled(destroy) }` barrier (run-loop.ts) from boxes to the
1934
+ * whole sub-tree. A teardown failure is `allSettled`'d and journaled as a
1935
+ * `cancelled` event; it NEVER masks act()'s own outcome. act()'s rejection is the
1936
+ * PRIMARY error (the kernel's firstError precedence), so a teardown throw during the
1937
+ * barrier can never overwrite the real failure.
1938
+ * - Abort cascade: a root abort (caller signal, `RootHandle.abort`, a tripped breaker,
1939
+ * or pool exhaustion) aborts ONE internal controller whose signal is the root scope's
1940
+ * signal. The scope cascades that into every live child's executor abort — which, for
1941
+ * an `acquiring` child, chains into the `acquireSandbox` signal and reaps the
1942
+ * find-by-name orphan box (M1). The supervisor never reaps children directly.
1943
+ * - The supervisor NEVER re-enters a child (m3): the kernel/`acquireSandbox` already
1944
+ * retried at the leaf, and a driver re-spawns through `scope.spawn`. The breaker only
1945
+ * COUNTS `down` settlements within the intensity window and trips to a typed
1946
+ * no-winner; it does not restart anything.
1947
+ *
1948
+ * Selection lives in the driver, not here (selector≠judge): `act` returns the synthesized
1949
+ * winner `Out`. The supervisor content-addresses that `Out` for its replay `outRef`,
1950
+ * reads `spentTotal` off the conserved pool, and wraps it as a typed `winner` — it does
1951
+ * not re-rank children behind the driver's back.
1952
+ */
1953
+
1954
+ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
1955
+ /**
1956
+ * Mint a `RootHandle` plus its supervisor-private control. The handle is the substrate a
1957
+ * chat/pi-viz client attaches to (Q2): `view()` reads the live tree, `signal()` delivers
1958
+ * an out-of-band message, `abort()` cascades. Before `run` binds it (and after `run`
1959
+ * unbinds it) the handle is fail-loud: a client that talks to a handle that is not
1960
+ * driving a live run gets a typed error, never a silent no-op.
1961
+ */
1962
+ declare function createRootHandle<Out>(): RootHandle<Out>;
1963
+
1964
+ export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, 
builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, inlineSandboxClient, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };