@tangle-network/agent-runtime 0.43.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -202
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-MJDGCRAT.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-C5HMTTNY.js → chunk-NYN5RTLP.js} +13 -12
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-MNCB4SJ5.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +407 -19
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -371
- package/dist/loops.js +96 -19
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/run-loop--hSoIknW.d.ts +112 -0
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-Bcp071Jg.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +551 -0
- package/dist/workflow.js +1778 -0
- package/dist/workflow.js.map +1 -0
- package/package.json +53 -16
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-C5HMTTNY.js.map +0 -1
- package/dist/chunk-EKBSQYZE.js +0 -813
- package/dist/chunk-EKBSQYZE.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-MJDGCRAT.js.map +0 -1
- package/dist/chunk-MNCB4SJ5.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-B_7GgCwu.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
|
@@ -0,0 +1,1860 @@
|
|
|
1
|
+
import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
|
+
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
+
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, U as UsageEvent, L as LeafExecutorFactory, h as Supervisor } from './types-1HbsFa7H.js';
|
|
4
|
+
export { i as ExecutorContext, H as Handle, j as LeafExecutor, k as LeafResult, l as NodeSnapshot, m as NodeStatus, n as Restart, o as RootSignal, p as Runtime, q as SpawnOpts, r as SupervisorOpts, W as WidenGate } from './types-1HbsFa7H.js';
|
|
5
|
+
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
|
|
6
|
+
import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
|
+
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
8
|
+
import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
9
|
+
import { R as RunLoopOptions } from './run-loop--hSoIknW.js';
|
|
10
|
+
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
|
|
11
|
+
import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
|
|
12
|
+
export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
|
|
13
|
+
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @experimental
|
|
17
|
+
*
|
|
18
|
+
* Event-sourced spawn journal for the recursive execution atom (build steps 3 + 7).
|
|
19
|
+
*
|
|
20
|
+
* The supervision tree is journaled as an append-only event log: every `spawned`,
|
|
21
|
+
* `settled`, and `cancelled` is recorded AFTER it is observed-committed (never
|
|
22
|
+
* speculative), mirroring `ConversationJournal`'s begin/append/load shape. The log
|
|
23
|
+
* holds only the THIN decision record — ids, parentage, budget, the spend a decision
|
|
24
|
+
* consumed, and a content-addressed `outRef`. The payloads the driver branched on
|
|
25
|
+
* (the `out` artifacts) live in a separate `ResultBlobStore`, keyed by `outRef`, so
|
|
26
|
+
* the journal stays small (decisions) and replay rehydrates the exact `Settled` from
|
|
27
|
+
* the blob store (evidence). This is the decision/payload split the replay argument
|
|
28
|
+
* rests on (B1/B2).
|
|
29
|
+
*
|
|
30
|
+
* Replay determinism (B2): `seq` is the monotonic cursor order in which `scope.next()` yielded
|
|
31
|
+
* each settlement — NOT wall-clock. `replaySpawnTree` sorts strictly by `seq` before
|
|
32
|
+
* touching the blob store, so the order in which rehydration `get`s resolve can never
|
|
33
|
+
* reorder the replayed `Settled[]`; the result is identical regardless of blob latency.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Mint the content-addressed `outRef` for a result artifact: `sha256:<hex>` over a
|
|
38
|
+
* stable JSON encoding. Producers call this to derive the `outRef` they journal and
|
|
39
|
+
* `put`; the FS/in-mem stores re-derive it on `put` to verify the supplied ref
|
|
40
|
+
* matches (fail loud on a mismatch — a forged ref breaks the replay invariant).
|
|
41
|
+
*
|
|
42
|
+
* Stable encoding: object keys are sorted recursively so two structurally-equal
|
|
43
|
+
* artifacts hash identically regardless of key insertion order.
|
|
44
|
+
*/
|
|
45
|
+
declare function contentAddress(artifact: unknown): string;
|
|
46
|
+
/**
|
|
47
|
+
* In-memory `ResultBlobStore`. Content-addressed: `put` verifies the supplied
|
|
48
|
+
* `outRef` matches the artifact's hash so a stale/forged ref fails loud rather than
|
|
49
|
+
* silently rehydrating the wrong payload. Idempotent on an identical re-put.
|
|
50
|
+
*/
|
|
51
|
+
declare class InMemoryResultBlobStore implements ResultBlobStore {
|
|
52
|
+
private readonly blobs;
|
|
53
|
+
put(outRef: string, artifact: unknown): Promise<void>;
|
|
54
|
+
get(outRef: string): Promise<unknown | undefined>;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* FS `ResultBlobStore`. One JSON file per artifact under `dir`, named by a
|
|
58
|
+
* filesystem-safe encoding of the `outRef` (`sha256:<hex>` → `sha256-<hex>.json`).
|
|
59
|
+
* `put` fsyncs so a crash between writes never loses an acknowledged blob.
|
|
60
|
+
*/
|
|
61
|
+
declare class FileResultBlobStore implements ResultBlobStore {
|
|
62
|
+
private readonly dir;
|
|
63
|
+
constructor(dir: string);
|
|
64
|
+
put(outRef: string, artifact: unknown): Promise<void>;
|
|
65
|
+
get(outRef: string): Promise<unknown | undefined>;
|
|
66
|
+
private blobPath;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* In-memory `SpawnJournal`. Appends are observed-committed only; the impl enforces
|
|
70
|
+
* the corruption guards a durable replay rests on:
|
|
71
|
+
* - an event before `beginTree` is a corrupted tree (fail loud),
|
|
72
|
+
* - a duplicate `seq` within a tree is a corrupted cursor (fail loud) — two
|
|
73
|
+
* settlements cannot share the cursor position replay orders by.
|
|
74
|
+
*/
|
|
75
|
+
declare class InMemorySpawnJournal implements SpawnJournal {
|
|
76
|
+
private readonly trees;
|
|
77
|
+
loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
|
|
78
|
+
beginTree(root: NodeId, at: string): Promise<void>;
|
|
79
|
+
appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* JSONL on disk. One line per record: the first record is `begin`, subsequent records
|
|
83
|
+
* are `event` envelopes wrapping a `SpawnEvent`. `loadTree` replays the whole file,
|
|
84
|
+
* filtering by `root`, and applies the same begin-precedes-events + unique-seq
|
|
85
|
+
* corruption guards as the in-memory impl. Each append fsyncs so a crash between
|
|
86
|
+
* writes never loses an acknowledged event.
|
|
87
|
+
*/
|
|
88
|
+
declare class FileSpawnJournal implements SpawnJournal {
|
|
89
|
+
private readonly path;
|
|
90
|
+
constructor(path: string);
|
|
91
|
+
loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
|
|
92
|
+
beginTree(root: NodeId, at: string): Promise<void>;
|
|
93
|
+
appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
|
|
94
|
+
private loadTreeBegin;
|
|
95
|
+
private appendRecord;
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Re-feed a journaled spawn tree in strict `seq` order, rehydrating each settled
|
|
99
|
+
* child's `out` from the blob store by `outRef`, and return the `Settled[]` exactly
|
|
100
|
+
* as `scope.next()` originally delivered them.
|
|
101
|
+
*
|
|
102
|
+
* Determinism (B2): the events are sorted by `seq` BEFORE any blob `get`, so the
|
|
103
|
+
* replay order is the recorded cursor order regardless of how fast each rehydration
|
|
104
|
+
* resolves. `at` (wall-clock) is never a replay input. Fail loud on a tree that was
|
|
105
|
+
* never begun, a settled-done event missing its `outRef`, or a blob the store can't
|
|
106
|
+
* rehydrate — a silent gap would let `act` branch on the wrong evidence.
|
|
107
|
+
*/
|
|
108
|
+
declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId): Promise<Settled<unknown>[]>;
|
|
109
|
+
/**
|
|
110
|
+
* Materialize the live tree (`TreeView`) from a journaled event list for resume. Folds
|
|
111
|
+
* `spawned`/`settled`/`cancelled` into a per-node snapshot in `seq` order so the
|
|
112
|
+
* resumed view matches what `scope.view` showed at the recorded cursor position.
|
|
113
|
+
*/
|
|
114
|
+
declare function materializeTreeView(events: SpawnEvent[]): TreeView;
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
118
|
+
*
|
|
119
|
+
* Without this adapter a consumer wiring `runLoop` into `runProfileMatrix` /
|
|
120
|
+
* `runCampaign` has to, by hand, every time: (a) build an `ExecCtx` with a
|
|
121
|
+
* sandbox client, (b) adapt the campaign `DispatchContext.trace` into a
|
|
122
|
+
* `LoopTraceEmitter` (or lose all loop trace correlation), and (c) remember to
|
|
123
|
+
* forward the loop's cost + tokens via `ctx.cost` (forgetting it yields a
|
|
124
|
+
* `{0,0}` cell the backend-integrity guard reads as a stub). Three foot-guns,
|
|
125
|
+
* the third silent. The fleet's products skipped (c) and fell back to a
|
|
126
|
+
* `workerRecords[]` side-channel — the exact anti-pattern the substrate exists
|
|
127
|
+
* to kill.
|
|
128
|
+
*
|
|
129
|
+
* `loopDispatch` collapses all three into one typed call:
|
|
130
|
+
*
|
|
131
|
+
* const dispatch = loopDispatch({
|
|
132
|
+
* sandboxClient,
|
|
133
|
+
* toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task }),
|
|
134
|
+
* })
|
|
135
|
+
* await runProfileMatrix({ profiles, scenarios, dispatch, judges, commitSha })
|
|
136
|
+
*
|
|
137
|
+
* Usage is reported automatically; trace events are forwarded automatically;
|
|
138
|
+
* the ctx is built automatically. The seam becomes impossible to mis-wire.
|
|
139
|
+
*
|
|
140
|
+
* Typed structurally against the campaign `DispatchContext` (imported type-only
|
|
141
|
+
* from `@tangle-network/agent-eval/campaign`) — a downward dependency, never an
|
|
142
|
+
* inversion.
|
|
143
|
+
*/
|
|
144
|
+
|
|
145
|
+
/** runLoop options minus the `ctx` (loopDispatch builds the ctx). */
|
|
146
|
+
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
147
|
+
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
148
|
+
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
149
|
+
sandboxClient: LoopSandboxClient;
|
|
150
|
+
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
151
|
+
* used with `runProfileMatrix`). */
|
|
152
|
+
toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
153
|
+
/** Map the finished loop to the artifact the judges score. Default:
|
|
154
|
+
* `result.winner?.output`. A loop with no winner yields `undefined` (judges
|
|
155
|
+
* skip the cell) — but the loop's token usage is STILL reported, so the
|
|
156
|
+
* integrity guard sees real activity. */
|
|
157
|
+
toArtifact?: (result: LoopResult<Task, Output, Decision>) => TArtifact;
|
|
158
|
+
/** Forward `loop.*` trace events into the campaign's scoped trace so loop
|
|
159
|
+
* spans correlate with the cell. Default true. */
|
|
160
|
+
forwardTrace?: boolean;
|
|
161
|
+
/** Cost-meter source label for the loop's spend. Default `'loop'`. */
|
|
162
|
+
costSource?: string;
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Adapter for `runProfileMatrix` (profile is an axis). Returns a
|
|
166
|
+
* `ProfileDispatchFn` that runs `runLoop` per (profile, scenario) cell and
|
|
167
|
+
* reports usage automatically.
|
|
168
|
+
*/
|
|
169
|
+
declare function loopDispatch<Task, Output, Decision, TScenario extends Scenario, TArtifact>(opts: LoopDispatchOptions<Task, Output, Decision, TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* @experimental
|
|
173
|
+
*
|
|
174
|
+
* The personify layer — the "act like X" knob on top of the recursive keystone.
|
|
175
|
+
*
|
|
176
|
+
* The keystone (`src/loops/supervise/`) is pure STRUCTURE: a recursive `Agent` atom inside
|
|
177
|
+
* a budget-conserving `Scope`, an `ExecutorRegistry` mapping an `AgentSpec` to a runtime,
|
|
178
|
+
* and a `Supervisor` that runs a root agent to a typed `SupervisedResult`. It carries no
|
|
179
|
+
* CONTENT — no model, no prompt, no goal framing, no notion of "who this loop is".
|
|
180
|
+
*
|
|
181
|
+
* This layer adds exactly that content seam without inventing a second engine:
|
|
182
|
+
* - A `Persona` is a thin record: the root `AgentSpec` (profile + harness + optional BYO
|
|
183
|
+
* executor), a root `directive` (the goal framing handed to the chosen shape), a
|
|
184
|
+
* `context` blob (who the loop is acting as), and the executor seams the registry needs.
|
|
185
|
+
* `definePersona` builds it; it is data, not behavior.
|
|
186
|
+
* - A `LoopShape` is a reusable act-body FACTORY: `(ctx: ShapeContext) => Agent`. The shape
|
|
187
|
+
* owns the STRUCTURE (how to decompose / fan out / verify / synthesize); the persona's
|
|
188
|
+
* content parameterizes it. A new shape is ONE file + one `registerShape` call.
|
|
189
|
+
* - `Outcome<D>` is the contract every shape synthesizes into: a finished deliverable OR a
|
|
190
|
+
* list of concrete blockers — "100% done or 100%-defined blockers", never a vague middle.
|
|
191
|
+
*
|
|
192
|
+
* Layering: this module imports ONLY keystone runtime types (`./supervise/types`) and the
|
|
193
|
+
* substrate `AgentProfile`/`BackendType`. It typechecks standalone — no impl, no engine.
|
|
194
|
+
* Extensibility is structural: `Persona` carries an open `extensions` bag so a later
|
|
195
|
+
* world-model / memory field is additive (a new optional key), never a breaking change.
|
|
196
|
+
*/
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* The terminal contract: a loop returns a FINISHED deliverable, or the concrete
|
|
200
|
+
* list of blockers that stopped it — never a half-done best-effort coercion. A `blocked`
|
|
201
|
+
* outcome with an empty `blockers` list is a contract violation (a shape that can't finish
|
|
202
|
+
* MUST name why); impls fail loud on it rather than emitting a vacuous block.
|
|
203
|
+
*
|
|
204
|
+
* `Outcome` is the `Out` type a personified `Agent`/`Supervisor` is parameterized by, so the
|
|
205
|
+
* keystone's typed `SupervisedResult<Outcome<D>>` carries it end to end with no coercion.
|
|
206
|
+
*/
|
|
207
|
+
type Outcome<D> = {
|
|
208
|
+
kind: 'done';
|
|
209
|
+
deliverable: D;
|
|
210
|
+
} | {
|
|
211
|
+
kind: 'blocked';
|
|
212
|
+
blockers: string[];
|
|
213
|
+
};
|
|
214
|
+
/**
|
|
215
|
+
* The "act like X" record. A thin composition over the keystone's `AgentSpec`: it pairs the
|
|
216
|
+
* root spec (the executor mapping for the root agent the shape builds) with the CONTENT a
|
|
217
|
+
* shape consumes — the goal framing (`directive`) and who the loop is acting as (`context`).
|
|
218
|
+
*
|
|
219
|
+
* The framework never reads `directive`/`context` semantically; it threads them to the shape
|
|
220
|
+
* verbatim through `ShapeContext`. This is the rule the mandate names: the FRAMEWORK is
|
|
221
|
+
* structure, the PERSONA carries model/prompt/tools/directive. No model name, prompt, or
|
|
222
|
+
* persona string is ever hardcoded in a shape or the engine.
|
|
223
|
+
*
|
|
224
|
+
* `D` is the deliverable type this persona's loops produce; it flows into `Outcome<D>`.
|
|
225
|
+
*/
|
|
226
|
+
interface Persona<D = unknown> {
|
|
227
|
+
/** Stable persona name — used as the trace/journal label root, never as content. */
|
|
228
|
+
readonly name: string;
|
|
229
|
+
/**
|
|
230
|
+
* The root agent's executor mapping (profile + harness + optional BYO executor). The
|
|
231
|
+
* shape's root `Agent` carries THIS as its `executorSpec`; child specs the shape spawns
|
|
232
|
+
* are derived from / resolved against the same persona registry (see `ShapeContext`).
|
|
233
|
+
*/
|
|
234
|
+
readonly root: AgentSpec;
|
|
235
|
+
/** The goal framing handed to the shape — the "what to achieve", not "how". */
|
|
236
|
+
readonly directive: string;
|
|
237
|
+
/** Who the loop is acting as — the opaque persona context blob the shape may inject into
|
|
238
|
+
* child tasks. Opaque to the framework; only the persona's profiles/prompts interpret it. */
|
|
239
|
+
readonly context: PersonaContext;
|
|
240
|
+
/**
|
|
241
|
+
* The executor seams (router endpoint+key, sandbox client, cli bin) the built-in runtimes
|
|
242
|
+
* read off `ExecutorContext.seams`, OR a fully pre-configured registry. The supervisor
|
|
243
|
+
* threads an EMPTY seam bag to the root scope, so a persona that uses built-in metered
|
|
244
|
+
* runtimes MUST supply a registry whose factories close over their seams (or BYO executors
|
|
245
|
+
* on each `AgentSpec`). Carried here so `runPersonified` can build `SupervisorOpts.executors`.
|
|
246
|
+
*/
|
|
247
|
+
readonly executors: PersonaExecutors;
|
|
248
|
+
/**
|
|
249
|
+
* Forward-compatible extension bag — a later world-model / memory / tool-budget field is an
|
|
250
|
+
* additive key here, never a breaking change to the `Persona` shape. Opaque to the engine.
|
|
251
|
+
*/
|
|
252
|
+
readonly extensions?: Readonly<Record<string, unknown>>;
|
|
253
|
+
/** Phantom: binds the persona to its deliverable type so `runPersonified` infers `D` from
|
|
254
|
+
* the persona and the chosen shape must agree. Type-only — never present at runtime. */
|
|
255
|
+
readonly __deliverable?: D;
|
|
256
|
+
}
|
|
257
|
+
/** The persona context blob — who the loop is acting as. Open by intent: a persona names its
|
|
258
|
+
* own role/audience/constraints; the framework treats it as opaque content. */
|
|
259
|
+
interface PersonaContext {
|
|
260
|
+
/** The role the loop embodies ("senior staff engineer", "equity research analyst", …). */
|
|
261
|
+
readonly role: string;
|
|
262
|
+
/** Optional freeform framing the persona's prompts/profiles consume. */
|
|
263
|
+
readonly notes?: string;
|
|
264
|
+
/** Open content bag — persona-specific fields a shape's child tasks may carry. */
|
|
265
|
+
readonly [key: string]: unknown;
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
268
|
+
* How a persona supplies executor resolution. Either a pre-built registry (factories already
|
|
269
|
+
* closed over their seams) OR the raw seam bag the engine uses to construct a registry +
|
|
270
|
+
* thread the seams onto each spawn. At least one is required (a `registry` takes precedence over `seams` when both are set) — fail loud if neither is set.
|
|
271
|
+
*/
|
|
272
|
+
interface PersonaExecutors {
|
|
273
|
+
/** A registry whose factories already capture their seams. Highest precedence. */
|
|
274
|
+
readonly registry?: ExecutorRegistry;
|
|
275
|
+
/** Raw seams to thread onto built-in runtimes (`router`/`sandbox`/`cli` keys). */
|
|
276
|
+
readonly seams?: Readonly<Record<string, unknown>>;
|
|
277
|
+
}
|
|
278
|
+
/** The minimal input to build a `Persona`. Mirrors `Persona` but lets the builder run
|
|
279
|
+
* the executors-supplied invariant check and freeze the record. */
|
|
280
|
+
interface DefinePersonaInput<D = unknown> {
|
|
281
|
+
readonly name: string;
|
|
282
|
+
readonly root: AgentSpec;
|
|
283
|
+
readonly directive: string;
|
|
284
|
+
readonly context: PersonaContext;
|
|
285
|
+
readonly executors: PersonaExecutors;
|
|
286
|
+
readonly extensions?: Readonly<Record<string, unknown>>;
|
|
287
|
+
/** Phantom: pins the input's deliverable type so `definePersona<D>` returns a `Persona<D>`
|
|
288
|
+
* the caller's shape must agree with. Type-only — never supplied at a call site. */
|
|
289
|
+
readonly __deliverable?: D;
|
|
290
|
+
}
|
|
291
|
+
/** Builds a frozen `Persona`, failing loud on the executors-supplied invariant (neither a
|
|
292
|
+
* registry nor seams = an unresolvable persona). Pure — no I/O, no engine. */
|
|
293
|
+
type DefinePersona = <D = unknown>(input: DefinePersonaInput<D>) => Persona<D>;
|
|
294
|
+
/**
|
|
295
|
+
* Budget knobs a shape reads to size its fanout/children WITHOUT owning the conserved pool.
|
|
296
|
+
* The root budget lives on `SupervisorOpts.budget`; the shape only needs the per-child
|
|
297
|
+
* sizing hints + the fanout width it is allowed to open. All ceilings — the pool reserves
|
|
298
|
+
* against them and fails closed, so an over-eager shape can never overspend.
|
|
299
|
+
*/
|
|
300
|
+
interface ShapeBudget {
|
|
301
|
+
/** Per-child spawn budget the shape reserves for each leaf/sub-loop it opens. */
|
|
302
|
+
readonly perChild: Budget;
|
|
303
|
+
/** Max children a fanout step may open in one round (the shape's structural width). */
|
|
304
|
+
readonly fanout: number;
|
|
305
|
+
}
|
|
306
|
+
/**
|
|
307
|
+
* The construction context a `LoopShape` factory receives. Carries the persona's resolved
|
|
308
|
+
* executor seams + the budget knobs, plus the ONE helper a shape needs to spawn a child
|
|
309
|
+
* through the keystone: `spawnChild` resolves an `AgentSpec` (or a persona-derived child
|
|
310
|
+
* profile) into an `Agent` the shape hands to `scope.spawn`. The shape never touches the
|
|
311
|
+
* registry directly — it asks the context, keeping resolution single-sourced.
|
|
312
|
+
*/
|
|
313
|
+
interface ShapeContext<D = unknown> {
|
|
314
|
+
readonly persona: Persona<D>;
|
|
315
|
+
readonly budget: ShapeBudget;
|
|
316
|
+
/**
|
|
317
|
+
* Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
|
|
318
|
+
* `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
|
|
319
|
+
* returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
|
|
320
|
+
* spec drives the resolved `LeafExecutor`; `act` exists only to satisfy the `Agent` shape.
|
|
321
|
+
*/
|
|
322
|
+
spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
|
|
323
|
+
/** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
|
|
324
|
+
* the seam a shape uses to give a worker a narrower role/prompt than the root persona. */
|
|
325
|
+
childSpec(profile: AgentProfile$1, harness?: BackendType | null): AgentSpec;
|
|
326
|
+
}
|
|
327
|
+
/**
|
|
328
|
+
* A reusable act-body factory. Given the persona's content + seams (`ShapeContext`), it
|
|
329
|
+
* returns the root `Agent<Task, Outcome<D>>` whose `act` decomposes the task, fans out
|
|
330
|
+
* children through `scope.spawn`, verifies/selects across their settlements (selector≠judge:
|
|
331
|
+
* via `settledToIteration` + `defaultSelectWinner`, never re-ranking behind the driver), and
|
|
332
|
+
* synthesizes the terminal `Outcome<D>`. The shape is STRUCTURE; the persona is CONTENT.
|
|
333
|
+
*/
|
|
334
|
+
type LoopShape<Task, D> = (ctx: ShapeContext<D>) => Agent<Task, Outcome<D>>;
|
|
335
|
+
/**
|
|
336
|
+
* The open shape registry — the extension point that makes a new loop-shape ONE file + one
|
|
337
|
+
* `registerShape` call with zero edits elsewhere. `resolve` returns a typed outcome (inspect
|
|
338
|
+
* `succeeded` before `value`); `register` fails loud on a duplicate name.
|
|
339
|
+
*/
|
|
340
|
+
interface ShapeRegistry {
|
|
341
|
+
register<Task, D>(name: string, factory: LoopShape<Task, D>): void;
|
|
342
|
+
resolve<Task, D>(name: string): {
|
|
343
|
+
succeeded: true;
|
|
344
|
+
value: LoopShape<Task, D>;
|
|
345
|
+
} | {
|
|
346
|
+
succeeded: false;
|
|
347
|
+
error: string;
|
|
348
|
+
};
|
|
349
|
+
/** The registered shape names — for diagnostics + a fail-loud "unknown shape" message. */
|
|
350
|
+
names(): string[];
|
|
351
|
+
}
|
|
352
|
+
/**
|
|
353
|
+
* The end-to-end entrypoint. Builds the persona's root `Agent` from the chosen shape, then
|
|
354
|
+
* runs it through a fresh `createSupervisor` over the persona's executors + the supplied
|
|
355
|
+
* budget/journal/blobs. Returns the keystone's typed `SupervisedResult<Outcome<D>>` — a
|
|
356
|
+
* `winner` carries the synthesized `Outcome<D>`; a `no-winner` is never coerced into one.
|
|
357
|
+
*
|
|
358
|
+
* `shape` is either a resolved `LoopShape` or a registered shape NAME (resolved through the
|
|
359
|
+
* default registry). The journal/blobs default to in-memory impls in the engine when omitted
|
|
360
|
+
* (durable FS impls are passed explicitly for a persisted run).
|
|
361
|
+
*/
|
|
362
|
+
interface RunPersonifiedOptions<Task, D> {
|
|
363
|
+
readonly persona: Persona<D>;
|
|
364
|
+
/** A resolved shape factory OR a registered shape name. */
|
|
365
|
+
readonly shape: LoopShape<Task, D> | string;
|
|
366
|
+
readonly task: Task;
|
|
367
|
+
readonly budget: Budget;
|
|
368
|
+
/** Per-child sizing + fanout width handed to the shape. Defaults derive from `budget`. */
|
|
369
|
+
readonly shapeBudget?: Partial<ShapeBudget>;
|
|
370
|
+
/** Trace/journal root key. Defaults to the persona name + a run discriminator in the engine. */
|
|
371
|
+
readonly runId?: string;
|
|
372
|
+
readonly journal?: SpawnJournal;
|
|
373
|
+
readonly blobs?: ResultBlobStore;
|
|
374
|
+
/** Runtime recursion-depth ceiling, paired with the conserved pool. */
|
|
375
|
+
readonly maxDepth?: number;
|
|
376
|
+
/** OTP intensity breaker bounds, forwarded to the supervisor verbatim. */
|
|
377
|
+
readonly maxRestarts?: number;
|
|
378
|
+
readonly withinMs?: number;
|
|
379
|
+
/** A live root handle to attach (view/signal/abort) before the run starts. */
|
|
380
|
+
readonly handle?: RootHandle<Outcome<D>>;
|
|
381
|
+
readonly now?: () => number;
|
|
382
|
+
readonly signal?: AbortSignal;
|
|
383
|
+
}
|
|
384
|
+
/** The composed run signature. */
|
|
385
|
+
type RunPersonified = <Task, D>(options: RunPersonifiedOptions<Task, D>) => Promise<SupervisedResult<Outcome<D>>>;
|
|
386
|
+
|
|
387
|
+
/**
|
|
388
|
+
* @experimental
|
|
389
|
+
*
|
|
390
|
+
* The RSI-wave type surface — the FROZEN contracts the wave's Core + Compose build to.
|
|
391
|
+
*
|
|
392
|
+
* The keystone (`../supervise/`) is pure execution structure: a recursive `Agent` atom in a
|
|
393
|
+
* budget-conserving `Scope`, run to a typed `SupervisedResult` by a `Supervisor`. The persona
|
|
394
|
+
* layer (`./types`, `./persona`) adds the "act like X" content seam (`Persona` = `AgentSpec` +
|
|
395
|
+
* `directive` + `context`, `LoopShape = (ctx) => Agent`, `Outcome<D>`). This module freezes the
|
|
396
|
+
* remaining four wave seams ON TOP of those — and nothing more:
|
|
397
|
+
*
|
|
398
|
+
* 1. GENERIC COMBINATORS — the content-free act-library. Five composable shapes
|
|
399
|
+
* (`pipeline`/`fanout`/`loopUntil`/`panel`/`verify`) plus the streaming widener (G5). Each
|
|
400
|
+
* is a `CombinatorShape` (a `LoopShape` whose `Agent.act` runs the combinator over `Scope`),
|
|
401
|
+
* so a combinator IS just a `LoopShape` — no new engine type. The SHAPE is here; the DOMAIN
|
|
402
|
+
* (model, prompt, role) stays on the `Persona`. There is no "research" or "code" combinator:
|
|
403
|
+
* a research sweep is `fanout` under a research persona; a build is `pipeline` under a coder.
|
|
404
|
+
* 2. ANALYST-ON-SCOPE (G1, a PORT) — `ScopeAnalyst` carries the round-synchronous driver's
|
|
405
|
+
* analyze→findings→steer wire (dynamic.ts) across to the reactive `Scope`, behind
|
|
406
|
+
* the same trace-derived firewall (`assertTraceDerivedFindings` semantics): a reactive
|
|
407
|
+
* combinator steers from trace FINDINGS, never a child's raw `verdict`.
|
|
408
|
+
* 3. CROSS-RUN CORPUS (G2) — `Corpus` is the DURABLE accreted-fact store, DISTINCT from the
|
|
409
|
+
* per-run `SpawnJournal`/`ResultBlobStore`. `renderCorpusToInstructions` is the read-back:
|
|
410
|
+
* it projects accreted facts into `AgentProfile.prompt.instructions` / `resources.instructions`
|
|
411
|
+
* for the next run's persona (the learning-flywheel READ side).
|
|
412
|
+
* 4. TRAJECTORY TRACE + COST LEDGER — `trajectoryReport(journal, blobs)` reconstructs the whole
|
|
413
|
+
* spawn tree with per-node + rolled-up `Spend`; `equalKOnCost` compares arms on conserved
|
|
414
|
+
* COST (tokens/usd), NOT raw iteration count — closing the leaf-fanout confound.
|
|
415
|
+
*
|
|
416
|
+
* Layering: imports ONLY keystone runtime types (`../supervise/types`), persona types
|
|
417
|
+
* (`./types`), the substrate `AnalystFinding`/`AgentProfile`, and the durable-store interfaces.
|
|
418
|
+
* Pure types/interfaces — this module typechecks standalone, owns no impl, invents no engine.
|
|
419
|
+
*/
|
|
420
|
+
|
|
421
|
+
/**
|
|
422
|
+
* A combinator is just a `LoopShape`: a factory `(ShapeContext) => Agent` whose `Agent.act`
|
|
423
|
+
* runs the combinator's structure over the `Scope` (spawn children, drain `next()`, select via
|
|
424
|
+
* the single-sourced `settledToIteration`+`defaultSelectWinner`, synthesize an `Outcome<D>`).
|
|
425
|
+
* Aliased — NOT a new type — so a combinator stays a first-class shape the persona layer's
|
|
426
|
+
* `runPersonified`/`ShapeRegistry` resolve with zero new machinery. The SHAPE is content-free;
|
|
427
|
+
* the persona carries the domain.
|
|
428
|
+
*/
|
|
429
|
+
type CombinatorShape<Task, D> = LoopShape<Task, D>;
|
|
430
|
+
/**
|
|
431
|
+
* `pipeline(stages)` — sequential composition: each stage's `Outcome.deliverable` feeds the next
|
|
432
|
+
* stage's task (via `feed`). The first `blocked` stage short-circuits the whole pipeline (its
|
|
433
|
+
* blockers ARE the pipeline's blockers — never coerced past a failed stage). The terminal
|
|
434
|
+
* stage's `done` deliverable is the pipeline's deliverable. Spawns one child per stage in order;
|
|
435
|
+
* a stage that the conserved pool cannot admit is a concrete blocker.
|
|
436
|
+
*
|
|
437
|
+
* No domain: "code build test" is `pipeline([plan, implement, integrate])` under a coder persona,
|
|
438
|
+
* not a named shape. A stage names only its label + how to derive its task from the prior output.
|
|
439
|
+
*/
|
|
440
|
+
interface PipelineStage<Task, StepIn, StepOut> {
|
|
441
|
+
/** Trace/journal label for this stage's spawned child. */
|
|
442
|
+
readonly label: string;
|
|
443
|
+
/** Derive this stage's task from the prior stage's deliverable (or the root task for stage 0).
|
|
444
|
+
* Pure projection — the framework never interprets the result; the resolved leaf does. */
|
|
445
|
+
feed(prior: StepIn, ctx: ShapeContext<unknown>, rootTask: Task): unknown;
|
|
446
|
+
/** Read this stage's settled child output into the typed `StepOut` the next stage feeds on.
|
|
447
|
+
* Fail loud (return a `blocked`) when the child produced nothing usable for the next stage. */
|
|
448
|
+
collect(settled: Settled<Outcome<StepOut>>): Outcome<StepOut>;
|
|
449
|
+
}
|
|
450
|
+
/** `pipeline(stages)` — build the sequential combinator from an ordered stage list. The first
|
|
451
|
+
* stage's `StepIn` is the root `Task`; the last stage's `StepOut` is the deliverable `D`. */
|
|
452
|
+
type Pipeline = <Task, D>(stages: ReadonlyArray<PipelineStage<Task, unknown, unknown>>) => CombinatorShape<Task, D>;
|
|
453
|
+
/**
|
|
454
|
+
* `fanout(items, { synthesize? })` — N children spawned in one round (one per item, bounded by
|
|
455
|
+
* the conserved pool's fail-closed admission), drained via `scope.next()`, then optionally a
|
|
456
|
+
* single SYNTHESIS child over the gathered results. Without `synthesize`, the combinator returns
|
|
457
|
+
* the best-valid child via the single-sourced selector (selector≠judge). A round that admitted
|
|
458
|
+
* zero children, or whose synthesis child could not be admitted, is a concrete blocker.
|
|
459
|
+
*
|
|
460
|
+
* No domain: a "research sweep over angles" is `fanout(angles, { synthesize: cite })` under a
|
|
461
|
+
* research persona; a "fanout-vote" is `fanout(copies)` with the default selector. The item list
|
|
462
|
+
* + the synthesis posture are the SHAPE's args; the prompt that turns an item into work is the
|
|
463
|
+
* persona's.
|
|
464
|
+
*/
|
|
465
|
+
interface FanoutOptions<Item, D> {
|
|
466
|
+
/** One child task per item: `item` + the index discriminator. The persona's directive/context
|
|
467
|
+
* is threaded in by the combinator; this only supplies the per-item discriminator. */
|
|
468
|
+
itemTask(item: Item, index: number, ctx: ShapeContext<D>): unknown;
|
|
469
|
+
/** Per-item child label (defaults to `item:<index>` in the impl). */
|
|
470
|
+
label?(item: Item, index: number): string;
|
|
471
|
+
/**
|
|
472
|
+
* Optional synthesis over the gathered child results: when present, the combinator spawns ONE
|
|
473
|
+
* synthesis child whose task is built from the drained settlements, and its `done` output is
|
|
474
|
+
* the deliverable. When absent, the deliverable is the best-valid child via `defaultSelectWinner`.
|
|
475
|
+
* The synthesis child is a SEPARATE keystone agent (not a re-rank behind the driver).
|
|
476
|
+
*/
|
|
477
|
+
synthesize?: FanoutSynthesis<D>;
|
|
478
|
+
}
|
|
479
|
+
/** How a fanout's synthesis child is built + read. `synthesisTask` projects the drained child
|
|
480
|
+
* settlements into the synthesis child's task; `collect` reads its settled output into the
|
|
481
|
+
* deliverable `Outcome<D>`. */
|
|
482
|
+
interface FanoutSynthesis<D> {
|
|
483
|
+
/** Project the drained child settlements (`gathered`) into the synthesis child's task.
 * The result is typed `unknown`; this declaration does not constrain its shape. */
synthesisTask(gathered: ReadonlyArray<Settled<Outcome<D>>>, ctx: ShapeContext<D>): unknown;
|
|
484
|
+
/** Read the synthesis child's settled output into the deliverable `Outcome<D>`. */
collect(settled: Settled<Outcome<D>>): Outcome<D>;
|
|
485
|
+
}
|
|
486
|
+
/** `fanout(items, opts)` — build the fanout combinator over a static item list. */
|
|
487
|
+
type Fanout = <Task, Item, D>(items: ReadonlyArray<Item>, opts: FanoutOptions<Item, D>) => CombinatorShape<Task, D>;
|
|
488
|
+
/**
|
|
489
|
+
* `loopUntil(seed, { step, fold, until })` — iterative deepening inside the conserved pool: spawn one `step`
|
|
490
|
+
* child per round, ask `until` whether the accumulated state satisfies the goal, and stop when it
|
|
491
|
+
* does OR when the pool can no longer admit a step (budget IS the loop bound — no unbounded
|
|
492
|
+
* while). The deployable, non-oracle stop: `until` is the satisfiability gate, read from trace
|
|
493
|
+
* findings + accumulated deliverables, never a fresh raw verdict the loop minted to stop itself.
|
|
494
|
+
*
|
|
495
|
+
* No domain: "refine until tests pass" is `loopUntil` with a coder persona + a `step` that edits
|
|
496
|
+
* and an `until` that reads the test-finding; the combinator owns only the round/stop wiring.
|
|
497
|
+
*/
|
|
498
|
+
interface LoopUntilSpec<Task, State, D> {
|
|
499
|
+
/** Build the next step child's task from the root task + the state accumulated so far. */
|
|
500
|
+
step(rootTask: Task, state: LoopUntilState<State>, ctx: ShapeContext<D>): unknown;
|
|
501
|
+
/** Fold one settled step into the accumulated state (the loop's running deliverable candidate). */
|
|
502
|
+
fold(prior: LoopUntilState<State>, settled: Settled<Outcome<D>>): LoopUntilState<State>;
|
|
503
|
+
/**
|
|
504
|
+
* The satisfiability gate: given the accumulated state + the round's trace findings, has the
|
|
505
|
+
* goal been reached? Returns the terminal deliverable when satisfied, or `null` to keep going.
|
|
506
|
+
* Reads `findings` (trace-derived), NOT a raw verdict score — the deployable-stop discipline.
|
|
507
|
+
*/
|
|
508
|
+
until(state: LoopUntilState<State>, findings: ReadonlyArray<AnalystFinding>): Outcome<D> | null;
|
|
509
|
+
/** Per-round step label (defaults to `step:<round>` in the impl). */
|
|
510
|
+
label?(round: number): string;
|
|
511
|
+
}
|
|
512
|
+
/** The accumulated state `loopUntil` threads across rounds — the running candidate + the round
|
|
513
|
+
* index, so `step`/`fold`/`until` are pure functions of it (replay-safe, no wall-clock). */
|
|
514
|
+
interface LoopUntilState<State> {
|
|
515
|
+
/** The round index. NOTE(review): whether rounds count from 0 or 1 is not visible in this
 * declaration — confirm against the impl. */
readonly round: number;
|
|
516
|
+
/** The running candidate accumulated so far (starts as the `seed` passed to `loopUntil`). */
readonly value: State;
|
|
517
|
+
}
|
|
518
|
+
/** `loopUntil(seed, spec)` — build the iterative-deepening combinator. `seed` is the initial state. */
|
|
519
|
+
type LoopUntil = <Task, State, D>(seed: State, spec: LoopUntilSpec<Task, State, D>) => CombinatorShape<Task, D>;
|
|
520
|
+
/**
|
|
521
|
+
* `panel(judges)` — M judges over ONE artifact, merged WRITE-ONLY (selector≠judge taken to its
|
|
522
|
+
* limit). The combinator spawns the M judge children over the same input artifact, drains their
|
|
523
|
+
* settlements, and MERGES their findings into a panel verdict via `merge` — a pure WRITE-ONLY
|
|
524
|
+
* fold (a judge's output is never fed back to steer another judge, and the merge never re-ranks
|
|
525
|
+
* the children behind the driver). The merged verdict gates the deliverable.
|
|
526
|
+
*
|
|
527
|
+
* No domain: a "code review panel" and an "essay rubric panel" are the same `panel` shape under
|
|
528
|
+
* different personas; the rubric lives in each judge persona's profile, not the combinator.
|
|
529
|
+
*/
|
|
530
|
+
interface PanelSpec<Artifact, D> {
|
|
531
|
+
/** The M judge child specs: each is a persona-derived child (a narrower judge profile). The
|
|
532
|
+
* combinator spawns one child per entry over the SAME `artifact` and never lets one judge's
|
|
533
|
+
* output reach another's task (write-only). */
|
|
534
|
+
readonly judges: ReadonlyArray<PanelJudge>;
|
|
535
|
+
/** Build one judge child's task from the shared artifact under review + the judge descriptor. */
|
|
536
|
+
judgeTask(artifact: Artifact, judge: PanelJudge, ctx: ShapeContext<D>): unknown;
|
|
537
|
+
/**
|
|
538
|
+
* Write-only merge: fold the M settled judge verdicts into the panel's terminal `Outcome<D>`.
|
|
539
|
+
* Pure over the drained settlements — it MUST NOT spawn, re-judge, or feed one verdict into
|
|
540
|
+
* another. A panel that reached no quorum is a concrete blocker (fail loud, never a vacuous done).
|
|
541
|
+
*/
|
|
542
|
+
merge(verdicts: ReadonlyArray<PanelVerdict>, artifact: Artifact): Outcome<D>;
|
|
543
|
+
}
|
|
544
|
+
/** One judge in a panel — a labeled persona-derived judge child. Content (the rubric) lives in
|
|
545
|
+
* the judge's profile; this carries only the label + the optional weight the merge may read. */
|
|
546
|
+
interface PanelJudge {
|
|
547
|
+
readonly label: string;
|
|
548
|
+
/** Optional merge weight (a write-only hint the `merge` fold may use; default-equal in the impl). */
|
|
549
|
+
readonly weight?: number;
|
|
550
|
+
}
|
|
551
|
+
/** One judge child's settled verdict, surfaced to the write-only `merge`. `down` judges carry no
|
|
552
|
+
* verdict (excluded from the merge `n`, like an infra-errored cell). */
|
|
553
|
+
interface PanelVerdict {
|
|
554
|
+
/** The judge descriptor (label + optional weight) this entry belongs to. */
readonly judge: PanelJudge;
|
|
555
|
+
/** The judge's verdict. Optional: a judge that went `down` carries no verdict. */
readonly verdict?: DefaultVerdict;
|
|
556
|
+
/** The judge child's raw output — what it was asked to assess, for a merge that quotes it. */
|
|
557
|
+
readonly output?: unknown;
|
|
558
|
+
/** True when the judge child went `down` (no usable verdict — kept out of the merge denominator). */
|
|
559
|
+
readonly down: boolean;
|
|
560
|
+
}
|
|
561
|
+
/** `panel(spec)` — build the M-judge write-only-merge combinator. */
|
|
562
|
+
type Panel = <Task, Artifact, D>(spec: PanelSpec<Artifact, D>) => CombinatorShape<Task, D>;
|
|
563
|
+
/**
|
|
564
|
+
* `verify({ implement, verifier })` — the 2-node sequential gate: an IMPLEMENT child produces a
|
|
565
|
+
* candidate, then a SEPARATE VERIFIER child's verdict GATES shippability. A `valid` verifier
|
|
566
|
+
* verdict ships the implement deliverable; any other outcome (implement down, verifier down,
|
|
567
|
+
* invalid verdict) becomes a concrete blocker carrying the failure verbatim — never a coerced
|
|
568
|
+
* "done". The verifier is a distinct keystone agent (selector≠judge: the implement child does
|
|
569
|
+
* not grade itself).
|
|
570
|
+
*
|
|
571
|
+
* No domain: "write code then run the test gate" and "draft then fact-check" are the same `verify`
|
|
572
|
+
* shape under different personas; the gate rubric is the verifier persona's, not the combinator's.
|
|
573
|
+
*/
|
|
574
|
+
interface VerifySpec<Task, Candidate, D> {
|
|
575
|
+
/** Build the implement child's task from the root task. */
|
|
576
|
+
implement(rootTask: Task, ctx: ShapeContext<D>): unknown;
|
|
577
|
+
/** Build the verifier child's task from the implement child's settled candidate. */
|
|
578
|
+
verifier(candidate: Settled<Outcome<Candidate>>, ctx: ShapeContext<D>): unknown;
|
|
579
|
+
/** Project the gated (verifier-`valid`) candidate into the terminal deliverable. */
|
|
580
|
+
collect(candidate: Settled<Outcome<Candidate>>, verdict: DefaultVerdict): Outcome<D>;
|
|
581
|
+
/** Implement / verifier child labels (default `implement` / `verify` in the impl). */
|
|
582
|
+
readonly implementLabel?: string;
|
|
583
|
+
readonly verifierLabel?: string;
|
|
584
|
+
}
|
|
585
|
+
/** `verify(spec)` — build the 2-node implement→verifier-gate combinator. */
|
|
586
|
+
type Verify = <Task, Candidate, D>(spec: VerifySpec<Task, Candidate, D>) => CombinatorShape<Task, D>;
|
|
587
|
+
/**
|
|
588
|
+
* `widen({ gate })` (G5) — the STREAMING spawn-on-completion driver. Unlike the static-fanout
|
|
589
|
+
* combinators above, the widener REACTS to each `scope.next()`: as each child settles it consults
|
|
590
|
+
* the `WidenGate` and, when a lineage is `promising`, widens by AT MOST ONE child toward it under
|
|
591
|
+
* the remaining conserved pool. Defaults to FLAT (the gate never widens) so a gate run stays
|
|
592
|
+
* non-widening and the R2 selector≠judge collision is dormant. `promising` is derived from the
|
|
593
|
+
* round's analyst FINDINGS (via `ScopeAnalyst`, §2), NOT a child's raw `verdict` — the firewall.
|
|
594
|
+
*
|
|
595
|
+
* This is the progressive-widening (MCTS-PW) combinator: the one shape whose breadth is decided
|
|
596
|
+
* at runtime from the diagnosis, not fixed at spawn. It is the mechanism the diverse-strategy-vs-
|
|
597
|
+
* blind GATE is run with — kept FLAT by default until that gate returns positive (don't build
|
|
598
|
+
* mechanism ahead of the gate).
|
|
599
|
+
*/
|
|
600
|
+
interface WidenSpec<Seed, D> {
|
|
601
|
+
/** The initial children to spawn before any widening — the seed lineages the gate widens from.
|
|
602
|
+
* One child task per seed; bounded by the conserved pool's fail-closed admission. */
|
|
603
|
+
readonly seeds: ReadonlyArray<Seed>;
|
|
604
|
+
/** Build one seed child's task from a seed and its `index` (presumably its position in
 * `seeds` — confirm against the impl). The result is typed `unknown`. */
seedTask(seed: Seed, index: number, ctx: ShapeContext<D>): unknown;
|
|
605
|
+
/**
|
|
606
|
+
* The progressive-widening gate. Consulted on EVERY settled child with the round's
|
|
607
|
+
* trace-derived `findings`; returns a widen decision (spawn one more toward a lineage) or a
|
|
608
|
+
* stop. DEFAULTS to flat via `flatWidenGate` — never widens, so the firewall stays dormant.
* NOTE(review): `gate` is declared required on this type, so "default" presumably means
* callers pass the flat gate explicitly — confirm against the impl.
|
|
609
|
+
*/
|
|
610
|
+
readonly gate: ScopeWidenGate<D>;
|
|
611
|
+
/** Build the widened child's task from the lineage the gate chose to extend. */
|
|
612
|
+
widenTask(toward: WidenLineage<D>, ctx: ShapeContext<D>): unknown;
|
|
613
|
+
/** Synthesize the terminal deliverable from every settled lineage (selector≠judge: the
|
|
614
|
+
* single-sourced selector over the gathered children, never a re-judge). */
|
|
615
|
+
synthesize(gathered: ReadonlyArray<Settled<Outcome<D>>>, ctx: ShapeContext<D>): Outcome<D>;
|
|
616
|
+
}
|
|
617
|
+
/**
|
|
618
|
+
* The runtime widening gate (the reactive analogue of the keystone's `WidenGate`, lifted to read
|
|
619
|
+
* trace FINDINGS instead of a raw verdict). `decide` is consulted per settled child; it MUST
|
|
620
|
+
* derive `promising` from `findings`, never from `settled.verdict`, unless `judgeExempt` is
|
|
621
|
+
* explicitly argued (the documented off-by-default escape hatch). Flat default never widens.
|
|
622
|
+
*/
|
|
623
|
+
interface ScopeWidenGate<D> {
|
|
624
|
+
/**
 * Decide whether to widen after one child settles.
 * `settled` is the child that just settled; `findings` are the trace-derived analyst findings
 * the decision must be based on; `budget` is the scope's `budget` value.
 * Returns a `WidenDecision`: widen toward one lineage, or stop.
 */
decide(settled: Settled<Outcome<D>>, findings: ReadonlyArray<AnalystFinding>, budget: Scope<Outcome<D>>['budget']): WidenDecision<D>;
|
|
625
|
+
/** When true, `decide` may read `settled.verdict` directly — collides with the steer firewall,
|
|
626
|
+
* so it must be argued per cell, never defaulted on (mirrors the keystone `WidenGate`). */
|
|
627
|
+
readonly judgeExempt?: boolean;
|
|
628
|
+
}
|
|
629
|
+
/** A widening decision: extend one lineage by one child, or stop widening. `flatWidenGate`
|
|
630
|
+
* always returns `{ kind: 'stop' }`. */
|
|
631
|
+
type WidenDecision<D> = {
|
|
632
|
+
/** Variant: extend one lineage by one child. */
kind: 'widen';
|
|
633
|
+
/** The lineage to extend. */
toward: WidenLineage<D>;
|
|
634
|
+
} | {
|
|
635
|
+
/** Variant: stop widening. */
kind: 'stop';
|
|
636
|
+
/** Optional rationale string for the stop. */
rationale?: string;
|
|
637
|
+
};
|
|
638
|
+
/** A lineage the gate may widen toward — the settled child that looked promising + the findings
|
|
639
|
+
* that justified it (the trace-derived provenance the firewall requires). */
|
|
640
|
+
interface WidenLineage<D> {
|
|
641
|
+
/** The settled child that looked promising. Narrowed to the `kind: 'done'` member of
 * `Settled`, so only a child that settled as `done` can be widened toward. */
readonly settled: Extract<Settled<Outcome<D>>, {
|
|
642
|
+
kind: 'done';
|
|
643
|
+
}>;
|
|
644
|
+
/** The findings that justified widening toward this lineage (trace-derived provenance). */
readonly findings: ReadonlyArray<AnalystFinding>;
|
|
645
|
+
}
|
|
646
|
+
/** `widen(spec)` — build the streaming progressive-widening combinator. */
|
|
647
|
+
type Widen = <Task, Seed, D>(spec: WidenSpec<Seed, D>) => CombinatorShape<Task, D>;
|
|
648
|
+
/** The flat default `ScopeWidenGate` factory contract — never widens, keeping the R2 firewall
|
|
649
|
+
* conflict dormant. Exported so a gate run can pass it explicitly and a test can assert the
|
|
650
|
+
* default is flat. */
|
|
651
|
+
type FlatWidenGate = <D>() => ScopeWidenGate<D>;
|
|
652
|
+
/**
|
|
653
|
+
* The reactive analyst seam — the PORT of the round-synchronous driver's `analyze` hook
|
|
654
|
+
* (dynamic.ts) onto the reactive `Scope`. The old driver wired the analyst at round
|
|
655
|
+
* boundaries (`plan` ran the analyst over `history` BEFORE the planner); the reactive `Scope` has
|
|
656
|
+
* no rounds, so this carries the wire across: a combinator's `act` asks the `ScopeAnalyst` to turn
|
|
657
|
+
* the settled children SO FAR into `AnalystFinding[]`, and steers from THOSE findings.
|
|
658
|
+
*
|
|
659
|
+
* The firewall is preserved (selector≠judge): `analyze` runs the trace-derived analyst and the
|
|
660
|
+
* impl asserts `assertTraceDerivedFindings` semantics — a finding citing judge/verdict/score
|
|
661
|
+
* `metric` evidence aborts the round. The steer decision reads `findings`, NEVER the children's
|
|
662
|
+
* raw `verdict`. Fail loud — a throwing or non-array analyst aborts (no silent empty findings).
|
|
663
|
+
*/
|
|
664
|
+
interface ScopeAnalyst<D> {
|
|
665
|
+
/**
|
|
666
|
+
* Turn the children settled so far into trace-derived findings. `settledSoFar` is the cursor-
|
|
667
|
+
* ordered settlement list a combinator has drained (the reactive analogue of the old driver's
|
|
668
|
+
* `history`). The impl runs the analyst, then enforces the trace-derived firewall before
|
|
669
|
+
* returning — a judge-derived finding is rejected, not filtered.
|
|
670
|
+
*/
|
|
671
|
+
analyze(input: ScopeAnalyzeInput<D>): Promise<ReadonlyArray<AnalystFinding>>;
|
|
672
|
+
}
|
|
673
|
+
/** Input to a `ScopeAnalyst.analyze` — the root task framing + the children settled so far. The
|
|
674
|
+
* reactive analogue of the old `AnalyzeInput { task, history }`. */
|
|
675
|
+
interface ScopeAnalyzeInput<D> {
|
|
676
|
+
/** Opaque root-task framing (whatever the combinator was invoked with). */
|
|
677
|
+
readonly task: unknown;
|
|
678
|
+
/** The children this combinator has drained off `scope.next()`, in cursor order. */
|
|
679
|
+
readonly settledSoFar: ReadonlyArray<Settled<Outcome<D>>>;
|
|
680
|
+
/** This combinator's scope id (the trace-correlation root for the analyst). */
|
|
681
|
+
readonly nodeId: NodeId;
|
|
682
|
+
}
|
|
683
|
+
/**
|
|
684
|
+
* How a combinator's `act` consumes findings to steer — the SINGLE firewalled steer surface a
|
|
685
|
+
* reactive combinator reads. `loopUntil.until`, `widen` gate, and any future steer all funnel
|
|
686
|
+
* through a `SteerContext` so the firewall is enforced in one place: `findings` is trace-derived
|
|
687
|
+
* (the analyst already asserted it), and a combinator MUST NOT reach back to `settled.verdict`
|
|
688
|
+
* for the steer decision. `lastValidScore` is provided for OBSERVABILITY only (rendering/traces),
|
|
689
|
+
* explicitly NOT for steering — reading it to steer is the coupling the architecture forbids.
|
|
690
|
+
*/
|
|
691
|
+
interface SteerContext<D> {
|
|
692
|
+
/** Trace-derived analyst findings — the input a steer decision reads. */
readonly findings: ReadonlyArray<AnalystFinding>;
|
|
693
|
+
/** The children settled so far. A combinator must not read `verdict` off these to steer. */
readonly settledSoFar: ReadonlyArray<Settled<Outcome<D>>>;
|
|
694
|
+
/** Observability-only: the best valid score seen so far. Rendering/trace use ONLY — steering
|
|
695
|
+
* off this re-introduces selector=judge. Marked so a reviewer catches a misuse. */
|
|
696
|
+
readonly lastValidScore?: number;
|
|
697
|
+
}
|
|
698
|
+
/**
|
|
699
|
+
* The firewall assertion contract, re-stated for the reactive seam (PORT of
|
|
700
|
+
* `assertTraceDerivedFindings`). A PROVENANCE check, not a content check: span/event/artifact/
|
|
701
|
+
* finding refs and empty-evidence findings pass; only a `metric` ref whose uri is a
|
|
702
|
+
* judge/verdict/score scheme is rejected. Fail loud — a tainted finding aborts. The impl lives in
|
|
703
|
+
* `analyst.ts`; this type pins its signature so callers depend on the contract, not the impl.
|
|
704
|
+
*/
|
|
705
|
+
type AssertTraceDerivedFindings = (findings: ReadonlyArray<AnalystFinding>) => void;
|
|
706
|
+
/**
|
|
707
|
+
* One accreted fact in the cross-run corpus — the learning-flywheel's durable unit. DISTINCT from
|
|
708
|
+
* a `SpawnEvent` (a per-run decision record): a `CorpusRecord` is a fact a run LEARNED that a
|
|
709
|
+
* FUTURE run should read back (the world-model for story 5). It is content the next persona reads,
|
|
710
|
+
* not a replay input. Tagged + scored so `query`/`renderCorpusToInstructions` can project the
|
|
711
|
+
* relevant, high-confidence subset.
|
|
712
|
+
*/
|
|
713
|
+
interface CorpusRecord {
|
|
714
|
+
readonly schemaVersion: '1.0.0';
|
|
715
|
+
/** Stable id over identity-defining fields (claim + tags) so a re-learned fact dedups. */
|
|
716
|
+
readonly id: string;
|
|
717
|
+
/** The run that produced this fact (the journal `runId`/`root`) — provenance back to the trace. */
|
|
718
|
+
readonly runId: NodeId;
|
|
719
|
+
readonly producedAt: string;
|
|
720
|
+
/** Coarse classification the query/render filters on (free-form, mirrors `AnalystFinding.area`). */
|
|
721
|
+
readonly area: string;
|
|
722
|
+
/** The accreted fact — the instruction-shaped statement the next run reads back. */
|
|
723
|
+
readonly claim: string;
|
|
724
|
+
/** Optional supporting detail the renderer may include under the claim. */
|
|
725
|
+
readonly rationale?: string;
|
|
726
|
+
/** Free-form tags for `query` filtering (domain, persona, surface). */
|
|
727
|
+
readonly tags: ReadonlyArray<string>;
|
|
728
|
+
/** 0..1 — the producing run's confidence in this fact (the render threshold reads it). */
|
|
729
|
+
readonly confidence: number;
|
|
730
|
+
/** Optional provenance back into the run that learned it (a finding id / outRef / span). */
|
|
731
|
+
readonly evidence?: ReadonlyArray<{
|
|
732
|
+
readonly kind: string;
|
|
733
|
+
readonly uri: string;
|
|
734
|
+
}>;
|
|
735
|
+
}
|
|
736
|
+
/** A corpus query filter — every field is an AND-narrowing; an omitted field does not constrain. */
|
|
737
|
+
interface CorpusFilter {
|
|
738
|
+
readonly area?: string;
|
|
739
|
+
/** Match records carrying ALL of these tags. */
|
|
740
|
+
readonly tags?: ReadonlyArray<string>;
|
|
741
|
+
/** Minimum confidence a record must clear to be returned (the render gate). */
|
|
742
|
+
readonly minConfidence?: number;
|
|
743
|
+
/** Only records from this run (rare — usually a cross-run read). */
|
|
744
|
+
readonly runId?: NodeId;
|
|
745
|
+
/** Cap the result count (most-confident first in the impl). */
|
|
746
|
+
readonly limit?: number;
|
|
747
|
+
}
|
|
748
|
+
/**
|
|
749
|
+
* The durable cross-run corpus — the learning-flywheel store. DISTINCT from `SpawnJournal`
|
|
750
|
+
* (per-run decisions, replay) and `ResultBlobStore` (per-run payloads): `Corpus` holds accreted
|
|
751
|
+
* FACTS across runs that the next run reads back. `InMemoryCorpus` + `FileCorpus` (JSONL) impls
|
|
752
|
+
* live in `corpus.ts` and MAY share a storage spine with the JSONL journal, but the INTERFACE is
|
|
753
|
+
* separate so a consumer never confuses a replay record with a learned fact.
|
|
754
|
+
*
|
|
755
|
+
* Fail-loud, typed-outcome boundary: `append` is idempotent on an identical record (same `id` +
|
|
756
|
+
* `claim`); a conflicting re-append under the same `id` is a typed error, never a silent overwrite.
|
|
757
|
+
*/
|
|
758
|
+
interface Corpus {
|
|
759
|
+
/** Append one accreted fact. Idempotent on an identical record; returns a typed outcome —
|
|
760
|
+
* inspect `succeeded` before treating it as durable (no silent write-through on conflict). */
|
|
761
|
+
append(record: CorpusRecord): Promise<{
|
|
762
|
+
succeeded: true;
|
|
763
|
+
} | {
|
|
764
|
+
succeeded: false;
|
|
765
|
+
error: string;
|
|
766
|
+
}>;
|
|
767
|
+
/** Query accreted facts by filter — most-confident first. Returns the matching records (an
|
|
768
|
+
* empty array when none match is a valid result, NOT an error). */
|
|
769
|
+
query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
|
|
770
|
+
}
|
|
771
|
+
/**
|
|
772
|
+
* Project accreted corpus facts into an `AgentProfile`'s instruction seams — the learning-flywheel
|
|
773
|
+
* READ side. Reads the corpus through `filter`, renders the matching facts into instruction lines,
|
|
774
|
+
* and returns a NEW profile with them merged into `prompt.instructions` (the append-line seam) so
|
|
775
|
+
* the next run's persona reads the accreted world-model. Pure projection over the queried records;
|
|
776
|
+
* never mutates the input profile (returns a fresh one). The impl lives in `corpus.ts`.
|
|
777
|
+
*
|
|
778
|
+
* `resources.instructions` is `string | AgentProfileResourceRef`; `prompt.instructions` is
|
|
779
|
+
* `string[]`. The render targets `prompt.instructions` (additive lines) by default; a caller that
|
|
780
|
+
* wants the single-blob `resources.instructions` form passes `target: 'resources'`.
|
|
781
|
+
*/
|
|
782
|
+
interface RenderCorpusToInstructionsOptions {
|
|
783
|
+
readonly corpus: Corpus;
|
|
784
|
+
readonly filter: CorpusFilter;
|
|
785
|
+
/** The profile to project the facts into. The result is a fresh profile — the input is unchanged. */
|
|
786
|
+
readonly profile: AgentProfile$1;
|
|
787
|
+
/** Where the rendered facts land: appended to `prompt.instructions[]` (default) or folded into
|
|
788
|
+
* the single-blob `resources.instructions` string. */
|
|
789
|
+
readonly target?: 'prompt' | 'resources';
|
|
790
|
+
/** Optional cap on rendered lines (most-confident first), independent of the query `limit`. */
|
|
791
|
+
readonly maxLines?: number;
|
|
792
|
+
}
|
|
793
|
+
/** `renderCorpusToInstructions(opts)` — the flywheel read-back projection. Async (queries the
|
|
794
|
+
* durable corpus); returns a fresh `AgentProfile` with the accreted facts merged in. */
|
|
795
|
+
type RenderCorpusToInstructions = (opts: RenderCorpusToInstructionsOptions) => Promise<AgentProfile$1>;
|
|
796
|
+
/**
|
|
797
|
+
* One node in the reconstructed trajectory tree — a driver OR a leaf, with its OWN spend and the
|
|
798
|
+
* spend ROLLED UP over its subtree. Reconstructed from the `SpawnJournal` (structure + per-node
|
|
799
|
+
* `Spend`) + the `ResultBlobStore` (the `out` artifact, rehydrated by `outRef`). The realized tree
|
|
800
|
+
* shape: `parent`/`children` are the actual spawn edges the run took, not a planned topology.
|
|
801
|
+
*/
|
|
802
|
+
interface TrajectoryNode {
|
|
803
|
+
/** This node's id. */
readonly id: NodeId;
|
|
804
|
+
/** Id of the node that spawned this one (a realized spawn edge). Optional — presumably
 * absent on the root; confirm against the impl. */
readonly parent?: NodeId;
|
|
805
|
+
/** Ids of the nodes this node spawned (realized spawn edges, not a planned topology). */
readonly children: ReadonlyArray<NodeId>;
|
|
806
|
+
/** The node's label. NOTE(review): presumably the trace/journal label given at spawn — confirm. */
readonly label: string;
|
|
807
|
+
/** NOTE(review): a runtime identifier string; its vocabulary is not visible in this
 * declaration — confirm against the impl. */
readonly runtime: string;
|
|
808
|
+
/** Terminal status the journal recorded for this node. */
|
|
809
|
+
readonly status: 'done' | 'failed' | 'cancelled' | 'pending';
|
|
810
|
+
/** This node's OWN conserved spend (from its `settled` event). */
|
|
811
|
+
readonly ownSpend: Spend;
|
|
812
|
+
/** This node's spend PLUS every descendant's — the rolled-up subtree cost. The cost a parent
|
|
813
|
+
* "really" consumed inclusive of its children's fanout (the equal-k-on-cost basis). */
|
|
814
|
+
readonly rolledUpSpend: Spend;
|
|
815
|
+
/** The node's verdict, when its settlement carried one (observability — NOT a steer input). */
|
|
816
|
+
readonly verdict?: DefaultVerdict;
|
|
817
|
+
/** The rehydrated output artifact, when `withOutputs` was requested + the blob resolved. */
|
|
818
|
+
readonly output?: unknown;
|
|
819
|
+
/** Blob-store reference for the node's `out` artifact — the key `output` is rehydrated by. */
readonly outRef?: string;
|
|
820
|
+
}
|
|
821
|
+
/** The whole reconstructed trajectory — the realized tree + its root-rolled-up total. The
|
|
822
|
+
* per-node + rolled-up `Spend` is the evidence both the trace viewer and `equalKOnCost` read. */
|
|
823
|
+
interface TrajectoryReport {
|
|
824
|
+
readonly root: NodeId;
|
|
825
|
+
/** Every node, in cursor/spawn order — the realized tree (`parent`/`children` are the real edges). */
|
|
826
|
+
readonly nodes: ReadonlyArray<TrajectoryNode>;
|
|
827
|
+
/** The root's rolled-up spend — the whole run's conserved total (tokens + usd + iterations + ms). */
|
|
828
|
+
readonly total: Spend;
|
|
829
|
+
/** Count of nodes by terminal status — a quick "how did the tree end" readout. */
|
|
830
|
+
readonly statusCounts: Readonly<Record<TrajectoryNode['status'], number>>;
|
|
831
|
+
}
|
|
832
|
+
/**
|
|
833
|
+
* `trajectoryReport(journal, blobs, root, { withOutputs? })` — reconstruct the whole tree with
|
|
834
|
+
* per-node + rolled-up `Spend`. Reads the journal for structure + spend and (when `withOutputs`)
|
|
835
|
+
* the blob store for each `done` node's artifact. Fail loud on a tree that was never journaled or
|
|
836
|
+
* a `done` node whose blob the store cannot rehydrate (a silent gap would mis-cost the tree). The
|
|
837
|
+
* impl lives in `trajectory.ts`.
|
|
838
|
+
*/
|
|
839
|
+
interface TrajectoryReportOptions {
|
|
840
|
+
/** Rehydrate each `done` node's `output` from the blob store. Off by default (cost-only report). */
|
|
841
|
+
readonly withOutputs?: boolean;
|
|
842
|
+
}
|
|
843
|
+
/** `trajectoryReport(...)` — the tree+cost reconstructor. Async (reads journal + optionally blobs). */
|
|
844
|
+
type TrajectoryReportFn = (journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId, options?: TrajectoryReportOptions) => Promise<TrajectoryReport>;
|
|
845
|
+
/**
|
|
846
|
+
* One arm of an equal-k comparison — a labeled trajectory (a `TrajectoryReport` is one arm's whole
|
|
847
|
+
* run). The arm's conserved COST is `report.total` (tokens + usd), which the sandbox executor
|
|
848
|
+
* already reports INCLUSIVE of a leaf's internal sub-agent fanout — so comparing arms on this cost
|
|
849
|
+
* (not raw `iterations`) closes the leaf-fanout confound: a treatment arm whose leaf fanned out
|
|
850
|
+
* internally is charged for that fanout in `total.tokens`/`total.usd`, not hidden behind one
|
|
851
|
+
* iteration count.
|
|
852
|
+
*/
|
|
853
|
+
interface EqualKArm {
|
|
854
|
+
/** Label naming this arm in the comparison. */
readonly label: string;
|
|
855
|
+
/** The arm's whole-run trajectory; its `total` is the arm's conserved cost. */
readonly report: TrajectoryReport;
|
|
856
|
+
}
|
|
857
|
+
/**
|
|
858
|
+
* The equal-k-on-cost verdict: whether every arm spent within `tolerance` of the others on the
|
|
859
|
+
* CONSERVED cost channels (tokens + usd), so a downstream metric comparison is "at equal k". Per-
|
|
860
|
+
* arm cost is surfaced so a caller can see HOW close. `withinTolerance: false` means the arms are
|
|
861
|
+
* NOT comparable at equal compute — a confound to report, not a result to publish.
|
|
862
|
+
*/
|
|
863
|
+
interface EqualKVerdict {
|
|
864
|
+
readonly withinTolerance: boolean;
|
|
865
|
+
/** Per-arm conserved cost (the basis: tokens total + usd). */
|
|
866
|
+
readonly arms: ReadonlyArray<{
|
|
867
|
+
readonly label: string;
|
|
868
|
+
readonly tokens: number;
|
|
869
|
+
readonly usd: number;
|
|
870
|
+
readonly iterations: number;
|
|
871
|
+
}>;
|
|
872
|
+
/** The realized spread on each channel (max − min across arms), for the report. */
|
|
873
|
+
readonly spread: {
|
|
874
|
+
readonly tokens: number;
|
|
875
|
+
readonly usd: number;
|
|
876
|
+
};
|
|
877
|
+
/** The fractional tolerance the check used (spread / median ≤ tolerance per channel). */
|
|
878
|
+
readonly tolerance: number;
|
|
879
|
+
}
|
|
880
|
+
/**
|
|
881
|
+
* `equalKOnCost(arms, { tolerance? })` — assert arms are comparable at EQUAL conserved COST
|
|
882
|
+
* (tokens + usd), NOT raw iteration count. The conserved-pool guarantees `Σk` equal by
|
|
883
|
+
* construction WITHIN one supervised run; this checks it ACROSS arms (separate runs) where the
|
|
884
|
+
* pool cannot, so a cross-arm gate comparison can prove equal compute before claiming a win. The
|
|
885
|
+
* impl lives in `trajectory.ts`. Pure over the reports — no I/O.
|
|
886
|
+
*/
|
|
887
|
+
interface EqualKOnCostOptions {
|
|
888
|
+
/** Max fractional spread (spread/median) per channel for arms to count as equal-k. Default in
|
|
889
|
+
* the impl (e.g. 0.05). A tighter tolerance = a stricter equal-compute claim. */
|
|
890
|
+
readonly tolerance?: number;
|
|
891
|
+
}
|
|
892
|
+
/** `equalKOnCost(arms, opts)` — the cross-arm equal-compute check on conserved cost. */
|
|
893
|
+
type EqualKOnCost = (arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions) => EqualKVerdict;
|
|
894
|
+
|
|
895
|
+
/**
|
|
896
|
+
* @experimental
|
|
897
|
+
*
|
|
898
|
+
* Analyst-on-scope (G1) — the PORT of the round-synchronous driver's analyze→findings→steer
|
|
899
|
+
* wire (`dynamic.ts`) onto the reactive `Scope`.
|
|
900
|
+
*
|
|
901
|
+
* The old dynamic driver wired the analyst at round boundaries: `plan` ran the analyst over
|
|
902
|
+
* `history` BEFORE the planner and handed the findings forward via `PlannerContext.analyses`,
|
|
903
|
+
* behind a provenance firewall (`assertTraceDerivedFindings`) that keeps the external write-only
|
|
904
|
+
* judge out of the steer decision (selector ≠ judge). The reactive `Scope` has no rounds, so this
|
|
905
|
+
* module carries the same wire across: a combinator's `act` asks a `ScopeAnalyst` to turn the
|
|
906
|
+
* children it has drained off `scope.next()` SO FAR into `AnalystFinding[]`, and steers from THOSE
|
|
907
|
+
* findings through a single `SteerContext`.
|
|
908
|
+
*
|
|
909
|
+
* The analyst itself is not a new type — it is "just an `Agent<unknown, AnalystFinding[]>`" the
|
|
910
|
+
* combinator spawns over a child's trace (harness `null`/`cli`). `createScopeAnalyst` spawns that
|
|
911
|
+
* agent through `Scope.spawn` (so its compute is metered by the conserved pool like any child),
|
|
912
|
+
* drains its settlement, then enforces the firewall on the way out — a judge-derived finding
|
|
913
|
+
* ABORTS; it is never filtered. Fail loud: a down analyst, a non-array result, or a tainted finding
|
|
914
|
+
* throws; there is no silent empty-findings path that would let a combinator steer on nothing.
|
|
915
|
+
*/
|
|
916
|
+
|
|
917
|
+
declare const assertTraceDerivedFindings: AssertTraceDerivedFindings;
|
|
918
|
+
/**
|
|
919
|
+
* The analyst run that an `Agent<unknown, AnalystFinding[]>` performs over the children settled so far.
|
|
920
|
+
* The combinator supplies the analyst's task projection (how to frame the drained settlements as
|
|
921
|
+
* the analyst's input) — the analyst's `act` reads the trace and returns its raw findings; the
|
|
922
|
+
* firewall is enforced afterwards by `createScopeAnalyst`, not by the analyst itself.
|
|
923
|
+
*/
|
|
924
|
+
interface CreateScopeAnalystOptions<D> {
|
|
925
|
+
/** The analyst agent the combinator spawns over the trace. `harness` is the persona's choice
|
|
926
|
+
* (`null` for an inline router analyst, a `BackendType` for a sandboxed one). Its `act` returns
|
|
927
|
+
* the RAW findings; this module asserts the firewall on them before returning. */
|
|
928
|
+
readonly analyst: Agent<unknown, ReadonlyArray<AnalystFinding>>;
|
|
929
|
+
/** Build the analyst agent's task from the analyze input (the root-task framing + the children
|
|
930
|
+
* drained so far). Pure projection — the analyst interprets it, this never reads it. */
|
|
931
|
+
buildTask(input: ScopeAnalyzeInput<D>): unknown;
|
|
932
|
+
/** The conserved budget reserved for one analyst spawn. The pool reserves against it and fails
|
|
933
|
+
* closed; an analyst that cannot be admitted is a fail-loud abort, never silent empty findings. */
|
|
934
|
+
readonly budget: Budget;
|
|
935
|
+
/** Trace/journal label for the spawned analyst child. Default `'analyst'`. */
|
|
936
|
+
readonly label?: string;
|
|
937
|
+
}
|
|
938
|
+
/**
|
|
939
|
+
* Build a `ScopeAnalyst` that spawns the analyst agent through `Scope.spawn` (so its compute is
|
|
940
|
+
* metered by the conserved pool), drains its single settlement, and enforces the trace-derived
|
|
941
|
+
* firewall before returning. The `scope` is the SAME scope the combinator is draining its children
|
|
942
|
+
* from — the analyst is spawned as a sibling and its result is read off `scope.next()` in cursor
|
|
943
|
+
* order, replay-safe like any other child.
|
|
944
|
+
*
|
|
945
|
+
* Fail loud (no silent empty findings):
|
|
946
|
+
* - the pool refuses the analyst spawn → `AnalystError` (the steer would otherwise run on nothing)
|
|
947
|
+
* - the analyst settles `down` → `AnalystError` (a broken capture path, not a verdict)
|
|
948
|
+
* - the analyst returns a non-array → `PlannerError`
|
|
949
|
+
* - any finding cites judge-derived metric evidence → `PlannerError` via the firewall
|
|
950
|
+
*/
|
|
951
|
+
declare function createScopeAnalyst<D>(scope: Scope<Outcome<D>>, options: CreateScopeAnalystOptions<D>): ScopeAnalyst<D>;
|
|
952
|
+
/**
|
|
953
|
+
* Build the `SteerContext` a combinator reads to steer (its `loopUntil.until`, `widen` gate, any
|
|
954
|
+
* future steer). One place enforces the firewall: `findings` is asserted trace-derived before it is
|
|
955
|
+
* surfaced, and `lastValidScore` is provided for OBSERVABILITY only — a combinator that steers off
|
|
956
|
+
* it re-introduces selector = judge, the coupling the architecture forbids.
|
|
957
|
+
*
|
|
958
|
+
* `findings` is re-asserted here even when it came from `createScopeAnalyst` (which already asserted
|
|
959
|
+
* it): the assertion is cheap and idempotent, and a `SteerContext` may be built from findings that
|
|
960
|
+
* arrived by another path (a caller-supplied diagnosis). Belt-and-suspenders on the one coupling
|
|
961
|
+
* that must never leak.
|
|
962
|
+
*/
|
|
963
|
+
declare function buildSteerContext<D>(findings: ReadonlyArray<AnalystFinding>, settledSoFar: ReadonlyArray<Settled<Outcome<D>>>): SteerContext<D>;
|
|
964
|
+
|
|
965
|
+
/**
|
|
966
|
+
* @experimental
|
|
967
|
+
*
|
|
968
|
+
* The generic combinator library — the content-free act-bodies the wave's §1 contract froze.
|
|
969
|
+
*
|
|
970
|
+
* Each export is a `CombinatorShape<Task, D>` (an alias of `LoopShape<Task, D>`): a factory
|
|
971
|
+
* `(ShapeContext) => Agent<Task, Outcome<D>>` whose `act` runs ONE composition shape over the
|
|
972
|
+
* keystone `Scope` — spawn children through `ctx.spawnChild` + `scope.spawn`, drain settlements
|
|
973
|
+
* via `scope.next()`, select across `done` children with the SINGLE-SOURCED `settledToIteration`
|
|
974
|
+
* + `defaultSelectWinner` (selector≠judge — never a re-rank behind the driver), and synthesize a
|
|
975
|
+
* terminal `Outcome<D>`.
|
|
976
|
+
*
|
|
977
|
+
* The shapes carry NO domain: a "research sweep over angles" is `fanout(angles, { synthesize })`
|
|
978
|
+
* under a research persona; a "code build test" is `pipeline([plan, implement, integrate])` under
|
|
979
|
+
* a coder persona. The SHAPE is here; the model/prompt/role/goal live on the `Persona` + task,
|
|
980
|
+
* threaded to each child verbatim by the spec-objects the builders take. No model name, prompt,
|
|
981
|
+
* role, or domain noun appears below.
|
|
982
|
+
*
|
|
983
|
+
* Two fail-loud invariants every combinator honors: a child the conserved pool cannot admit is a
|
|
984
|
+
* CONCRETE blocker (never an eager over-fan, never a silent drop), and a `blocked` outcome always
|
|
985
|
+
* names at least one blocker (a shape that cannot finish MUST say why — `blocked([])` throws).
|
|
986
|
+
*/
|
|
987
|
+
|
|
988
|
+
/**
|
|
989
|
+
* `pipeline(stages)` — run the stages in order, feeding each stage's `done` deliverable into the
|
|
990
|
+
* next stage's task. The first stage that ends `blocked` (a child that went down, a child the
|
|
991
|
+
* pool would not admit, or a stage whose `collect` chose to block) short-circuits — its blockers
|
|
992
|
+
* ARE the pipeline's blockers, never coerced past a failed stage. The terminal stage's `done`
|
|
993
|
+
* deliverable is the pipeline's deliverable.
|
|
994
|
+
*/
|
|
995
|
+
declare function pipeline<Task, D>(stages: ReadonlyArray<PipelineStage<Task, unknown, unknown>>): CombinatorShape<Task, D>;
|
|
996
|
+
/**
|
|
997
|
+
* `fanout(items, opts)` — spawn one child per item in a single round (bounded by the conserved
|
|
998
|
+
* pool's fail-closed admission), drain via `scope.next()`, then either synthesize over the
|
|
999
|
+
* gathered settlements (one SEPARATE synthesis child) or return the best-valid child via the
|
|
1000
|
+
* single-sourced selector. A round that admitted zero children, or whose synthesis child could
|
|
1001
|
+
* not be admitted, is a concrete blocker.
|
|
1002
|
+
*/
|
|
1003
|
+
declare function fanout<Task, Item, D>(items: ReadonlyArray<Item>, opts: FanoutOptions<Item, D>): CombinatorShape<Task, D>;
|
|
1004
|
+
/**
|
|
1005
|
+
* `loopUntil(seed, spec)` — one `step` child per round; `fold` accumulates each settlement into
|
|
1006
|
+
* the running state; `until` (reading the round's trace findings, NOT a fresh raw verdict) is
|
|
1007
|
+
* the deployable stop. The conserved pool IS the loop bound: once `spawn` fails closed the loop
|
|
1008
|
+
* stops. A loop that exhausted the pool without `until` ever satisfying is a concrete blocker.
|
|
1009
|
+
*
|
|
1010
|
+
* Findings are threaded through the `SteerContext` firewall in the analyst seam (`analyst.ts`);
|
|
1011
|
+
* absent a wired analyst on this surface the firewall stays dormant and `until` is consulted with
|
|
1012
|
+
* an empty findings array — never a fabricated finding (fail-loud honesty over a silent default).
|
|
1013
|
+
*/
|
|
1014
|
+
declare function loopUntil<Task, State, D>(seed: State, spec: LoopUntilSpec<Task, State, D>): CombinatorShape<Task, D>;
|
|
1015
|
+
/**
|
|
1016
|
+
* `panel(spec)` — spawn the M judge children over the SAME artifact, drain their settlements,
|
|
1017
|
+
* and fold them into a panel verdict via the pure WRITE-ONLY `merge` (a judge's output never
|
|
1018
|
+
* reaches another judge's task; the merge never spawns or re-ranks). A `down` judge carries no
|
|
1019
|
+
* verdict and is excluded from the merge denominator. A panel that admitted no judge is a
|
|
1020
|
+
* concrete blocker before `merge` is consulted.
|
|
1021
|
+
*/
|
|
1022
|
+
declare function panel<Task, Artifact, D>(spec: PanelSpec<Artifact, D>): CombinatorShape<Task, D>;
|
|
1023
|
+
/**
|
|
1024
|
+
* `verify(spec)` — an IMPLEMENT child produces a candidate, then a SEPARATE VERIFIER child grades
|
|
1025
|
+
* it; only a `valid` verifier verdict ships. Any other outcome (implement down, verifier down,
|
|
1026
|
+
* verifier verdict absent or not `valid`) is a concrete blocker carrying the failure verbatim —
|
|
1027
|
+
* never a coerced "done". The implement child does not grade itself.
|
|
1028
|
+
*/
|
|
1029
|
+
declare function verify<Task, Candidate, D>(spec: VerifySpec<Task, Candidate, D>): CombinatorShape<Task, D>;
|
|
1030
|
+
/**
|
|
1031
|
+
* `widen(spec)` — the streaming spawn-on-completion driver. Spawns the seed lineages, then REACTS
|
|
1032
|
+
* to each `scope.next()`: on every settled child it consults `spec.gate.decide` and, when the gate
|
|
1033
|
+
* returns `widen`, spawns AT MOST ONE more child toward the chosen lineage under the remaining
|
|
1034
|
+
* conserved pool. `promising` is derived from the round's trace findings (the analyst seam),
|
|
1035
|
+
* never a child's raw `verdict` — and the default gate (`flatWidenGate`) never widens, so the R2
|
|
1036
|
+
* firewall stays dormant. Terminal selection is `spec.synthesize` over every settled lineage.
|
|
1037
|
+
*
|
|
1038
|
+
* No analyst is wired on this frozen surface, so `decide` is consulted with an empty findings
|
|
1039
|
+
* array; a flat gate ignores it. A non-flat gate that wants findings reads them through the
|
|
1040
|
+
* `SteerContext` firewall the analyst seam owns — never fabricated here.
|
|
1041
|
+
*/
|
|
1042
|
+
declare function widen<Task, Seed, D>(spec: WidenSpec<Seed, D>): CombinatorShape<Task, D>;
|
|
1043
|
+
/**
|
|
1044
|
+
* The flat default `ScopeWidenGate` — never widens, keeping the R2 selector≠judge collision
|
|
1045
|
+
* dormant. A gate run passes this explicitly; a test asserts the default is flat.
|
|
1046
|
+
*/
|
|
1047
|
+
declare function flatWidenGate<D>(): ScopeWidenGate<D>;
|
|
1048
|
+
|
|
1049
|
+
/**
|
|
1050
|
+
* @experimental
|
|
1051
|
+
*
|
|
1052
|
+
* The cross-run corpus (G2) — the learning-flywheel's durable accreted-fact store.
|
|
1053
|
+
*
|
|
1054
|
+
* `Corpus` is DISTINCT from the per-run `SpawnJournal` (decisions/replay) and `ResultBlobStore`
|
|
1055
|
+
* (payloads): a `CorpusRecord` is a FACT one run LEARNED that a FUTURE run reads back (the
|
|
1056
|
+
* world-model), not a replay input. This module owns the two impls the wave surface pins —
|
|
1057
|
+
* `InMemoryCorpus` and `FileCorpus` (JSONL, append-only) — plus `renderCorpusToInstructions`,
|
|
1058
|
+
* the READ side that projects accreted facts into a fresh `AgentProfile`'s instruction seams.
|
|
1059
|
+
*
|
|
1060
|
+
* The boundary is fail-loud, typed-outcome: `append` is idempotent on an identical record and
|
|
1061
|
+
* returns a typed error (never throws, never a silent overwrite) on a conflicting re-append under
|
|
1062
|
+
* the same `id`. Malformed records — a structurally-invalid `CorpusRecord` from disk or a caller —
|
|
1063
|
+
* fail loud (the validator throws), since a corpus that silently accepts garbage would poison
|
|
1064
|
+
* every downstream run that reads it back.
|
|
1065
|
+
*/
|
|
1066
|
+
|
|
1067
|
+
/**
|
|
1068
|
+
* In-memory `Corpus`. Keyed by record `id`; `append` validates the record, is idempotent on an
|
|
1069
|
+
* identical re-append, and returns a typed `{ succeeded: false }` on a conflicting re-append under
|
|
1070
|
+
* the same `id` (never overwrites). `query` routes through the single-sourced `applyFilter`.
|
|
1071
|
+
*/
|
|
1072
|
+
declare class InMemoryCorpus implements Corpus {
|
|
1073
|
+
private readonly byId;
|
|
1074
|
+
append(record: CorpusRecord): Promise<{
|
|
1075
|
+
succeeded: true;
|
|
1076
|
+
} | {
|
|
1077
|
+
succeeded: false;
|
|
1078
|
+
error: string;
|
|
1079
|
+
}>;
|
|
1080
|
+
query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
|
|
1081
|
+
}
|
|
1082
|
+
/**
|
|
1083
|
+
* JSONL on disk — one validated `CorpusRecord` per line, append-only. `query` replays the whole
|
|
1084
|
+
* file, validating every line (a malformed line fails loud — a corrupted corpus must never read
|
|
1085
|
+
* back silently) and folding by `id`: a later identical line dedups, a later conflicting line
|
|
1086
|
+
* under the same `id` is a corruption (fail loud). `append` first replays to enforce the same
|
|
1087
|
+
* idempotence/conflict contract as the in-mem impl, then fsyncs the new line so a crash between
|
|
1088
|
+
* writes never loses an acknowledged fact. Shares the JSONL append-line spine with the spawn
|
|
1089
|
+
* journal, but the interface stays separate (a learned fact is not a replay record).
|
|
1090
|
+
*/
|
|
1091
|
+
declare class FileCorpus implements Corpus {
|
|
1092
|
+
private readonly path;
|
|
1093
|
+
constructor(path: string);
|
|
1094
|
+
append(record: CorpusRecord): Promise<{
|
|
1095
|
+
succeeded: true;
|
|
1096
|
+
} | {
|
|
1097
|
+
succeeded: false;
|
|
1098
|
+
error: string;
|
|
1099
|
+
}>;
|
|
1100
|
+
query(filter: CorpusFilter): Promise<ReadonlyArray<CorpusRecord>>;
|
|
1101
|
+
private load;
|
|
1102
|
+
private appendLine;
|
|
1103
|
+
}
|
|
1104
|
+
/**
|
|
1105
|
+
* The learning-flywheel READ side. Queries the corpus through `filter`, renders the matching facts
|
|
1106
|
+
* (most-confident first, capped by `maxLines`) into instruction lines, and returns a FRESH
|
|
1107
|
+
* `AgentProfile` with them merged in — never mutates the input profile. Default `target: 'prompt'`
|
|
1108
|
+
* appends the lines to `prompt.instructions[]` (the additive append-line seam); `target:
|
|
1109
|
+
* 'resources'` folds them into the single-blob `resources.instructions` string (preserving any
|
|
1110
|
+
* existing blob, but failing loud on a non-string existing blob — a `resources.instructions` that
|
|
1111
|
+
* was already an `AgentProfileResourceRef` cannot be string-appended without dropping it).
|
|
1112
|
+
*
|
|
1113
|
+
* An empty query result returns a fresh COPY of the profile with no instruction change (a valid
|
|
1114
|
+
* "nothing learned yet" read, not an error).
|
|
1115
|
+
*/
|
|
1116
|
+
declare function renderCorpusToInstructions(opts: RenderCorpusToInstructionsOptions): Promise<AgentProfile$1>;
|
|
1117
|
+
|
|
1118
|
+
/**
|
|
1119
|
+
* @experimental
|
|
1120
|
+
*
|
|
1121
|
+
* The personify layer impl — `definePersona` (the thin builder) + `runPersonified` (composes
|
|
1122
|
+
* the persona + chosen shape onto the keystone `Supervisor`), plus `createShapeContext`, the
|
|
1123
|
+
* seam that hands a shape its spawn helpers without it touching the registry.
|
|
1124
|
+
*
|
|
1125
|
+
* This file adds NO engine: `runPersonified` is `createSupervisor().run(rootAgent, task, …)`
|
|
1126
|
+
* where `rootAgent` is the persona's chosen `LoopShape` applied to a `ShapeContext`. All the
|
|
1127
|
+
* conserved-budget / journal / abort / typed-result machinery is the keystone's; this layer
|
|
1128
|
+
* only wires the persona's CONTENT (root spec + directive + context + seams) into it.
|
|
1129
|
+
*
|
|
1130
|
+
* One non-obvious invariant it must honor: `createSupervisor().run` builds the root `Scope`
|
|
1131
|
+
* with an EMPTY seam bag (`seams: {}`), so the built-in metered runtimes (router/sandbox/cli)
|
|
1132
|
+
* cannot read their seams off `ExecutorContext` through the default supervisor path. A persona
|
|
1133
|
+
* that supplies raw `seams` is therefore wrapped here into a registry whose resolved factories
|
|
1134
|
+
* receive a ctx with the persona seams merged in — so a persona never has to pre-close its
|
|
1135
|
+
* factories by hand. A persona may instead supply a fully-built `registry` and skip the wrap.
|
|
1136
|
+
*/
|
|
1137
|
+
|
|
1138
|
+
/**
|
|
1139
|
+
* Build a frozen `Persona`. Fails loud on the executors-supplied invariant: a persona with
|
|
1140
|
+
* neither a pre-built registry nor a seam bag cannot resolve its built-in runtimes, so it is
|
|
1141
|
+
* unrunnable — refuse it at definition time, not at the first spawn. Pure; no I/O.
|
|
1142
|
+
*/
|
|
1143
|
+
declare function definePersona<D = unknown>(input: DefinePersonaInput<D>): Persona<D>;
|
|
1144
|
+
/**
|
|
1145
|
+
* Compose the persona + chosen shape onto a fresh keystone `Supervisor`. Resolves the shape
|
|
1146
|
+
* (a factory verbatim, or a registered name through `builtinShapes`), applies it to a
|
|
1147
|
+
* `ShapeContext`, and runs the resulting root `Agent` to a typed `SupervisedResult<Outcome>`.
|
|
1148
|
+
* Fail loud on an unknown shape name or an unresolvable persona registry — never a silent
|
|
1149
|
+
* default-shape fallback.
|
|
1150
|
+
*/
|
|
1151
|
+
declare function runPersonified<Task, D>(options: RunPersonifiedOptions<Task, D>): Promise<SupervisedResult<Outcome<D>>>;
|
|
1152
|
+
|
|
1153
|
+
/**
|
|
1154
|
+
* @experimental
|
|
1155
|
+
*
|
|
1156
|
+
* The loop-shape registry — the OPEN, content-free extension point for the personify layer.
|
|
1157
|
+
*
|
|
1158
|
+
* A `LoopShape` is reusable STRUCTURE (how to decompose / fan out / verify / synthesize),
|
|
1159
|
+
* parameterized by a persona's CONTENT. The registry lets a caller resolve a composed shape by
|
|
1160
|
+
* NAME: register a factory once, then `runPersonified({ shape: '<name>' })` resolves it with zero
|
|
1161
|
+
* edits elsewhere. `register` fails loud on a duplicate; `resolve` returns a typed outcome so an
|
|
1162
|
+
* unknown name is a named error, never a silent default.
|
|
1163
|
+
*
|
|
1164
|
+
* No shape is pre-registered: the generic combinators (`pipeline`/`fanout`/`loopUntil`/`panel`/
|
|
1165
|
+
* `verify`/`widen`) take spec arguments, so they are not bare zero-arg factories — a caller that
|
|
1166
|
+
* wants name-resolution registers its own COMPOSED shape (a combinator already applied to its
|
|
1167
|
+
* spec) on a registry instance. The registry carries SHAPE only; the domain lives on the persona.
|
|
1168
|
+
*/
|
|
1169
|
+
|
|
1170
|
+
/**
|
|
1171
|
+
* Build a fresh open `ShapeRegistry`. A factory is stored type-erased and re-cast on resolve — the
|
|
1172
|
+
* caller asserts the `<Task, D>` it expects, exactly as the executor registry stores its factories.
|
|
1173
|
+
*/
|
|
1174
|
+
declare function createShapeRegistry(): ShapeRegistry;
|
|
1175
|
+
/** The default registry `runPersonified` resolves a shape name against. Empty by construction —
|
|
1176
|
+
* a caller registers its own composed shapes; the engine ships no domain shape. */
|
|
1177
|
+
declare const builtinShapes: ShapeRegistry;
|
|
1178
|
+
/** Register a composed shape on the default `builtinShapes` registry — the one-call extension
|
|
1179
|
+
* point a caller invokes so its shape is resolvable by name with zero edits to the engine. */
|
|
1180
|
+
declare function registerShape<Task, D>(name: string, factory: LoopShape<Task, D>): void;
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* @experimental
|
|
1184
|
+
*
|
|
1185
|
+
* Trajectory trace + cost ledger — the post-hoc tree reconstructor (§4 of `wave-types`).
|
|
1186
|
+
*
|
|
1187
|
+
* `trajectoryReport` rebuilds the WHOLE realized spawn tree from the durable
|
|
1188
|
+
* `SpawnJournal` (+ optionally the `ResultBlobStore` for `done` artifacts): every node
|
|
1189
|
+
* (driver AND leaf), the real parent/child edges, each node's terminal status, its OWN
|
|
1190
|
+
* conserved `Spend`, and the `Spend` ROLLED UP over its subtree. Roll-up is a post-order
|
|
1191
|
+
* fold over the parent edges: a node's `rolledUpSpend` is its own spend plus every
|
|
1192
|
+
* descendant's, so a driver is charged for the fanout it caused — the root's roll-up is
|
|
1193
|
+
* the whole run's conserved total (tokens + usd + iterations + ms).
|
|
1194
|
+
*
|
|
1195
|
+
* `equalKOnCost` compares separate runs (arms) on that conserved COST, not on raw
|
|
1196
|
+
* iteration COUNT. The sandbox executor reports tokens/usd INCLUSIVE of a leaf's internal
|
|
1197
|
+
* sub-agent fanout, so charging an arm by `total.tokens`/`total.usd` (not by how many
|
|
1198
|
+
* `next()` cursors it logged) closes the leaf-fanout confound: a treatment leaf that fanned
|
|
1199
|
+
* out internally pays for it in cost, where a per-iteration count would hide it. The
|
|
1200
|
+
* within-run conserved pool already guarantees `Σk` equal by construction; this check is the
|
|
1201
|
+
* CROSS-run analogue the pool cannot reach — proving equal compute before any win is claimed.
|
|
1202
|
+
*
|
|
1203
|
+
* Pure over the journal/blobs — no live agent calls; safe to run on a finished run's log.
|
|
1204
|
+
*/
|
|
1205
|
+
|
|
1206
|
+
/**
|
|
1207
|
+
* Reconstruct the whole spawn tree for `root` with per-node + rolled-up `Spend`. Reads the
|
|
1208
|
+
* journal for structure + spend and, when `withOutputs`, the blob store for each `done`
|
|
1209
|
+
* node's artifact. Fail loud on a tree that was never journaled, a settle/cancel for an
|
|
1210
|
+
* un-spawned node (a corrupted log), or — under `withOutputs` — a `done` node whose blob the
|
|
1211
|
+
* store cannot rehydrate (a silent gap would mis-cost or mis-evidence the tree).
|
|
1212
|
+
*/
|
|
1213
|
+
declare function trajectoryReport(journal: SpawnJournal, blobs: ResultBlobStore, root: NodeId, options?: TrajectoryReportOptions): Promise<TrajectoryReport>;
|
|
1214
|
+
/**
|
|
1215
|
+
* Assert the arms are comparable at EQUAL conserved COST (tokens + usd), NOT raw iteration
|
|
1216
|
+
* count. Compares each arm's root-rolled-up `total` on the two conserved channels: the arms are
|
|
1217
|
+
* within-tolerance when the per-channel spread (max − min across arms) over the median is
|
|
1218
|
+
* `≤ tolerance`. Pure over the reports — no I/O. Fails loud on an empty arm list (nothing to
|
|
1219
|
+
* compare) so a vacuous "equal" is never returned.
|
|
1220
|
+
*/
|
|
1221
|
+
declare function equalKOnCost(arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions): EqualKVerdict;
|
|
1222
|
+
|
|
1223
|
+
/**
|
|
1224
|
+
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
1225
|
+
* dispatch.
|
|
1226
|
+
*
|
|
1227
|
+
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
1228
|
+
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
1229
|
+
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
1230
|
+
* the run as a stub and throws. `reportLoopUsage` is that one line:
|
|
1231
|
+
*
|
|
1232
|
+
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
1233
|
+
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
1234
|
+
* reportLoopUsage(ctx, result)
|
|
1235
|
+
* return result.winner?.output as A
|
|
1236
|
+
* }
|
|
1237
|
+
*
|
|
1238
|
+
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
1239
|
+
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
1240
|
+
* `observe` + `observeTokens`.
|
|
1241
|
+
*/
|
|
1242
|
+
|
|
1243
|
+
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
1244
|
+
interface UsageSink {
|
|
1245
|
+
observe(amountUsd: number, source: string): void;
|
|
1246
|
+
observeTokens(usage: LoopTokenUsage): void;
|
|
1247
|
+
}
|
|
1248
|
+
/**
|
|
1249
|
+
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
1250
|
+
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
1251
|
+
* defaults to `'loop'`.
|
|
1252
|
+
*/
|
|
1253
|
+
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
1254
|
+
|
|
1255
|
+
/**
|
|
1256
|
+
* @experimental
|
|
1257
|
+
*
|
|
1258
|
+
* `acquireSandbox` — cold-start-resilient sandbox acquisition. Eliminates the
|
|
1259
|
+
* "create timed out at the proxy" failure mode conceptually by DECOUPLING "the
|
|
1260
|
+
* create HTTP call returned" from "the sandbox is ready":
|
|
1261
|
+
*
|
|
1262
|
+
* - Create is initiated with a known `name`.
|
|
1263
|
+
* - Readiness is observed from the sandbox's own `status` (`refresh()` polls
|
|
1264
|
+
* true state), NOT from whether the create call returned in time.
|
|
1265
|
+
* - If the create call itself times out at a gateway (502/503/504/522/524 or
|
|
1266
|
+
* a transport timeout), provisioning is still running server-side — so we
|
|
1267
|
+
* find the named sandbox via `list()` and wait for it to reach `running`.
|
|
1268
|
+
*
|
|
1269
|
+
* Result: a scale-from-zero cold start (node boot + host-agent registration,
|
|
1270
|
+
* minutes) can no longer surface as a create failure behind a ~100s proxy
|
|
1271
|
+
* limit. The loop becomes indifferent to whether the host pool is warm or cold.
|
|
1272
|
+
*
|
|
1273
|
+
* Invariant: an instance reporting no `status` (the minimal test fakes) is
|
|
1274
|
+
* treated as ready; only an explicit `pending`/`provisioning` status triggers
|
|
1275
|
+
* waiting, and only a retryable THROW triggers the find-by-name path. Real
|
|
1276
|
+
* errors (auth, validation, budget) fail loud. A box that is created (or found)
|
|
1277
|
+
* but never reaches `running` (abort, terminal status, budget) is torn down
|
|
1278
|
+
* before the failure propagates, so an abort storm during cold start does not
|
|
1279
|
+
* leak live sandboxes.
|
|
1280
|
+
*/
|
|
1281
|
+
|
|
1282
|
+
/** @experimental */
|
|
1283
|
+
interface AcquireOptions {
|
|
1284
|
+
/**
|
|
1285
|
+
* Total budget for the sandbox to reach `running`, covering on-demand node
|
|
1286
|
+
* cold-start. Default 600_000ms — matches the orchestrator's pending-host
|
|
1287
|
+
* registration window so we never give up before the platform itself would.
|
|
1288
|
+
*/
|
|
1289
|
+
readyTimeoutMs?: number;
|
|
1290
|
+
/** Poll interval while waiting for `running` / for the named sandbox to appear. */
|
|
1291
|
+
pollIntervalMs?: number;
|
|
1292
|
+
/** Cancellation (user abort). Distinct from create-call timeouts. */
|
|
1293
|
+
signal?: AbortSignal;
|
|
1294
|
+
/** Stamp a name so a timed-out create is recoverable by lookup. Auto-generated if absent. */
|
|
1295
|
+
name?: string;
|
|
1296
|
+
/** Clock override for deterministic tests. */
|
|
1297
|
+
now?: () => number;
|
|
1298
|
+
/** Sleep override for deterministic tests. */
|
|
1299
|
+
sleep?: (ms: number) => Promise<void>;
|
|
1300
|
+
}
|
|
1301
|
+
/** @experimental */
|
|
1302
|
+
declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
|
|
1303
|
+
|
|
1304
|
+
/**
|
|
1305
|
+
* @experimental
|
|
1306
|
+
*
|
|
1307
|
+
* Capability probe for the loop kernel's backend-blind lineage seams. The
|
|
1308
|
+
* kernel must NEVER ask "is this Docker or Firecracker?"; it asks "can this
|
|
1309
|
+
* platform fork a checkpoint?" via `client.criuStatus()` and degrades to fresh
|
|
1310
|
+
* boxes when the answer is no. CRIU availability is a per-platform fact, so the
|
|
1311
|
+
* probe is memoized per client — one network round-trip, reused across every
|
|
1312
|
+
* fanout in the run.
|
|
1313
|
+
*
|
|
1314
|
+
* Invariant: a client with no `criuStatus` method (the loop's test fakes, the
|
|
1315
|
+
* raw SDK before it grew the probe) reports `canFork = false`. The seam is
|
|
1316
|
+
* fail-CLOSED — never assume forking works, only enable it on a positive probe.
|
|
1317
|
+
*/
|
|
1318
|
+
|
|
1319
|
+
/**
|
|
1320
|
+
* What the loop kernel is allowed to know about a sandbox backend: a single
|
|
1321
|
+
* capability bit, never the backend's identity. `canFork` gates the
|
|
1322
|
+
* checkpoint+fork fanout path; everything else (session continuation) is a
|
|
1323
|
+
* universal SDK feature that needs no probe.
|
|
1324
|
+
*
|
|
1325
|
+
* @experimental
|
|
1326
|
+
*/
|
|
1327
|
+
interface SandboxCapabilities {
|
|
1328
|
+
/**
|
|
1329
|
+
* True only when `client.criuStatus()` returned `{ available: true }`. When
|
|
1330
|
+
* false, a fork-enabled fanout degrades to independent fresh boxes — same
|
|
1331
|
+
* result, no shared context prefix.
|
|
1332
|
+
*/
|
|
1333
|
+
canFork: boolean;
|
|
1334
|
+
}
|
|
1335
|
+
/**
|
|
1336
|
+
* Probe (and memoize per client) what the loop may rely on. A client without a
|
|
1337
|
+
* `criuStatus` method, or whose probe rejects, yields `canFork = false` — a
|
|
1338
|
+
* failed probe must never claim a capability the platform may not have. The
|
|
1339
|
+
* promise is cached so concurrent fanout branches share one round-trip.
|
|
1340
|
+
*
|
|
1341
|
+
* @experimental
|
|
1342
|
+
*/
|
|
1343
|
+
declare function probeSandboxCapabilities(client: LoopSandboxClient): Promise<SandboxCapabilities>;
|
|
1344
|
+
/**
|
|
1345
|
+
* Narrowed view of the optional CRIU probe. The loop-side `LoopSandboxClient`
|
|
1346
|
+
* does not require `criuStatus`; this widens it optionally so the probe can be
|
|
1347
|
+
* read without importing sandbox-backend specifics. @experimental
|
|
1348
|
+
*/
|
|
1349
|
+
interface CriuCapableClient {
|
|
1350
|
+
criuStatus?: () => Promise<{
|
|
1351
|
+
available: boolean;
|
|
1352
|
+
criuVersion?: string;
|
|
1353
|
+
reason?: string;
|
|
1354
|
+
}>;
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
/**
|
|
1358
|
+
* Sandbox-event → runtime-event mapping.
|
|
1359
|
+
*
|
|
1360
|
+
* The sandbox SDK emits a polymorphic `SandboxEvent = { type, data, id? }`
|
|
1361
|
+
* whose `type` vocabulary is backend-determined (opencode, etc.) rather than
|
|
1362
|
+
* enumerated by the SDK. Two consumers project it:
|
|
1363
|
+
* - the loop kernel's cost ledger (`extractLlmCallEvent`) — sums usage off
|
|
1364
|
+
* every cost-bearing event, regardless of stream shape;
|
|
1365
|
+
* - the `AgentRuntime.act` streaming contract (`mapSandboxEvent`) — projects
|
|
1366
|
+
* incremental events to the `RuntimeStreamEvent` chat-UX vocabulary.
|
|
1367
|
+
*
|
|
1368
|
+
* Both live here so the empirically-observed `type` vocabulary has one home.
|
|
1369
|
+
*/
|
|
1370
|
+
|
|
1371
|
+
/**
|
|
1372
|
+
* Extract a `RuntimeStreamEvent`-shaped `llm_call` from a sandbox event when
|
|
1373
|
+
* the event carries usage/cost data. Returns `undefined` for non-cost events
|
|
1374
|
+
* so the kernel can iterate the full stream without branching.
|
|
1375
|
+
*
|
|
1376
|
+
* Canonical cost-carrying types observed in the wild:
|
|
1377
|
+
* - `llm_call` — `data: { model, tokensIn, tokensOut, costUsd, ... }`
|
|
1378
|
+
* - `message.completed` / `result` — `data: { usage: { inputTokens,
|
|
1379
|
+
* outputTokens, totalCostUsd? } }`
|
|
1380
|
+
* - `cost.usage` / `usage` — same shape under a dedicated type
|
|
1381
|
+
*
|
|
1382
|
+
* Numeric coercion is strict: `Number.isFinite` gates every accumulator write
|
|
1383
|
+
* so a sentinel `NaN` from a misbehaving backend cannot poison the ledger.
|
|
1384
|
+
*/
|
|
1385
|
+
declare function extractLlmCallEvent(event: SandboxEvent, agentRunName: string): (RuntimeStreamEvent & {
|
|
1386
|
+
type: 'llm_call';
|
|
1387
|
+
}) | undefined;
|
|
1388
|
+
/**
|
|
1389
|
+
* Project one `SandboxEvent` onto the `RuntimeStreamEvent` chat-UX vocabulary,
|
|
1390
|
+
* for runtimes that bridge a sandbox `streamPrompt` into the
|
|
1391
|
+
* `AgentRuntime.act` streaming contract. Returns `undefined` for events that
|
|
1392
|
+
* have no faithful projection — the raw stream is preserved separately for the
|
|
1393
|
+
* `OutputAdapter`, so an unmapped event never loses data.
|
|
1394
|
+
*
|
|
1395
|
+
* Mapped (the task-optional incremental variants — no synthesized task
|
|
1396
|
+
* lifecycle, no guessed tool-part shapes):
|
|
1397
|
+
* - `message.part.updated` text part → `text_delta`
|
|
1398
|
+
* - `message.part.updated` reasoning/thinking part → `reasoning_delta`
|
|
1399
|
+
* - cost-bearing events → `llm_call` (shared with the ledger extractor)
|
|
1400
|
+
*
|
|
1401
|
+
* The opencode backend emits incremental text as
|
|
1402
|
+
* `{ type: 'message.part.updated', data: { part: { type, text }, delta } }`;
|
|
1403
|
+
* `delta` is the increment, `part.text` the running accumulation.
|
|
1404
|
+
*/
|
|
1405
|
+
declare function mapSandboxEvent(event: SandboxEvent, opts?: {
|
|
1406
|
+
agentRunName?: string;
|
|
1407
|
+
}): RuntimeStreamEvent | undefined;
|
|
1408
|
+
|
|
1409
|
+
/**
|
|
1410
|
+
* @experimental
|
|
1411
|
+
*
|
|
1412
|
+
* `SandboxLineage` — the backend-blind owner of box + session handles for a
|
|
1413
|
+
* single `runLoop` invocation. It exists so `run-loop.ts` never references a
|
|
1414
|
+
* backend (Docker / Firecracker): the lineage turns "continue this session" and
|
|
1415
|
+
* "fork this branch" into capability-gated sandbox-SDK calls and degrades to
|
|
1416
|
+
* fresh boxes when a capability is absent.
|
|
1417
|
+
*
|
|
1418
|
+
* Three operations, mirroring the kernel's per-iteration choices:
|
|
1419
|
+
* - `start(spec, prompt)` → a fresh box; the FIRST `streamPrompt` carries a
|
|
1420
|
+
* minted `sessionId` so later `continue` calls reuse the same server-side
|
|
1421
|
+
* conversation instead of re-injecting prior context as prompt text.
|
|
1422
|
+
* - `continue(handle, prompt)` → the SAME box, `streamPrompt({ sessionId })`.
|
|
1423
|
+
* The context lives in the sandbox; the prompt is only the new turn. Before
|
|
1424
|
+
* streaming it ASSERTS the session is still live server-side (via
|
|
1425
|
+
* `box.session(id).status()`): if the platform never honored the
|
|
1426
|
+
* client-minted id (or reaped it), `status()` is `null` and `continue`
|
|
1427
|
+
* fails loud rather than silently re-running the turn without prior context.
|
|
1428
|
+
* - `fork(handle, prompts, specs)` (n = number of prompts) → when `canFork`, `checkpoint({ leaveRunning })` on
|
|
1429
|
+
* the parent then `fork(checkpointId)` × n so N branches inherit a shared
|
|
1430
|
+
* context prefix; otherwise N independent fresh boxes (same result, no
|
|
1431
|
+
* prefix). Either way each branch streams its own turn. Child-box creation
|
|
1432
|
+
* is bounded by the lineage's `maxConcurrency` — a 20-way fanout under a
|
|
1433
|
+
* concurrency cap of 2 provisions boxes in bounded waves, not all at once.
|
|
1434
|
+
*
|
|
1435
|
+
* Invariant: the lineage OWNS every box it starts or forks and tears them all
|
|
1436
|
+
* down on `teardown()` (or earlier via `prune`). It never tears down a box
|
|
1437
|
+
* mid-flight — the kernel decides when a handle is done. Streaming itself stays
|
|
1438
|
+
* in `run-loop.ts`; the lineage only hands back the live `streamPrompt` iterable
|
|
1439
|
+
* so the kernel keeps ownership of event collection, cost accounting, and trace
|
|
1440
|
+
* emission.
|
|
1441
|
+
*/
|
|
1442
|
+
|
|
1443
|
+
/**
|
|
1444
|
+
* A live box plus the session that threads its iterations together. Handed back
|
|
1445
|
+
* by `start`/`fork`, passed into `continue`/`fork` to descend from. Opaque to
|
|
1446
|
+
* the kernel beyond `box` (for placement/teardown) and `sessionId` (trace).
|
|
1447
|
+
*
|
|
1448
|
+
* @experimental
|
|
1449
|
+
*/
|
|
1450
|
+
interface SandboxLineageHandle {
|
|
1451
|
+
/** The owned, running sandbox this handle drives. */
|
|
1452
|
+
box: SandboxInstance;
|
|
1453
|
+
/**
|
|
1454
|
+
* Stable session id threaded through this box's `streamPrompt` calls. Minted
|
|
1455
|
+
* by the lineage on `start`; reused on `continue` so the server continues the
|
|
1456
|
+
* same conversation. A forked handle starts a fresh session on its new box —
|
|
1457
|
+
* the shared context comes from the checkpoint, not a shared session id.
|
|
1458
|
+
*/
|
|
1459
|
+
sessionId: string;
|
|
1460
|
+
}
|
|
1461
|
+
/**
|
|
1462
|
+
* Owns box + session handles for one loop run and offers the three
|
|
1463
|
+
* capability-gated lifecycle moves. Construct via `createSandboxLineage`.
|
|
1464
|
+
*
|
|
1465
|
+
* @experimental
|
|
1466
|
+
*/
|
|
1467
|
+
interface SandboxLineage {
|
|
1468
|
+
/**
|
|
1469
|
+
* Acquire a fresh box and begin a new session on it. Returns the handle and
|
|
1470
|
+
* the live `streamPrompt` iterable for the first turn (caller drains it).
|
|
1471
|
+
*/
|
|
1472
|
+
start(spec: AgentRunSpec<unknown>, prompt: string, signal: AbortSignal): Promise<{
|
|
1473
|
+
handle: SandboxLineageHandle;
|
|
1474
|
+
events: AsyncIterable<SandboxEvent>;
|
|
1475
|
+
}>;
|
|
1476
|
+
/**
|
|
1477
|
+
* Continue an existing handle's session with one more turn on the SAME box.
|
|
1478
|
+
* The prior context is server-side; `prompt` is only the new turn. Asserts the
|
|
1479
|
+
* session is still known to the sandbox first (fail-loud) so a platform that
|
|
1480
|
+
* silently dropped the client-minted session id surfaces as an error instead
|
|
1481
|
+
* of a contextless turn the caller mistakes for a real continuation.
|
|
1482
|
+
*/
|
|
1483
|
+
continue(handle: SandboxLineageHandle, prompt: string, signal: AbortSignal): Promise<AsyncIterable<SandboxEvent>>;
|
|
1484
|
+
/**
|
|
1485
|
+
* Branch one child per entry of `prompts` from `parent`. When the platform can fork, each
|
|
1486
|
+
* child inherits `parent`'s checkpoint — and therefore the parent's IMAGE and
|
|
1487
|
+
* PROFILE: under a real fork `specs[i]` does NOT re-select a per-branch
|
|
1488
|
+
* profile (the SDK forks the running box, it can't swap the image). `specs[i]`
|
|
1489
|
+
* picks the per-branch profile ONLY on the degraded fresh-box path (no CRIU).
|
|
1490
|
+
* A heterogeneous-profile fanout therefore homogenizes to the parent's profile
|
|
1491
|
+
* when fork is available — pass a single shared spec for forked fanouts, or
|
|
1492
|
+
* use `random@k` (no fork) when branches must differ. Each child's first turn
|
|
1493
|
+
* streams `prompts[i]`. Child-box creation is bounded by `maxConcurrency`.
|
|
1494
|
+
*/
|
|
1495
|
+
fork(parent: SandboxLineageHandle, prompts: string[], specs: AgentRunSpec<unknown>[], signal: AbortSignal): Promise<{
|
|
1496
|
+
handle: SandboxLineageHandle;
|
|
1497
|
+
events: AsyncIterable<SandboxEvent>;
|
|
1498
|
+
}[]>;
|
|
1499
|
+
/**
|
|
1500
|
+
* Destroy every owned box whose handle is NOT in `keep`, freeing it before
|
|
1501
|
+
* loop end. The kernel calls this after a round when it can prove no future
|
|
1502
|
+
* round will descend from the pruned boxes (deterministic, monotonic branch
|
|
1503
|
+
* selection); boxes still reachable as a future branch source are retained.
|
|
1504
|
+
* Best-effort, bounded, parallel — a failed delete never throws.
|
|
1505
|
+
*/
|
|
1506
|
+
prune(keep: Iterable<SandboxLineageHandle>): Promise<void>;
|
|
1507
|
+
/** Destroy every box this lineage owns. Best-effort, bounded, parallel. */
|
|
1508
|
+
teardown(): Promise<void>;
|
|
1509
|
+
}
|
|
1510
|
+
/**
|
|
1511
|
+
* Build a lineage bound to one client + its probed capabilities. The
|
|
1512
|
+
* capabilities are passed in (not re-probed) so the kernel probes once per run
|
|
1513
|
+
* and the lineage stays a pure function of "what this platform can do".
|
|
1514
|
+
*
|
|
1515
|
+
* @experimental
|
|
1516
|
+
*/
|
|
1517
|
+
declare function createSandboxLineage(client: LoopSandboxClient, capabilities: SandboxCapabilities, options?: {
|
|
1518
|
+
maxConcurrency?: number;
|
|
1519
|
+
}): SandboxLineage;
|
|
1520
|
+
/**
|
|
1521
|
+
* Loop-side widening of the box's optional checkpoint method. The
|
|
1522
|
+
* `LoopSandboxClient`/`SandboxInstance` surface the kernel relies on does not
|
|
1523
|
+
* require checkpointing; this reads it optionally so the lineage can probe-gate
|
|
1524
|
+
* without importing sandbox-backend specifics. @experimental
|
|
1525
|
+
*/
|
|
1526
|
+
interface CheckpointCapableBox {
|
|
1527
|
+
checkpoint?: (options?: {
|
|
1528
|
+
leaveRunning?: boolean;
|
|
1529
|
+
tags?: string[];
|
|
1530
|
+
}) => Promise<{
|
|
1531
|
+
checkpointId: string;
|
|
1532
|
+
}>;
|
|
1533
|
+
}
|
|
1534
|
+
/** Loop-side widening of the box's optional fork method. @experimental */
|
|
1535
|
+
interface ForkCapableBox {
|
|
1536
|
+
fork?: (checkpointId: string, options?: {
|
|
1537
|
+
name?: string;
|
|
1538
|
+
}) => Promise<SandboxInstance>;
|
|
1539
|
+
}
|
|
1540
|
+
/**
|
|
1541
|
+
* Loop-side widening of the box's optional session accessor. The real
|
|
1542
|
+
* `SandboxInstance` exposes `session(id).status()`; the loop reads it optionally
|
|
1543
|
+
* so `continue` can assert session liveness without requiring it of the test
|
|
1544
|
+
* fakes. `status()` resolves `null` when the id is unknown to the sandbox.
|
|
1545
|
+
* @experimental
|
|
1546
|
+
*/
|
|
1547
|
+
interface SessionCapableBox {
|
|
1548
|
+
session?: (id: string) => {
|
|
1549
|
+
status: () => Promise<unknown | null>;
|
|
1550
|
+
};
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
/**
|
|
1554
|
+
* @experimental
|
|
1555
|
+
*
|
|
1556
|
+
* The conserved budget reservation pool — the invariant the whole instrument
|
|
1557
|
+
* rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
|
|
1558
|
+
* quantities (tokens, usd, iterations) plus an absolute deadline. Children RESERVE
|
|
1559
|
+
* atomically at spawn and RECONCILE at settle:
|
|
1560
|
+
*
|
|
1561
|
+
* total ≡ free + reserved + committed (invariant, always)
|
|
1562
|
+
*
|
|
1563
|
+
* `reserve` moves a child's whole ceiling from `free` → `reserved` and FAILS CLOSED
|
|
1564
|
+
* when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
|
|
1565
|
+
* Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
|
|
1566
|
+
* spend, and refunds the unspent remainder to `free`. Tokens and usd are SEPARATE
|
|
1567
|
+
* channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
|
|
1568
|
+
*
|
|
1569
|
+
* Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
|
|
1570
|
+
* RNG read. A `reserve`/`reconcile` ticket is single-use (fail-loud on double or
|
|
1571
|
+
* unknown reconcile) so a child can never refund twice.
|
|
1572
|
+
*/
|
|
1573
|
+
|
|
1574
|
+
/** Opaque, single-use reservation handle returned by `reserve` and consumed by
|
|
1575
|
+
* `reconcile`. Carries the reserved ceilings so reconciliation needs no lookup. */
|
|
1576
|
+
interface ReservationTicket {
|
|
1577
|
+
readonly id: number;
|
|
1578
|
+
readonly reserved: {
|
|
1579
|
+
readonly tokens: number;
|
|
1580
|
+
readonly usd: number;
|
|
1581
|
+
readonly iterations: number;
|
|
1582
|
+
};
|
|
1583
|
+
}
|
|
1584
|
+
/** Post-reservation pool readout — the shape `Scope.budget` exposes. `tokensLeft`,
|
|
1585
|
+
* `usdLeft`, and `reservedTokens` reflect outstanding (reserved but not yet reconciled) reservations;
|
|
1586
|
+
* `deadlineMs` is the ABSOLUTE wall-clock deadline (0 when the root set none). */
|
|
1587
|
+
type BudgetReadout = Readonly<{
|
|
1588
|
+
tokensLeft: number;
|
|
1589
|
+
usdLeft: number;
|
|
1590
|
+
deadlineMs: number;
|
|
1591
|
+
reservedTokens: number;
|
|
1592
|
+
}>;
|
|
1593
|
+
interface BudgetPool {
|
|
1594
|
+
/**
|
|
1595
|
+
* Atomically reserve a child's full ceiling from the free balance. Fails closed
|
|
1596
|
+
* ({ ok: false }) when the pool can't cover tokens, usd, or iterations — the
|
|
1597
|
+
* caller inspects `ok` before `ticket`.
|
|
1598
|
+
*/
|
|
1599
|
+
reserve(b: Budget): {
|
|
1600
|
+
ok: true;
|
|
1601
|
+
ticket: ReservationTicket;
|
|
1602
|
+
} | {
|
|
1603
|
+
ok: false;
|
|
1604
|
+
reason: 'budget-exhausted';
|
|
1605
|
+
};
|
|
1606
|
+
/**
|
|
1607
|
+
* Release a reservation: commit the actual `spent`, refund the unspent remainder
|
|
1608
|
+
* to the free pool. Throws on an unknown or already-reconciled ticket (fail loud —
|
|
1609
|
+
* a double refund would silently break conservation).
|
|
1610
|
+
*/
|
|
1611
|
+
reconcile(ticket: ReservationTicket, spent: Spend): void;
|
|
1612
|
+
/** Fold a normalized `UsageEvent` stream (or array) into a `Spend`. Tokens via
|
|
1613
|
+
* `addTokenUsage`, usd on its own channel, iterations from `'iteration'` events.
|
|
1614
|
+
* `ms` is left zero — wall-clock duration is the caller's to record, not the pool's. */
|
|
1615
|
+
spendFrom(events: AsyncIterable<UsageEvent> | UsageEvent[]): Promise<Spend>;
|
|
1616
|
+
/** The current readout, reflecting all outstanding reservations. */
|
|
1617
|
+
readout(): BudgetReadout;
|
|
1618
|
+
/** Fail loud if any reservation is still open — the conserved-pool leak detector. Called at the
|
|
1619
|
+
* supervisor's join barrier: once every child has settled, no ticket may remain (a leaked
|
|
1620
|
+
* reservation would silently break `total ≡ free + reserved + committed`). */
|
|
1621
|
+
assertNoOpenTickets(): void;
|
|
1622
|
+
}
|
|
1623
|
+
/** Fold a normalized `UsageEvent` array into a `Spend`. Tokens and usd are separate
|
|
1624
|
+
* channels; iterations come from `'iteration'` events. Pure; `ms` stays zero (the
|
|
1625
|
+
* pool does not read wall-clock). */
|
|
1626
|
+
declare function spendFromUsageEvents(events: UsageEvent[]): Spend;
|
|
1627
|
+
/**
|
|
1628
|
+
* Create a conserved reservation pool from a root `Budget`. `now()` is injected so the
|
|
1629
|
+
* deadline readout is deterministic; defaults to `Date.now` for non-test callers. The
|
|
1630
|
+
* absolute deadline is fixed at construction (`now() + budget.deadlineMs`) so the
|
|
1631
|
+
* readout's `deadlineMs` is a stable wall-clock instant, not a shrinking remainder.
|
|
1632
|
+
*/
|
|
1633
|
+
declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
|
|
1634
|
+
|
|
1635
|
+
/**
|
|
1636
|
+
* @experimental
|
|
1637
|
+
*
|
|
1638
|
+
* The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
|
|
1639
|
+
* open interface frozen in `./types`, plus the open resolver/registry that maps
|
|
1640
|
+
* an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
|
|
1641
|
+
*
|
|
1642
|
+
* The interface is the extension point, not a closed `inline|sandbox|cli` union:
|
|
1643
|
+
* - router/inline : a direct OpenAI-compatible Router call, no box (one-shot).
|
|
1644
|
+
* - sandbox : COMPOSES the existing `runLoop` kernel as a single-task
|
|
1645
|
+
* leaf and surfaces its token/cost usage as `UsageEvent`s;
|
|
1646
|
+
* forwards PR #150's optional `lineage` passthrough WITHOUT
|
|
1647
|
+
* reinventing checkpoint/fork (streaming).
|
|
1648
|
+
* - cli : a Halo/RLM subprocess; `budgetExempt` (no token accounting),
|
|
1649
|
+
* excluded from the equal-k arms by construction (streaming).
|
|
1650
|
+
* Every metered runtime reports through the SAME normalized `UsageEvent` channel
|
|
1651
|
+
* so the conserved budget pool meters them identically. A user's own agent is
|
|
1652
|
+
* first-class the moment it implements `LeafExecutor` — register it by name or
|
|
1653
|
+
* pass it as `AgentSpec.executor`.
|
|
1654
|
+
*
|
|
1655
|
+
* Layering: `estimateCost`/`isModelPriced` are substrate primitives from
|
|
1656
|
+
* `@tangle-network/agent-eval`; `runLoop`/`acquireSandbox` are runtime kernels
|
|
1657
|
+
* from this package. No per-vendor adapters live here.
|
|
1658
|
+
*/
|
|
1659
|
+
|
|
1660
|
+
/**
|
|
1661
|
+
* Router/inline connection seam. A direct OpenAI-compatible Router endpoint —
|
|
1662
|
+
* the cheapest leaf, no box, no tools. `model` overrides the profile's model
|
|
1663
|
+
* hint when present; otherwise the profile's `model.default` is required.
|
|
1664
|
+
*/
|
|
1665
|
+
interface RouterSeam {
|
|
1666
|
+
routerBaseUrl: string;
|
|
1667
|
+
routerKey: string;
|
|
1668
|
+
model?: string;
|
|
1669
|
+
}
|
|
1670
|
+
/**
|
|
1671
|
+
* Sandbox executor seam. The `sandboxClient` the composed `runLoop` creates
|
|
1672
|
+
* boxes through, plus the optional trace/run/lineage wiring forwarded into the
|
|
1673
|
+
* loop. `lineage` is opaque here (PR #150's `RunLoopOptions.lineage`): forwarded
|
|
1674
|
+
* unchanged for forward compatibility, never inspected — this executor does NOT reinvent
|
|
1675
|
+
* checkpoint/fork.
|
|
1676
|
+
*/
|
|
1677
|
+
interface SandboxSeam {
|
|
1678
|
+
sandboxClient: LoopSandboxClient;
|
|
1679
|
+
/** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
|
|
1680
|
+
loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
|
|
1681
|
+
/** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
|
|
1682
|
+
lineage?: unknown;
|
|
1683
|
+
/** Hard cap on the composed loop's iterations. The budget pool reserves against
|
|
1684
|
+
* the spawn `Budget.maxIterations`; this is the leaf's own ceiling. Default 1. */
|
|
1685
|
+
maxIterations?: number;
|
|
1686
|
+
}
|
|
1687
|
+
/** CLI subprocess seam. `bin` + `args` describe the Halo/RLM process to spawn. */
|
|
1688
|
+
interface CliSeam {
|
|
1689
|
+
bin: string;
|
|
1690
|
+
args?: string[];
|
|
1691
|
+
/** Extra environment for the subprocess (merged over `process.env`). */
|
|
1692
|
+
env?: Record<string, string>;
|
|
1693
|
+
/** Working directory for the subprocess. */
|
|
1694
|
+
cwd?: string;
|
|
1695
|
+
}
|
|
1696
|
+
/**
|
|
1697
|
+
* A direct OpenAI-compatible Router chat-completion. One-shot: resolves a
|
|
1698
|
+
* `LeafResult` and reports its terminal usage as `UsageEvent`s through the
|
|
1699
|
+
* conserved pool. Reports REAL token usage — when the provider omits `usage`,
|
|
1700
|
+
* the spend records zero tokens but the call still counts one iteration (a
|
|
1701
|
+
* phantom fabricated 0 is never emitted as a priced cost).
|
|
1702
|
+
*
|
|
1703
|
+
* NOTE for the Integrate phase: this duplicates the minimal body of
|
|
1704
|
+
* `bench/src/router-client.ts#routerChatWithUsage`. `bench/` is a sub-package
|
|
1705
|
+
* outside this package's `rootDir: "src"`, so it cannot be imported here without
|
|
1706
|
+
* breaking the build. Integrate should lift that helper into `src/loops/` and
|
|
1707
|
+
* have both call sites share it (do not re-copy a third time).
|
|
1708
|
+
*/
|
|
1709
|
+
declare const routerInlineExecutor: LeafExecutorFactory<unknown>;
|
|
1710
|
+
/**
|
|
1711
|
+
* COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
|
|
1712
|
+
* the seam's `maxIterations` (default 1), the spec's profile as the agent run.
|
|
1713
|
+
* Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
|
|
1714
|
+
* it drains, and yields one `iteration` event per loop iteration. Forwards the
|
|
1715
|
+
* optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
|
|
1716
|
+
* checkpoint/fork.
|
|
1717
|
+
*
|
|
1718
|
+
* Streaming shape: the loop runs to completion inside the first `next()`, then
|
|
1719
|
+
* the recorded usage events are yielded; the terminal artifact is read from
|
|
1720
|
+
* `resultArtifact()` after the stream drains.
|
|
1721
|
+
*/
|
|
1722
|
+
declare const sandboxExecutor: LeafExecutorFactory<unknown>;
|
|
1723
|
+
/**
|
|
1724
|
+
* Spawns a subprocess (`bin` + `args`). It cannot account tokens, so it is
|
|
1725
|
+
* `budgetExempt: true`: its spend is NOT metered against the conserved pool and
|
|
1726
|
+
* its iterations are EXCLUDED from the equal-k arms by construction (the
|
|
1727
|
+
* resolver/equal-k path checks `budgetExempt`). teardown is SIGTERM → SIGKILL
|
|
1728
|
+
* with a grace window. Streaming: yields one `iteration` event on clean exit.
|
|
1729
|
+
*/
|
|
1730
|
+
declare const cliExecutor: LeafExecutorFactory<unknown>;
|
|
1731
|
+
/**
|
|
1732
|
+
* The open resolver/registry. Pre-registers the three built-ins under their
|
|
1733
|
+
* runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
|
|
1734
|
+
* factory)` for any additional runtime — and a BYO `AgentSpec.executor` resolves
|
|
1735
|
+
* without touching the registry at all. NOT a closed switch; registration + BYO
|
|
1736
|
+
* ARE the extension points.
|
|
1737
|
+
*
|
|
1738
|
+
* `resolve` precedence (frozen in `ExecutorRegistry`): a BYO `spec.executor` →
|
|
1739
|
+
* `harness === null` → the `'router'` factory; else a registered factory for the
|
|
1740
|
+
* harness-derived runtime (`'sandbox'` for any `BackendType`); else fail loud.
|
|
1741
|
+
*/
|
|
1742
|
+
declare function createExecutorRegistry(): ExecutorRegistry;
|
|
1743
|
+
|
|
1744
|
+
/**
|
|
1745
|
+
* @experimental
|
|
1746
|
+
*
|
|
1747
|
+
* The reactive `Scope` impl (KEYSTONE, build step 4 + the step-8 adapter).
|
|
1748
|
+
*
|
|
1749
|
+
* An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
|
|
1750
|
+
* them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
|
|
1751
|
+
* and is the single place that drives a child's lifecycle: reserve budget atomically,
|
|
1752
|
+
* resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
|
|
1753
|
+
* fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
|
|
1754
|
+
* (refunding the unspent remainder), persist the result blob + journal records, and
|
|
1755
|
+
* deliver the `Settled` through the `next()` cursor.
|
|
1756
|
+
*
|
|
1757
|
+
* Three invariants this impl enforces by construction:
|
|
1758
|
+
* - `next()` is a ray.wait n=1 cursor over THIS scope's live set; it assigns the
|
|
1759
|
+
* monotonic `seq` (the recorded cursor order) at the moment it yields a settlement, so
|
|
1760
|
+
* replay re-delivers in the identical order — `seq` is never wall-clock.
|
|
1761
|
+
* - Budget is reserved at spawn and reconciled at settle through the shared `BudgetPool`,
|
|
1762
|
+
* so `spawn` fails CLOSED on an exhausted pool and total ≡ free + reserved + committed.
|
|
1763
|
+
* - `view` reads the in-memory nursery, never the journal — O(live), synchronous.
|
|
1764
|
+
*
|
|
1765
|
+
* The settle path is the only writer of journal `settled` events; the spawn path the only
|
|
1766
|
+
* writer of `spawned` events. The result blob is `put` BEFORE the journal `settled` record
|
|
1767
|
+
* references its `outRef`, so a crash can never leave a journaled ref with no blob.
|
|
1768
|
+
*/
|
|
1769
|
+
|
|
1770
|
+
/** Construction args for `createScope`. The supervisor threads the shared pool, journal,
|
|
1771
|
+
* blob store, and executor registry through; `depth`/`maxDepth` pair the runtime
|
|
1772
|
+
* recursion ceiling with the conserved pool (R3). */
|
|
1773
|
+
interface ScopeArgs {
|
|
1774
|
+
/** This scope's owning node id — children get `${parentId}:s${seq}` ids. */
|
|
1775
|
+
readonly parentId: NodeId;
|
|
1776
|
+
/** Journal/blob root key the supervisor `beginTree`'d. */
|
|
1777
|
+
readonly root: NodeId;
|
|
1778
|
+
/** The shared conserved reservation pool (one per supervised run). */
|
|
1779
|
+
readonly pool: BudgetPool;
|
|
1780
|
+
/** Append-only spawn journal; this scope writes `spawned` + `settled` records. */
|
|
1781
|
+
readonly journal: SpawnJournal;
|
|
1782
|
+
/** Content-addressed result store backing `outRef` rehydration. */
|
|
1783
|
+
readonly blobs: ResultBlobStore;
|
|
1784
|
+
/** The open executor resolver (BYO → router/inline → registered harness factory). */
|
|
1785
|
+
readonly executors: ExecutorRegistry;
|
|
1786
|
+
/** Per-spawn executor-construction seams (sandbox client, router config, cli bin). */
|
|
1787
|
+
readonly seams: Readonly<Record<string, unknown>>;
|
|
1788
|
+
/** This scope's recursion depth (root = 0). */
|
|
1789
|
+
readonly depth: number;
|
|
1790
|
+
/** Runtime recursion-depth ceiling — a spawn past it fails closed `depth-exceeded`. */
|
|
1791
|
+
readonly maxDepth?: number;
|
|
1792
|
+
/** Abort signal for this scope; an abort cascades into every live child's executor. */
|
|
1793
|
+
readonly signal: AbortSignal;
|
|
1794
|
+
/** Injected clock — keeps the journal `at` timestamp deterministic in tests. */
|
|
1795
|
+
readonly now?: () => number;
|
|
1796
|
+
/** Lifecycle stream sink. `spawn` emits `agent.spawn`, `next` emits `agent.child` — the
|
|
1797
|
+
* SAME stream `runLoop`/`tool-loop` feed, so the recursive tree is ONE observable stream
|
|
1798
|
+
* (the topology viewer reads it). Undefined ⇒ the journal stays the only record. */
|
|
1799
|
+
readonly hooks?: RuntimeHooks;
|
|
1800
|
+
}
|
|
1801
|
+
declare function createScope<Out>(args: ScopeArgs): Scope<Out>;
|
|
1802
|
+
/**
|
|
1803
|
+
* The step-8 merge-boundary adapter (M4): rehydrate a `Settled.done` into the kernel's
|
|
1804
|
+
* `Iteration` shape so `defaultSelectWinner` stays single-sourced — the supervisor selects
|
|
1805
|
+
* across settled children with the SAME argmax the loop kernel uses, not a forked copy.
|
|
1806
|
+
*
|
|
1807
|
+
* `index` is the cursor `seq` (the recorded, replay-stable order); `output`/`verdict`/
|
|
1808
|
+
* `tokenUsage`/`costUsd` are read straight off the settlement (already rehydrated from the
|
|
1809
|
+
* `outRef` blob by `next()`). Events are empty — a settled child is an opaque leaf result,
|
|
1810
|
+
* not a sandbox event stream — and the timing/cost fields project its conserved `Spend`.
|
|
1811
|
+
* Fail loud on a `down` settlement: only a `done` child is an iteration.
|
|
1812
|
+
*/
|
|
1813
|
+
declare function settledToIteration<Out>(settled: Settled<Out>): Iteration<unknown, Out>;
|
|
1814
|
+
|
|
1815
|
+
/**
|
|
1816
|
+
* @experimental
|
|
1817
|
+
*
|
|
1818
|
+
* The `Supervisor` impl (KEYSTONE, build step 5).
|
|
1819
|
+
*
|
|
1820
|
+
* Owns the four things a free-running recursive `act` cannot own itself: the GLOBAL
|
|
1821
|
+
* conserved budget pool, the event-sourced spawn log, the abort cascade over the whole
|
|
1822
|
+
* live tree, and the OTP intensity breaker. `run` builds the root `Scope` over those,
|
|
1823
|
+
* runs the root `Agent.act`, and returns a TYPED `SupervisedResult` — a no-winner is
|
|
1824
|
+
* never coerced into a best-effort `Out`.
|
|
1825
|
+
*
|
|
1826
|
+
* Three lifecycle invariants this impl enforces by construction:
|
|
1827
|
+
* - Join barrier: when `act()` settles (resolve OR reject), every still-live child is
|
|
1828
|
+
* torn down before `run` returns — the generalization of the kernel's
|
|
1829
|
+
* `finally{ Promise.allSettled(destroy) }` barrier (run-loop.ts) from boxes to the
|
|
1830
|
+
* whole sub-tree. A teardown failure is `allSettled`'d and journaled as a
|
|
1831
|
+
* `cancelled` event; it NEVER masks act()'s own outcome. act()'s rejection is the
|
|
1832
|
+
* PRIMARY error (the kernel's firstError precedence), so a teardown throw during the
|
|
1833
|
+
* barrier can never overwrite the real failure.
|
|
1834
|
+
* - Abort cascade: a root abort (caller signal, `RootHandle.abort`, a tripped breaker,
|
|
1835
|
+
* or pool exhaustion) aborts ONE internal controller whose signal is the root scope's
|
|
1836
|
+
* signal. The scope cascades that into every live child's executor abort — which, for
|
|
1837
|
+
* an `acquiring` child, chains into the `acquireSandbox` signal and reaps the
|
|
1838
|
+
* find-by-name orphan box (M1). The supervisor never reaps children directly.
|
|
1839
|
+
* - The supervisor NEVER re-enters a child (m3): the kernel/`acquireSandbox` already
|
|
1840
|
+
* retried at the leaf, and a driver re-spawns through `scope.spawn`. The breaker only
|
|
1841
|
+
* COUNTS `down` settlements within the intensity window and trips to a typed
|
|
1842
|
+
* no-winner; it does not restart anything.
|
|
1843
|
+
*
|
|
1844
|
+
* Selection lives in the driver, not here (selector≠judge): `act` returns the synthesized
|
|
1845
|
+
* winner `Out`. The supervisor content-addresses that `Out` for its replay `outRef`,
|
|
1846
|
+
* reads `spentTotal` off the conserved pool, and wraps it as a typed `winner` — it does
|
|
1847
|
+
* not re-rank children behind the driver's back.
|
|
1848
|
+
*/
|
|
1849
|
+
|
|
1850
|
+
declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
|
|
1851
|
+
/**
|
|
1852
|
+
* Mint a `RootHandle` plus its supervisor-private control. The handle is the substrate a
|
|
1853
|
+
* chat/pi-viz client attaches to (Q2): `view()` reads the live tree, `signal()` delivers
|
|
1854
|
+
* an out-of-band message, `abort()` cascades. Before `run` binds it (and after `run`
|
|
1855
|
+
* unbinds it) the handle is fail-loud: a client that talks to a handle that is not
|
|
1856
|
+
* driving a live run gets a typed error, never a silent no-op.
|
|
1857
|
+
*/
|
|
1858
|
+
declare function createRootHandle<Out>(): RootHandle<Out>;
|
|
1859
|
+
|
|
1860
|
+
export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, LeafExecutorFactory, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, type SandboxLineage, type SandboxLineageHandle, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, 
createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
|