@tangle-network/agent-runtime 0.48.0 → 0.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +79 -15
  2. package/dist/agent.d.ts +1 -1
  3. package/dist/agent.js +1 -1
  4. package/dist/analyst-loop.d.ts +1 -1
  5. package/dist/{chunk-656G2XCL.js → chunk-BKAIVNFA.js} +3 -3
  6. package/dist/{chunk-IW2LMLK6.js → chunk-CM2IK7VS.js} +913 -152
  7. package/dist/chunk-CM2IK7VS.js.map +1 -0
  8. package/dist/{chunk-VR4JIC5H.js → chunk-ML4IXGTV.js} +2 -2
  9. package/dist/{chunk-TJS7S3HJ.js → chunk-NDM5VXZW.js} +19 -8
  10. package/dist/chunk-NDM5VXZW.js.map +1 -0
  11. package/dist/chunk-OM3YNZIW.js +978 -0
  12. package/dist/chunk-OM3YNZIW.js.map +1 -0
  13. package/dist/{chunk-JNPK46YH.js → chunk-RHW75JW5.js} +498 -350
  14. package/dist/chunk-RHW75JW5.js.map +1 -0
  15. package/dist/{coder-CVZNGbyg.d.ts → coder-_YCf3BAK.d.ts} +2 -2
  16. package/dist/{driver-DYU2sgHr.d.ts → driver-DLI1io57.d.ts} +1 -1
  17. package/dist/index.d.ts +34 -9
  18. package/dist/index.js +117 -27
  19. package/dist/index.js.map +1 -1
  20. package/dist/kb-gate-CHAyt4aI.d.ts +1571 -0
  21. package/dist/{loop-runner-bin-DEm4roYF.d.ts → loop-runner-bin-DFUNgpeK.d.ts} +4 -4
  22. package/dist/loop-runner-bin.d.ts +5 -5
  23. package/dist/loop-runner-bin.js +3 -3
  24. package/dist/loops.d.ts +6 -6
  25. package/dist/loops.js +17 -1
  26. package/dist/mcp/bin.js +206 -29
  27. package/dist/mcp/bin.js.map +1 -1
  28. package/dist/mcp/index.d.ts +41 -177
  29. package/dist/mcp/index.js +40 -6
  30. package/dist/mcp/index.js.map +1 -1
  31. package/dist/openai-tools-D4HLDWgw.d.ts +45 -0
  32. package/dist/platform.js +2 -2
  33. package/dist/platform.js.map +1 -1
  34. package/dist/profiles.d.ts +2 -2
  35. package/dist/{run-loop-DvD4aGiE.d.ts → run-loop-BIineL1T.d.ts} +1 -1
  36. package/dist/runtime.d.ts +403 -24
  37. package/dist/runtime.js +17 -1
  38. package/dist/{types-BpDfCPUp.d.ts → types-5MGt5KTY.d.ts} +1 -1
  39. package/dist/{types-nBMuollC.d.ts → types-BEQsBhOE.d.ts} +1 -1
  40. package/dist/workflow.d.ts +2 -2
  41. package/dist/workflow.js +1 -1
  42. package/package.json +6 -5
  43. package/dist/chunk-IW2LMLK6.js.map +0 -1
  44. package/dist/chunk-JNPK46YH.js.map +0 -1
  45. package/dist/chunk-LX66I3SC.js +0 -218
  46. package/dist/chunk-LX66I3SC.js.map +0 -1
  47. package/dist/chunk-TJS7S3HJ.js.map +0 -1
  48. package/dist/kb-gate-51BlLlVM.d.ts +0 -529
  49. package/dist/otel-export-EzfsVUhh.d.ts +0 -191
  50. /package/dist/{chunk-656G2XCL.js.map → chunk-BKAIVNFA.js.map} +0 -0
  51. /package/dist/{chunk-VR4JIC5H.js.map → chunk-ML4IXGTV.js.map} +0 -0
package/dist/runtime.d.ts CHANGED
@@ -1,16 +1,16 @@
1
1
  import { AgentProfile, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
2
2
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, h as ExecutorFactory, U as UsageEvent, i as Supervisor } from './types-BpDfCPUp.js';
4
- export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-BpDfCPUp.js';
3
+ import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, h as ExecutorFactory, U as UsageEvent, i as Supervisor } from './types-5MGt5KTY.js';
4
+ export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-5MGt5KTY.js';
5
+ import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
5
6
  import { ChatClient, AnalystFinding, DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
6
7
  export { DefaultVerdict } from '@tangle-network/agent-eval';
7
- export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DYU2sgHr.js';
8
- import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-nBMuollC.js';
9
- export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-nBMuollC.js';
8
+ export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DLI1io57.js';
9
+ import { S as SandboxClient, b as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-BEQsBhOE.js';
10
+ export { D as Driver, F as LoopDecisionPayload, G as LoopEndedPayload, H as LoopIterationDispatchPayload, J as LoopIterationEndedPayload, M as LoopIterationStartedPayload, a as LoopLineageOptions, N as LoopPlanDescription, P as LoopPlanPayload, e as LoopSandboxPlacement, Q as LoopStartedPayload, T as LoopTeardownFailedPayload, f as LoopTraceEmitter, g as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-BEQsBhOE.js';
10
11
  import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
11
- import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
12
- export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-DvD4aGiE.js';
13
- import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
12
+ import { R as RunLoopOptions } from './run-loop-BIineL1T.js';
13
+ export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-BIineL1T.js';
14
14
 
15
15
  /**
16
16
  * @experimental
@@ -113,6 +113,140 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
113
113
  */
114
114
  declare function materializeTreeView(events: SpawnEvent[]): TreeView;
115
115
 
116
+ /**
117
+ * createWaterfallCollector — 100% trajectory observability from the lifecycle stream:
118
+ * every spawn/settle (shots, analysts, nested agents) becomes one timed, billed span.
119
+ * The sum of spans IS the run's cost story — what each step cost in dollars, tokens,
120
+ * and wall-clock, rendered as a text waterfall or exported as structured rows for any
121
+ * chart. Attach the collector's `hooks` to `runAgentic`/`runBenchmark`; spans accumulate
122
+ * across every task the hooks observe.
123
+ */
124
+
125
+ interface WaterfallSpan {
126
+ id: string;
127
+ /** The spawn label (`shot:0`, `analyst:1`, a nested agent's label) — the row name. */
128
+ label: string;
129
+ runId: string;
130
+ parentId?: string;
131
+ startMs: number;
132
+ endMs?: number;
133
+ status: 'running' | 'done' | 'down';
134
+ usd: number;
135
+ tokens: {
136
+ input: number;
137
+ output: number;
138
+ };
139
+ score?: number;
140
+ }
141
+ interface WaterfallReport {
142
+ spans: WaterfallSpan[];
143
+ /** Wall-clock of the observed window (first spawn → last settle). */
144
+ totalMs: number;
145
+ totalUsd: number;
146
+ totalTokens: {
147
+ input: number;
148
+ output: number;
149
+ };
150
+ /** Rollup by label prefix (the part before ':') — shots vs analysts vs anything else. */
151
+ byKind: Record<string, {
152
+ count: number;
153
+ ms: number;
154
+ usd: number;
155
+ tokens: {
156
+ input: number;
157
+ output: number;
158
+ };
159
+ }>;
160
+ }
161
+ interface WaterfallCollector {
162
+ /** Attach these to RunAgenticOptions.hooks / BenchmarkConfig.hooks. */
163
+ hooks: RuntimeHooks;
164
+ report(): WaterfallReport;
165
+ /** The text waterfall — one row per span, bars scaled to the observed window. */
166
+ render(opts?: {
167
+ width?: number;
168
+ maxRows?: number;
169
+ }): string;
170
+ reset(): void;
171
+ }
172
+ declare function createWaterfallCollector(): WaterfallCollector;
173
+
174
+ /**
175
+ * anytimeReport — time-to-satisfactory-output metrics, derived entirely from the
176
+ * waterfall's spans (no new instrumentation): per task, the best-so-far score after each
177
+ * shot with its elapsed wall-clock and cumulative spend; per strategy, the standard
178
+ * anytime-optimization metrics:
179
+ *
180
+ * TTT time-to-target — elapsed ms until best-so-far ≥ the target (per task; median
181
+ * over tasks that reached it)
182
+ * STT shots-to-target — attempts until best-so-far ≥ target
183
+ * ERT expected running time (the COCO benchmarking convention): TOTAL time spent
184
+ * across all tasks — including failures' full budgets — divided by the number of
185
+ * tasks that reached the target. The honest "how long per success, all-in".
186
+ * AUC the anytime curve's area (mean best-so-far score across the budget, per shot
187
+ * index) — higher = climbs earlier.
188
+ *
189
+ * The "satisfactory" bar follows the COCO/BBOB convention: a SET of satisficing targets
190
+ * (e.g. [0.5, 0.8, 1.0] on the normalized check score), each measured independently —
191
+ * runtime-to-target per (task, target) pair — optionally overridden per task
192
+ * (`targetFor`) when satisfaction is task-specific. Spans come from
193
+ * `createWaterfallCollector().report()`; tasks are grouped by the supervisor runId
194
+ * (`agentic:<strategy>:<taskId>`); shot spans are `shot:N` labels.
195
+ */
196
+
197
+ interface AnytimeTaskCurve {
198
+ taskId: string;
199
+ strategy: string;
200
+ /** Best-so-far after each settled shot: elapsed ms from the task's first spawn,
201
+ * cumulative usd, and the running max score. */
202
+ points: Array<{
203
+ elapsedMs: number;
204
+ cumUsd: number;
205
+ best: number;
206
+ }>;
207
+ /** Per satisficing target (keyed by the target value as a string): the first point
208
+ * where best ≥ target, or null when never reached within budget. */
209
+ hits: Record<string, {
210
+ ms: number;
211
+ shots: number;
212
+ usd: number;
213
+ } | null>;
214
+ }
215
+ interface AnytimeStrategySummary {
216
+ strategy: string;
217
+ /** The satisficing target this row summarizes. */
218
+ target: number;
219
+ tasks: number;
220
+ reachedTarget: number;
221
+ /** Median time-to-target over the tasks that reached it (null when none did). */
222
+ medianTttMs: number | null;
223
+ medianShotsToTarget: number | null;
224
+ /** COCO ERT: Σ all task wall-time (incl. failures) / #successes. Null when 0 succeed. */
225
+ ertMs: number | null;
226
+ /** Same construction over dollars: Σ all spend / #successes. */
227
+ erUsd: number | null;
228
+ /** Mean best-so-far score by shot index (the anytime curve, averaged over tasks). */
229
+ curveByShot: number[];
230
+ /** Area under the per-shot anytime curve, normalized to [0,1]. */
231
+ auc: number;
232
+ }
233
+ interface AnytimeReport {
234
+ targets: number[];
235
+ perTask: AnytimeTaskCurve[];
236
+ /** One summary per (strategy, target) pair — the COCO-style multi-target view. */
237
+ perStrategy: AnytimeStrategySummary[];
238
+ }
239
+ /** Derive anytime metrics from waterfall spans. `targets` are the satisficing score
240
+ * bars (default [1] = fully resolved; COCO-style multi-target: [0.5, 0.8, 1]);
241
+ * `targetFor` overrides the bar per task (task-specific satisfaction) — when set, the
242
+ * per-task bar replaces every entry of `targets` for that task. */
243
+ declare function anytimeReport(spans: WaterfallSpan[], opts?: {
244
+ targets?: number[];
245
+ targetFor?: (taskId: string) => number;
246
+ }): AnytimeReport;
247
+ /** One row per (strategy, satisficing target): the shareable time-to-satisfactory table. */
248
+ declare function renderAnytimeTable(report: AnytimeReport): string;
249
+
116
250
  /**
117
251
  * auditIntent — the route-rigor analyst: is this trajectory even going the RIGHT WAY?
118
252
  *
@@ -1144,6 +1278,9 @@ interface AgenticOptions {
1144
1278
  routerKey: string;
1145
1279
  model: string;
1146
1280
  temperature?: number;
1281
+ /** Completion cap per worker turn — REQUIRED for thinking models (they burn unbounded
1282
+ * budgets on reasoning and return empty content without it). Omitted ⇒ provider default. */
1283
+ maxTokens?: number;
1147
1284
  /** Turns the agent may take within ONE shot before the driver intervenes. */
1148
1285
  innerTurns?: number;
1149
1286
  /** The depth STEERER's analyst instruction (observe()'s system prompt). The knob a
@@ -1225,6 +1362,9 @@ interface ShotSpec {
1225
1362
  messages?: Msg[];
1226
1363
  steer?: string;
1227
1364
  persona?: ShotPersona;
1365
+ /** Restrict THIS shot to a subset of the domain's tools (by name) — focus a shot on
1366
+ * the relevant capabilities. Restriction-only; unknown names throw. Omitted ⇒ all. */
1367
+ tools?: string[];
1228
1368
  }
1229
1369
  interface StrategyResult {
1230
1370
  score: number;
@@ -1253,6 +1393,18 @@ interface StrategyCtx {
1253
1393
  shot(spec?: ShotSpec): Promise<ShotResult | null>;
1254
1394
  /** The firewalled critic reads the trajectory → a steer string, or null on COMPLETE/down. */
1255
1395
  critique(messages: Msg[]): Promise<string | null>;
1396
+ /** The RAW analyst channel: the firewalled critic answers `instruction` over the
1397
+ * trajectory verbatim — no findings extraction, so verdict-shaped formats
1398
+ * (CONTINUE/STOP decisions, calibrated predictions) survive. Same firewall:
1399
+ * trajectory in, never scores. Null when the analyst went down. */
1400
+ consult(messages: Msg[], instruction: string): Promise<string | null>;
1401
+ /** The tools THIS artifact's task actually offers (names + descriptions only — never
1402
+ * the implementations). Tool sets vary per task on heterogeneous domains; a strategy
1403
+ * that restricts shots MUST select from this list, never from hardcoded names. */
1404
+ listTools(handle: ArtifactHandle): Promise<Array<{
1405
+ name: string;
1406
+ description?: string;
1407
+ }>>;
1256
1408
  }
1257
1409
  /** Author a Strategy from the composable steps — the open, compact way. */
1258
1410
  declare function defineStrategy(name: string, run: (ctx: StrategyCtx) => Promise<StrategyResult>): Strategy;
@@ -1346,6 +1498,10 @@ interface BenchmarkTaskRow {
1346
1498
  taskId: string;
1347
1499
  /** Per-strategy cells; absent when the task errored before completing all strategies. */
1348
1500
  cells?: Record<string, BenchmarkCell>;
1501
+ /** Per-strategy failures on this task: the strategy competed, threw, and scored an
1502
+ * honest zero — it loses, it does not poison the row. The message is kept so a later
1503
+ * generation's author can see WHY a candidate died. */
1504
+ errors?: Record<string, string>;
1349
1505
  /** Why the task was excluded (infra/setup failure) — never silently dropped. */
1350
1506
  error?: string;
1351
1507
  }
@@ -1757,6 +1913,13 @@ interface PromotionGateOptions {
1757
1913
  incumbent: string;
1758
1914
  /** The challenger's strategy name. */
1759
1915
  candidate: string;
1916
+ /** 'superiority' (default): the candidate must score significantly BETTER.
1917
+ * 'non-inferiority': the candidate must prove its score is not worse than the
1918
+ * incumbent by more than `scoreTolerance` AND its cost savings are significant —
1919
+ * the gate for "same quality, cheaper" claims. */
1920
+ mode?: 'superiority' | 'non-inferiority';
1921
+ /** non-inferiority: the score CI lower bound must clear −scoreTolerance. Default 0.05. */
1922
+ scoreTolerance?: number;
1760
1923
  /** The CI lower bound on the paired lift must EXCEED this (score scale). Default 0. */
1761
1924
  deltaThreshold?: number;
1762
1925
  /** Minimum paired tasks before significance can be claimed. Default 6 — below that
@@ -1770,7 +1933,8 @@ interface PromotionGateOptions {
1770
1933
  }
1771
1934
  interface PromotionVerdict {
1772
1935
  promoted: boolean;
1773
- reason: 'identical-champion' | 'few-tasks' | 'no-margin' | 'significant';
1936
+ reason: 'identical-champion' | 'few-tasks' | 'no-margin' | 'significant' | 'non-inferior-and-cheaper' | 'non-inferiority-unproven' | 'not-cheaper';
1937
+ mode: 'superiority' | 'non-inferiority';
1774
1938
  /** Paired tasks that carried both strategies' cells. */
1775
1939
  n: number;
1776
1940
  /** Paired (candidate − incumbent) lift across the holdout tasks. */
@@ -1780,6 +1944,23 @@ interface PromotionVerdict {
1780
1944
  low: number;
1781
1945
  high: number;
1782
1946
  };
1947
+ /** non-inferiority mode: paired (incumbent − candidate) cost SAVINGS per task (usd) —
1948
+ * positive means the candidate is cheaper; significant iff the CI low clears zero. */
1949
+ costSavings?: {
1950
+ mean: number;
1951
+ median: number;
1952
+ low: number;
1953
+ high: number;
1954
+ };
1955
+ /** Paired (candidate − incumbent) wall-clock per task (ms) — negative = the candidate
1956
+ * is FASTER. Informational in every mode (never gates); the latency answer to "what
1957
+ * does this win actually cost the user?". */
1958
+ latency?: {
1959
+ mean: number;
1960
+ median: number;
1961
+ low: number;
1962
+ high: number;
1963
+ };
1783
1964
  }
1784
1965
  declare function promotionGate(opts: PromotionGateOptions): PromotionVerdict;
1785
1966
 
@@ -1815,6 +1996,117 @@ interface UsageSink {
1815
1996
  */
1816
1997
  declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
1817
1998
 
1999
+ /**
2000
+ * The one router chat client: direct OpenAI-compatible completions through the
2001
+ * Tangle router — the cheapest dial, no sandbox. Three layers: `routerChatWithUsage`
2002
+ * (chat-only), `routerChatWithTools` (one completion with function tools), and
2003
+ * `routerToolLoop` (the off-box agentic loop over tool-calling). Shared by the
2004
+ * built-in executors and the bench/lab harnesses.
2005
+ *
2006
+ * Reports REAL token usage so the backend-integrity guard sees a real backend.
2007
+ * Returns `undefined` usage when the provider omitted it — never a fabricated 0
2008
+ * (a phantom 0 reads as a free call downstream, which the gate would act on).
2009
+ */
2010
+ interface RouterConfig {
2011
+ routerBaseUrl: string;
2012
+ routerKey: string;
2013
+ model: string;
2014
+ }
2015
+ interface RouterChatResult {
2016
+ content: string;
2017
+ /** REAL usage, or undefined when the provider reported none. */
2018
+ usage?: {
2019
+ input: number;
2020
+ output: number;
2021
+ };
2022
+ /** Derived from usage via `estimateCost` when the model is priced; else undefined. */
2023
+ costUsd?: number;
2024
+ }
2025
+ declare function routerChatWithUsage(cfg: RouterConfig, messages: Array<{
2026
+ role: string;
2027
+ content: string;
2028
+ }>, opts?: {
2029
+ temperature?: number;
2030
+ signal?: AbortSignal;
2031
+ maxTokens?: number;
2032
+ }): Promise<RouterChatResult>;
2033
+ /** A tool-call the model emitted (provider-neutral; mirrors the runtime's ToolCallRequest). */
2034
+ interface RouterToolCall {
2035
+ id: string;
2036
+ name: string;
2037
+ /** Raw JSON arguments string as emitted by the model. */
2038
+ arguments: string;
2039
+ }
2040
+ interface RouterChatToolsResult {
2041
+ content: string | null;
2042
+ toolCalls: RouterToolCall[];
2043
+ usage?: {
2044
+ input: number;
2045
+ output: number;
2046
+ };
2047
+ costUsd?: number;
2048
+ }
2049
+ /**
2050
+ * A router completion WITH tool-calling — the operator driver's LLM seam. Passes OpenAI-shape
2051
+ * `messages` (system/user/assistant-with-tool_calls/tool roles) + function `tools`, and returns the
2052
+ * assistant text plus the tool calls the model wants run. Same fail-loud + real-usage discipline as
2053
+ * `routerChatWithUsage`. `tool_choice: 'auto'` lets the model decide; the driver loops on the result.
2054
+ */
2055
+ declare function routerChatWithTools(cfg: RouterConfig, messages: ReadonlyArray<Record<string, unknown>>, tools: ReadonlyArray<{
2056
+ type: 'function';
2057
+ function: {
2058
+ name: string;
2059
+ description?: string;
2060
+ parameters: unknown;
2061
+ };
2062
+ }>, opts?: {
2063
+ temperature?: number;
2064
+ signal?: AbortSignal;
2065
+ toolChoice?: 'auto' | 'required' | 'none';
2066
+ }): Promise<RouterChatToolsResult>;
2067
+ interface ToolSpec {
2068
+ type: 'function';
2069
+ function: {
2070
+ name: string;
2071
+ description?: string;
2072
+ parameters: unknown;
2073
+ };
2074
+ }
2075
+ interface RouterToolLoopResult {
2076
+ /** The model's final assistant text (the turn where it stopped calling tools, or the budget turn). */
2077
+ final: string;
2078
+ /** Inference turns spent (≤ maxTurns) — the equal-budget unit vs random@k. */
2079
+ turns: number;
2080
+ toolCalls: number;
2081
+ /** The behavior trace: each tool call + its result, in order. What a trace-analyst
2082
+ * steerer reads (behavior, never the verdict) to diagnose + redirect the next shot. */
2083
+ toolTrace: Array<{
2084
+ name: string;
2085
+ args: string;
2086
+ result: string;
2087
+ }>;
2088
+ usage: {
2089
+ input: number;
2090
+ output: number;
2091
+ };
2092
+ }
2093
+ /**
2094
+ * The tool-using router backend: a real agentic loop OVER the Tangle router (which
2095
+ * supports tool-calling), off-box — no sandbox. Each turn is one router completion
2096
+ * with `tools`; if the model emits tool_calls, `execute` runs them on the host and
2097
+ * their results are folded back as `tool` messages; the loop repeats until the
2098
+ * model answers without a tool call or the turn budget is hit. One turn = one
2099
+ * inference call, so `maxTurns` is the equal-compute unit against random@k.
2100
+ *
2101
+ * This is the depth substrate for agentic gates (the worker ACTS, observes the real
2102
+ * result, and continues) that the chat-only `routerChatWithUsage` cannot express.
2103
+ */
2104
+ declare function routerToolLoop(cfg: RouterConfig, system: string, user: string, tools: ReadonlyArray<ToolSpec>, execute: (name: string, args: Record<string, unknown>) => Promise<string>, opts?: {
2105
+ maxTurns?: number;
2106
+ temperature?: number;
2107
+ signal?: AbortSignal;
2108
+ }): Promise<RouterToolLoopResult>;
2109
+
1818
2110
  /**
1819
2111
  * @experimental
1820
2112
  *
@@ -2227,7 +2519,7 @@ declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandbox
2227
2519
  */
2228
2520
 
2229
2521
  /** The compressed consumable a skill carries: everything an author needs to emit a loop. */
2230
- declare const strategyAuthorContract = "\nYou author an OPTIMIZATION STRATEGY for an agentic loop system. A strategy decides how to\nspend a compute budget to beat a task's deployable check. You compose exactly two steps:\n\n shot(spec?: { handle?, messages?, steer?, persona? }): Promise<ShotResult | null>\n Runs ONE worker attempt (a bounded tool loop) over an artifact.\n - omit handle => the shot opens its OWN fresh artifact and closes it after (a sample).\n - pass handle => the shot CONTINUES that artifact (state accumulates across shots).\n - messages => the carried conversation (pass the previous ShotResult.messages to continue).\n - steer => a corrective instruction injected before the shot.\n - persona => { systemPrompt?, model? } \u2014 give THIS shot its own role and/or model\n (multi-agent strategies: a researcher shot then an engineer shot, a panel of k\n personas over one budget). On a fresh shot the systemPrompt replaces the task's; on\n a carried conversation it arrives as a hand-off message. Same conserved budget.\n ShotResult = { messages, score (0..1 on the task's check), passes, total, completions, toolErrors }\n Returns null if the attempt failed infra-wise.\n\n critique(messages): Promise<string | null>\n A firewalled trace-analyst reads the attempt's trajectory and returns ONE corrective\n instruction (or null when it judges the work complete). Costs ~1 completion.\n\n surface.open(task) / surface.close(handle)\n Open a persistent artifact you manage yourself (remember to close in a finally).\n\nRules:\n- Stay within ~budget total shots; every shot/critique spends from a conserved pool.\n- For a FRESH attempt OMIT `messages` entirely (never pass `[]` \u2014 an empty array is a\n fresh conversation too, but be explicit). 
To CONTINUE, pass the previous\n ShotResult.messages unchanged.\n- Return { score, resolved, completions, progression, shots } \u2014 score = the BEST checkpoint\n you reached (keep-best, never final-state), progression = score after each shot.\n- The module must be EXACTLY this shape (no other imports, no commentary outside code):\n\nimport { defineStrategy } from '@tangle-network/agent-runtime/loops'\nexport default defineStrategy('your-strategy-name', async ({ surface, task, budget, shot, critique }) => {\n // your composition\n})\n";
2522
+ declare const strategyAuthorContract = "\nYou author an OPTIMIZATION STRATEGY for an agentic loop system. A strategy decides how to\nspend a compute budget to beat a task's deployable check. You compose exactly two steps:\n\n shot(spec?: { handle?, messages?, steer?, persona?, tools? }): Promise<ShotResult | null>\n Runs ONE worker attempt (a bounded tool loop) over an artifact.\n - omit handle => the shot opens its OWN fresh artifact and closes it after (a sample).\n - pass handle => the shot CONTINUES that artifact (state accumulates across shots).\n - messages => the carried conversation (pass the previous ShotResult.messages to continue).\n - steer => a corrective instruction injected before the shot.\n - persona => { systemPrompt?, model? } \u2014 give THIS shot its own role and/or model\n (multi-agent strategies: a researcher shot then an engineer shot, a panel of k\n personas over one budget). On a fresh shot the systemPrompt replaces the task's; on\n a carried conversation it arrives as a hand-off message. Same conserved budget.\n - tools => string[] \u2014 restrict THIS shot to a subset of the task's tools by\n name (focus an explore shot on read-only tools, an execute shot on write tools).\n Restriction-only; unknown names make the shot fail. ALWAYS select from\n await listTools(handle) \u2014 never hardcode. Omitted => the shot sees every tool.\n ShotResult = { messages, score (0..1 on the task's check), passes, total, completions, toolErrors }\n Returns null if the attempt failed infra-wise.\n\n critique(messages): Promise<string | null>\n A firewalled trace-analyst reads the attempt's trajectory and returns ONE corrective\n instruction (or null when it judges the work complete). 
Costs ~1 completion.\n\n consult(messages, instruction): Promise<string | null>\n The RAW analyst channel: the same firewalled critic answers YOUR instruction over the\n trajectory verbatim (no reformatting) \u2014 use it when you need a specific reply format\n (a decision, a prediction). Costs ~1 completion.\n\n surface.open(task) / surface.close(handle)\n Open a persistent artifact you manage yourself (remember to close in a finally).\n close is idempotent \u2014 closing an already-closed handle is a safe no-op.\n\n listTools(handle): Promise<Array<{ name, description? }>>\n The tools THIS task actually offers. TOOL SETS VARY PER TASK \u2014 if you restrict a\n shot with `tools`, you MUST pick names from await listTools(handle); hardcoding\n names from an example kills your shots on every task whose tools differ.\n\nRules:\n- ALWAYS await every shot/critique/surface call \u2014 a floating promise that rejects\n crashes the whole benchmark run.\n- Stay within ~budget total shots; every shot/critique spends from a conserved pool.\n- For a FRESH attempt OMIT `messages` entirely (never pass `[]` \u2014 an empty array is a\n fresh conversation too, but be explicit). To CONTINUE, pass the previous\n ShotResult.messages unchanged.\n- Return { score, resolved, completions, progression, shots } \u2014 score = the BEST checkpoint\n you reached (keep-best, never final-state), progression = score after each shot.\n- The module must be EXACTLY this shape (no other imports, no commentary outside code):\n\nimport { defineStrategy } from '@tangle-network/agent-runtime/loops'\nexport default defineStrategy('your-strategy-name', async ({ surface, task, budget, shot, critique, listTools }) => {\n // your composition (listTools comes from the destructured context \u2014 it is NOT a global)\n})\n";
2231
2523
  interface AuthorStrategyOptions {
2232
2524
  /** The model-call seam (agent-eval `createChatClient`). */
2233
2525
  chat: ChatClient;
@@ -2328,6 +2620,14 @@ interface StrategyEvolutionConfig {
2328
2620
  populationSize?: number;
2329
2621
  /** The gen0 field. Default [sample, refine, sampleThenRefine]. */
2330
2622
  baselines?: Strategy[];
2623
+ /** What "better" means for PROMOTION. 'score' (default): the candidate must beat the
2624
+ * incumbent's score (superiority gate). 'cost': the candidate must prove score
2625
+ * NON-INFERIORITY (not worse by more than `scoreTolerance`) plus significant cost
2626
+ * savings — the "same quality, cheaper" objective. The author is told the objective
2627
+ * and sees per-task spend either way. */
2628
+ objective?: 'score' | 'cost';
2629
+ /** Cost objective: the score CI lower bound must clear −scoreTolerance. Default 0.05. */
2630
+ scoreTolerance?: number;
2331
2631
  /** Search-side champion selection. Default 'costAware'. */
2332
2632
  champion?: ChampionPolicy;
2333
2633
  /** Score band treated as a tie under 'costAware'. Default 0.01. */
@@ -2336,6 +2636,48 @@ interface StrategyEvolutionConfig {
2336
2636
  outDir: string;
2337
2637
  /** Promotion-gate evidence floor (paired holdout tasks). */
2338
2638
  minPairedTasks?: number;
2639
+ /** BAND-AWARE scoring — concentrate the measurement where lift is possible.
2640
+ * Holdout: draw `holdoutPoolN` candidate tasks and run `baselines[0]` once at the run
2641
+ * budget as an INDEPENDENT reference screen; keep tasks scoring ≤ `maxRefScore`
2642
+ * (headroom exists) and take the first `holdoutN`. Band membership is decided before
2643
+ * either finalist touches a task and both finalists then face the SAME tasks — the
2644
+ * estimand becomes "paired lift on headroom tasks", pre-registered by this config.
2645
+ * Train: champion selection ignores zero-spread tasks (every field strategy scored
2646
+ * identically — zero selection information, pure noise dilution). */
2647
+ band?: {
2648
+ holdoutPoolN: number;
2649
+ /** Keep holdout tasks where the reference scores ≤ this. Default 0.99 — drop only
2650
+ * tasks the reference already solves fully (no headroom, a candidate can only tie). */
2651
+ maxRefScore?: number;
2652
+ };
2653
+ /** What the author learns from a tournament. 'exact' (default) = scores + progressions
2654
+ * per task; 'binary' = pass/fail only — the leakage-bounded channel (one bit per cell
2655
+ * per generation reaches the author from the evaluation data). */
2656
+ lossesDetail?: 'exact' | 'binary';
2657
+ /** Reproducer certification (arXiv:2606.11045): when the final champion is AUTHORED,
2658
+ * compress it to a short natural-language summary, have a fresh author re-implement
2659
+ * from the summary alone (no losses, no code), and score the reproduction on the same
2660
+ * holdout. A reproduction gap is an overfitting signal (their detector: 100%
2661
+ * sensitivity / 91% specificity in the ML-agent setting) — recorded on the report,
2662
+ * never gate-blocking in v1. */
2663
+ reproducerCheck?: {
2664
+ /** Word budget for the strategy summary. Default 64. */
2665
+ summaryMaxWords?: number;
2666
+ /** Reproduction counts as faithful when reproducedScore ≥ championScore − tolerance.
2667
+ * Default 0.05. */
2668
+ tolerance?: number;
2669
+ };
2670
+ /** Endurance: write the run state after every completed phase; with `resume`, a
2671
+ * restart skips completed phases (authored modules re-imported from their files).
2672
+ * Worst case after a mid-run death is re-paying ONE phase, never the run. */
2673
+ checkpoint?: {
2674
+ path: string;
2675
+ resume?: boolean;
2676
+ };
2677
+ /** Called before each benchmark phase (gen0, gen1…, band-screen, holdout, reproduce).
2678
+ * The seam for environment recycling — no artifacts span phases, so a runner may
2679
+ * recreate a wedge-prone environment container here. */
2680
+ onPhase?: (phase: string) => Promise<void>;
2339
2681
  onTask?: (phase: string, row: BenchmarkTaskRow, done: number, total: number) => void;
2340
2682
  hooks?: RuntimeHooks;
2341
2683
  }
@@ -2371,6 +2713,32 @@ interface EvolutionArchiveNode {
2371
2713
  score: number;
2372
2714
  usd: number;
2373
2715
  }
2716
+ interface ReproductionCheck {
2717
+ /** The compressed strategy description the reproducer implemented from. */
2718
+ summary: string;
2719
+ reproducedName: string;
2720
+ file?: string;
2721
+ championHoldoutScore: number;
2722
+ reproducedHoldoutScore: number;
2723
+ /** champion − reproduced (positive = the reproduction fell short). */
2724
+ gap: number;
2725
+ /** reproducedScore ≥ championScore − tolerance. A failed reproduction is an
2726
+ * overfitting signal: the champion's win did not fit through the summary. */
2727
+ reproducible: boolean;
2728
+ /** Infra failure during reproduction (distinct from a semantic reproduction failure). */
2729
+ error?: string;
2730
+ }
2731
+ interface EvolutionBandInfo {
2732
+ /** Tasks screened by the reference on the holdout pool. */
2733
+ screened: number;
2734
+ /** Tasks kept (reference score ≤ maxRefScore) before truncating to holdoutN. */
2735
+ inBand: number;
2736
+ /** Reference scores per screened task (the screening record). */
2737
+ refScores: Array<{
2738
+ taskId: string;
2739
+ score: number;
2740
+ }>;
2741
+ }
2374
2742
  interface EvolutionReport {
2375
2743
  gen0: BenchmarkReport;
2376
2744
  gen0Champion: ChampionPick;
@@ -2379,6 +2747,11 @@ interface EvolutionReport {
2379
2747
  finalChampion: ChampionPick;
2380
2748
  holdout: BenchmarkReport;
2381
2749
  verdict: PromotionVerdict;
2750
+ /** Present when band screening ran — the verdict's estimand is then "paired lift on
2751
+ * headroom tasks" (band membership fixed by the reference screen, pre-registered). */
2752
+ band?: EvolutionBandInfo;
2753
+ /** Present when reproducerCheck ran (final champion was authored). */
2754
+ reproduction?: ReproductionCheck;
2382
2755
  /** SEARCH TELEMETRY, not evidence: each entry is that generation's own train-slice
2383
2756
  * re-measurement, so cross-generation deltas mix true drift with run-to-run variance
2384
2757
  * (entries are unpaired across generations). The only evidence-grade comparison in
@@ -2390,9 +2763,22 @@ interface EvolutionReport {
2390
2763
  usd: number;
2391
2764
  }>;
2392
2765
  }
2393
- /** Search-side champion selection over a tournament report. 'score' takes the best mean
2394
- * score (ties → field order). 'costAware' treats scores within `epsilon` of the best as
2395
- * tied and takes the cheapest — the (score, $) Pareto rule collapsed to one pick. */
2766
+ /** Strategy means recomputed over the DISCRIMINATING tasks only — tasks where the field
2767
+ * strategies did not all score identically. Zero-spread tasks (everyone 1.0, everyone
2768
+ * 0.0, everyone tied) carry no selection information; averaging over them dilutes real
2769
+ * differences toward zero. Search-side denoising only — the gate never uses this. */
2770
+ declare function discriminatingMeans(report: BenchmarkReport, fieldOrder: string[]): Record<string, {
2771
+ score: number;
2772
+ usd: number;
2773
+ }> | null;
2774
+ /** The champion pick over a means table. 'score' takes the best mean score (ties →
2775
+ * field order). 'costAware' treats scores within `epsilon` of the best as tied and
2776
+ * takes the cheapest — the (score, $) Pareto rule collapsed to one pick. */
2777
+ declare function pickChampion(means: Record<string, {
2778
+ score: number;
2779
+ usd: number;
2780
+ }>, fieldOrder: string[], policy: ChampionPolicy, epsilon: number): ChampionPick;
2781
+ /** Search-side champion selection over a tournament report. */
2396
2782
  declare function selectChampion(report: BenchmarkReport, fieldOrder: string[], policy: ChampionPolicy, epsilon: number): ChampionPick;
2397
2783
  declare function runStrategyEvolution(cfg: StrategyEvolutionConfig): Promise<EvolutionReport>;
2398
2784
 
@@ -2554,15 +2940,6 @@ interface BridgeSeam {
2554
2940
  agentProfile?: Record<string, unknown>;
2555
2941
  timeoutMs?: number;
2556
2942
  }
2557
- /** An OpenAI-shape function tool the model may call. */
2558
- interface ToolSpec {
2559
- type: 'function';
2560
- function: {
2561
- name: string;
2562
- description?: string;
2563
- parameters: unknown;
2564
- };
2565
- }
2566
2943
  /**
2567
2944
  * Router seam WITH tool use — the tool-using router backend. Same direct
2568
2945
  * OpenAI-compatible endpoint as `RouterSeam`, but each turn passes `tools`; when
@@ -2579,7 +2956,9 @@ interface RouterToolsSeam {
2579
2956
  model?: string;
2580
2957
  tools: ReadonlyArray<ToolSpec>;
2581
2958
  executeToolCall: (name: string, args: Record<string, unknown>, task: unknown) => Promise<string>;
2582
- /** Max inference turns (default 4). */
2959
+ /** Max inference turns. Default 200 (runaway backstop — set far above any
2960
+ * legitimate workflow). For tighter per-workflow limits use a cost budget
2961
+ * or wall-clock deadline at the call site. */
2583
2962
  maxTurns?: number;
2584
2963
  }
2585
2964
  /**
@@ -2805,4 +3184,4 @@ declare function gitWorkspace(opts: GitWorkspaceOptions): Workspace;
2805
3184
  * requires `jj` on the `Shell`'s host. */
2806
3185
  declare function jjWorkspace(opts: GitWorkspaceOptions): Workspace;
2807
3186
 
2808
- export { Agent, AgentRunSpec, AgentSpec, type AgenticOptions, type AgenticRunResult, type AgenticSurface, type AgenticTask, type AgenticTool, type ArtifactHandle, type AssertTraceDerivedFindings, type AuditIntentInput, type AuditIntentOptions, type AuthorStrategyOptions, type AuthoredStrategy, type BenchmarkCell, type BenchmarkConfig, type BenchmarkLift, type BenchmarkReport, type BenchmarkStrategySummary, type BenchmarkTaskRow, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type ChampionPick, type ChampionPolicy, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type Environment, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, type EvolutionArchiveNode, type EvolutionAuthor, type EvolutionCandidate, type EvolutionGeneration, type EvolutionReport, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, type HarvestCorpusOptions, type HarvestFailure, type HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, type IntentAudit, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, type McpEndpoint, type McpEnvironmentOptions, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type PromotionGateOptions, type PromotionVerdict, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type 
ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, type RouterToolsSeam, type RunAgenticOptions, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, type ShotPersona, type ShotSpec, SpawnEvent, SpawnJournal, Spend, type SteerContext, type Strategy, type StrategyCtx, type StrategyEvolutionConfig, type StrategyResult, SupervisedResult, Supervisor, type SurfaceScore, type ToolSpec, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type VerifierEnvironmentOptions, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, adaptiveRefine, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, jjWorkspace, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, 
sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen };
3187
+ export { Agent, AgentRunSpec, AgentSpec, type AgenticOptions, type AgenticRunResult, type AgenticSurface, type AgenticTask, type AgenticTool, type AnytimeReport, type AnytimeStrategySummary, type AnytimeTaskCurve, type ArtifactHandle, type AssertTraceDerivedFindings, type AuditIntentInput, type AuditIntentOptions, type AuthorStrategyOptions, type AuthoredStrategy, type BenchmarkCell, type BenchmarkConfig, type BenchmarkLift, type BenchmarkReport, type BenchmarkStrategySummary, type BenchmarkTaskRow, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type ChampionPick, type ChampionPolicy, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type Environment, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, type EvolutionArchiveNode, type EvolutionAuthor, type EvolutionBandInfo, type EvolutionCandidate, type EvolutionGeneration, type EvolutionReport, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, type HarvestCorpusOptions, type HarvestFailure, type HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, type IntentAudit, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, type McpEndpoint, type McpEnvironmentOptions, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type PromotionGateOptions, type 
PromotionVerdict, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterChatResult, type RouterChatToolsResult, type RouterConfig, type RouterSeam, type RouterToolCall, type RouterToolLoopResult, type RouterToolsSeam, type RunAgenticOptions, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, type ShotPersona, type ShotSpec, SpawnEvent, SpawnJournal, Spend, type SteerContext, type Strategy, type StrategyCtx, type StrategyEvolutionConfig, type StrategyResult, SupervisedResult, Supervisor, type SurfaceScore, type ToolSpec, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type VerifierEnvironmentOptions, type Verify, type VerifySpec, type WaterfallCollector, type WaterfallReport, type WaterfallSpan, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, adaptiveRefine, anytimeReport, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, createWaterfallCollector, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, discriminatingMeans, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, jjWorkspace, localShell, 
loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pickChampion, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, renderAnytimeTable, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, routerChatWithTools, routerChatWithUsage, routerToolLoop, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen };