@tangle-network/agent-runtime 0.45.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +3 -3
  2. package/dist/agent.d.ts +5 -5
  3. package/dist/agent.js +2 -2
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +5 -40
  6. package/dist/analyst-loop.js +2 -4
  7. package/dist/{chunk-IJ6FGOPO.js → chunk-5YDS7BLC.js} +12 -7
  8. package/dist/chunk-5YDS7BLC.js.map +1 -0
  9. package/dist/{chunk-KEWO4KI6.js → chunk-72JQCHOZ.js} +850 -131
  10. package/dist/chunk-72JQCHOZ.js.map +1 -0
  11. package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
  12. package/dist/chunk-GSUO5QS6.js.map +1 -0
  13. package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
  14. package/dist/chunk-HNUXAZIJ.js.map +1 -0
  15. package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
  16. package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
  17. package/dist/chunk-KADIJAD4.js.map +1 -0
  18. package/dist/{chunk-NYN5RTLP.js → chunk-MGFEUYOH.js} +7 -7
  19. package/dist/chunk-MGFEUYOH.js.map +1 -0
  20. package/dist/{chunk-Z2QXVBA6.js → chunk-T4OQQEE3.js} +4 -4
  21. package/dist/chunk-T4OQQEE3.js.map +1 -0
  22. package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
  23. package/dist/{coder-CczgMqFx.d.ts → coder-CVZNGbyg.d.ts} +1 -1
  24. package/dist/{dynamic-BvllHV6M.d.ts → driver-DYU2sgHr.d.ts} +6 -6
  25. package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  26. package/dist/improvement.d.ts +2 -2
  27. package/dist/index.d.ts +8 -8
  28. package/dist/index.js +8 -8
  29. package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-51BlLlVM.d.ts} +13 -7
  30. package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-DEm4roYF.d.ts} +11 -11
  31. package/dist/loop-runner-bin.d.ts +6 -6
  32. package/dist/loop-runner-bin.js +6 -6
  33. package/dist/loops.d.ts +5 -5
  34. package/dist/loops.js +18 -10
  35. package/dist/mcp/bin.js +6 -6
  36. package/dist/mcp/bin.js.map +1 -1
  37. package/dist/mcp/index.d.ts +75 -74
  38. package/dist/mcp/index.js +203 -31
  39. package/dist/mcp/index.js.map +1 -1
  40. package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
  41. package/dist/profiles.d.ts +8 -8
  42. package/dist/profiles.js +1 -1
  43. package/dist/profiles.js.map +1 -1
  44. package/dist/{run-loop--hSoIknW.d.ts → run-loop-DvD4aGiE.d.ts} +2 -2
  45. package/dist/runtime.d.ts +244 -57
  46. package/dist/runtime.js +18 -10
  47. package/dist/{types-1HbsFa7H.d.ts → types-Cbx3dNK5.d.ts} +23 -23
  48. package/dist/{types-DdzkffAm.d.ts → types-nBMuollC.d.ts} +34 -5
  49. package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
  50. package/dist/workflow.d.ts +3 -3
  51. package/dist/workflow.js +2 -2
  52. package/dist/workflow.js.map +1 -1
  53. package/package.json +1 -1
  54. package/skills/agent-runtime-adoption/SKILL.md +3 -3
  55. package/skills/generate-eval/SKILL.md +60 -0
  56. package/skills/loop-writer/SKILL.md +163 -0
  57. package/dist/chunk-FK53TXOP.js.map +0 -1
  58. package/dist/chunk-IJ6FGOPO.js.map +0 -1
  59. package/dist/chunk-KEWO4KI6.js.map +0 -1
  60. package/dist/chunk-NYN5RTLP.js.map +0 -1
  61. package/dist/chunk-PRX45WE2.js.map +0 -1
  62. package/dist/chunk-QR4UUC5P.js.map +0 -1
  63. package/dist/chunk-Z2QXVBA6.js.map +0 -1
  64. /package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
  65. /package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0
package/dist/runtime.d.ts CHANGED
@@ -1,15 +1,15 @@
1
1
  import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
2
2
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, U as UsageEvent, L as LeafExecutorFactory, h as Supervisor } from './types-1HbsFa7H.js';
4
- export { i as ExecutorContext, H as Handle, j as LeafExecutor, k as LeafResult, l as NodeSnapshot, m as NodeStatus, n as Restart, o as RootSignal, p as Runtime, q as SpawnOpts, r as SupervisorOpts, W as WidenGate } from './types-1HbsFa7H.js';
5
- export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
6
- import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
3
+ import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-Cbx3dNK5.js';
4
+ export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-Cbx3dNK5.js';
5
+ export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DYU2sgHr.js';
6
+ import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-nBMuollC.js';
7
+ export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-nBMuollC.js';
8
+ import { AgentProfile, AnalystFinding, DefaultVerdict, ChatClient } from '@tangle-network/agent-eval';
7
9
  export { DefaultVerdict } from '@tangle-network/agent-eval';
8
10
  import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
9
- import { R as RunLoopOptions } from './run-loop--hSoIknW.js';
10
- export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
11
- import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
12
- export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
11
+ import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
12
+ export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-DvD4aGiE.js';
13
13
  import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
14
14
 
15
15
  /**
@@ -113,6 +113,13 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
113
113
  */
114
114
  declare function materializeTreeView(events: SpawnEvent[]): TreeView;
115
115
 
116
+ /**
117
+ * Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
118
+ * instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
119
+ * run once on the prompt, emit the terminal result event, tear down.
120
+ */
121
+ declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
122
+
116
123
  /**
117
124
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
118
125
  *
@@ -146,7 +153,7 @@ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
146
153
  type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
147
154
  interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
148
155
  /** Sandbox client used for every cell's `runLoop`. Supplied once. */
149
- sandboxClient: LoopSandboxClient;
156
+ sandboxClient: SandboxClient;
150
157
  /** Build the per-cell runLoop options from the scenario (+ profile, when
151
158
  * used with `runProfileMatrix`). */
152
159
  toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
@@ -317,7 +324,7 @@ interface ShapeContext<D = unknown> {
317
324
  * Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
318
325
  * `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
319
326
  * returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
320
- * spec drives the resolved `LeafExecutor`; `act` exists only to satisfy the `Agent` shape.
327
+ * spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
321
328
  */
322
329
  spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
323
330
  /** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
@@ -892,6 +899,61 @@ interface EqualKOnCostOptions {
892
899
  /** `equalKOnCost(arms, opts)` — the cross-arm equal-compute check on conserved cost. */
893
900
  type EqualKOnCost = (arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions) => EqualKVerdict;
894
901
 
902
+ /**
903
+ * The third-person observer — the connective tissue that closes the loop.
904
+ *
905
+ * A driver spawns a worker; the worker can't see itself. `observe` reads the
906
+ * worker's TRACE (what it actually did — every tool call, cost, failure) and
907
+ * produces two streams:
908
+ * - `findings` / `report` — fed back DOWN (a steer for the next attempt) and
909
+ * OUT (the operator-facing "what I noticed + what to change").
910
+ * - `learned` — durable facts written to the cross-run `Corpus` so the NEXT
911
+ * run starts smarter (the continuous half of "continuous self-improvement").
912
+ *
913
+ * Findings are TRACE-derived, never JUDGE-derived (`derived_from_judge:false`):
914
+ * the observer reads behavior, never the acceptance verdict — the selector≠judge
915
+ * firewall (docs/learning-flywheel.md). The observer is harness-agnostic: it
916
+ * reads a trace + an output, so it watches opencode, codex, hermes, or a BYO
917
+ * agent identically.
918
+ */
919
+
920
+ interface ObserveInput {
921
+ /** What the worker was asked to do. */
922
+ task: string;
923
+ /** What it produced (its final answer / artifact summary). */
924
+ output: string;
925
+ /** The worker's trace — any event array (sandbox events, tool-call records). */
926
+ trace: ReadonlyArray<unknown>;
927
+ /** Terminal status only (passed/failed/unknown) — NOT a judge score; the
928
+ * observer never reads the verdict, it reads behavior. */
929
+ outcome?: 'passed' | 'failed' | 'unknown';
930
+ /** Provenance back to the run. */
931
+ runId?: string;
932
+ }
933
+ interface ObserveOptions {
934
+ /** The model-call seam (agent-eval `createChatClient`: router / cli-bridge / …). */
935
+ chat: ChatClient;
936
+ model?: string;
937
+ /** When set, learned facts are appended (idempotent) for the next run to read. */
938
+ corpus?: Corpus;
939
+ /** Tags written onto learned facts + used by the next run's corpus query. */
940
+ tags?: ReadonlyArray<string>;
941
+ signal?: AbortSignal;
942
+ /** Cap the trace lines fed to the observer (keeps the call cheap). Default 80. */
943
+ maxTraceLines?: number;
944
+ }
945
+ interface Observation {
946
+ findings: AnalystFinding[];
947
+ /** Facts persisted to the corpus (empty when no corpus was supplied). */
948
+ learned: CorpusRecord[];
949
+ /** Operator-facing markdown: what the observer noticed + what to change. */
950
+ report: string;
951
+ }
952
+ declare function observe(input: ObserveInput, opts: ObserveOptions): Promise<Observation>;
953
+ /** Operator-facing report, split by who should act. The agent block is the
954
+ * steer; the operator block is the advice. */
955
+ declare function renderReport(findings: ReadonlyArray<AnalystFinding>): string;
956
+
895
957
  /**
896
958
  * @experimental
897
959
  *
@@ -1299,7 +1361,7 @@ interface AcquireOptions {
1299
1361
  sleep?: (ms: number) => Promise<void>;
1300
1362
  }
1301
1363
  /** @experimental */
1302
- declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
1364
+ declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
1303
1365
 
1304
1366
  /**
1305
1367
  * @experimental
@@ -1340,9 +1402,9 @@ interface SandboxCapabilities {
1340
1402
  *
1341
1403
  * @experimental
1342
1404
  */
1343
- declare function probeSandboxCapabilities(client: LoopSandboxClient): Promise<SandboxCapabilities>;
1405
+ declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
1344
1406
  /**
1345
- * Narrowed view of the optional CRIU probe. The loop-side `LoopSandboxClient`
1407
+ * Narrowed view of the optional CRIU probe. The loop-side `SandboxClient`
1346
1408
  * does not require `criuStatus`; this widens it optionally so the probe can be
1347
1409
  * read without importing sandbox-backend specifics. @experimental
1348
1410
  */
@@ -1514,12 +1576,13 @@ interface SandboxLineage {
1514
1576
  *
1515
1577
  * @experimental
1516
1578
  */
1517
- declare function createSandboxLineage(client: LoopSandboxClient, capabilities: SandboxCapabilities, options?: {
1579
+ declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
1518
1580
  maxConcurrency?: number;
1581
+ streaming?: 'sse' | 'poll';
1519
1582
  }): SandboxLineage;
1520
1583
  /**
1521
1584
  * Loop-side widening of the box's optional checkpoint method. The
1522
- * `LoopSandboxClient`/`SandboxInstance` surface the kernel relies on does not
1585
+ * `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
1523
1586
  * require checkpointing; this reads it optionally so the lineage can probe-gate
1524
1587
  * without importing sandbox-backend specifics. @experimental
1525
1588
  */
@@ -1550,20 +1613,118 @@ interface SessionCapableBox {
1550
1613
  };
1551
1614
  }
1552
1615
 
1616
+ /**
1617
+ * `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
1618
+ * sandbox over a persistent artifact: run it, stream it, RESUME the same session
1619
+ * across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
1620
+ * agent — all flow through this; the domain lives only in the `Deliverable<Out>`
1621
+ * the caller supplies, never in a per-domain copy of this function.
1622
+ *
1623
+ * It is a thin facade (NOT a new layer) over code that already exists and is
1624
+ * already hardened:
1625
+ * - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
1626
+ * - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
1627
+ * claude-code / kimi-code / hermes / pi); the only "which agent" knob,
1628
+ * - `createSandboxLineage` — `start` mints a session; `resume` continues the
1629
+ * SAME server-side session with a fail-loud `assertSessionLive`.
1630
+ *
1631
+ * The one genuinely-new piece is {@link Deliverable}: it widens the pure
1632
+ * `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
1633
+ * the structural gap that made the bench gates hand-roll `box.fs.read`, because a
1634
+ * large produced file (a git diff, a generated document) truncates in the chat
1635
+ * stream and a pure events-parser cannot reach the workspace. Per the SDK, a
1636
+ * RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
1637
+ * (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
1638
+ * valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
1639
+ *
1640
+ * What this deliberately does NOT do (so it stays a facade, not slop): no custom
1641
+ * reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
1642
+ * no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
1643
+ * `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
1644
+ * It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
1645
+ * kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
1646
+ * artifact-or-events deliverable + resume over ONE persistent box.
1647
+ */
1648
+
1649
+ /**
1650
+ * @experimental
1651
+ * How a typed deliverable `Out` is materialized from a finished turn.
1652
+ * - `events` — pure parse over the event array (identical to `OutputAdapter`).
1653
+ * - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
1654
+ * events). For diffs/codebases/documents that don't fit the chat
1655
+ * stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
1656
+ */
1657
+ type Deliverable<Out> = {
1658
+ kind: 'events';
1659
+ fromEvents: (events: SandboxEvent[]) => Out;
1660
+ } | {
1661
+ kind: 'artifact';
1662
+ path: string;
1663
+ fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
1664
+ };
1665
+ /**
1666
+ * @experimental
1667
+ * One finished turn over the artifact. A failed FS read is surfaced in `readError`
1668
+ * (never masked as an empty deliverable) so a caller distinguishes "agent produced
1669
+ * nothing" from a transport/FS fault.
1670
+ */
1671
+ interface TurnResult<Out> {
1672
+ out: Out;
1673
+ events: SandboxEvent[];
1674
+ readError?: string;
1675
+ }
1676
+ /** @experimental A live run over ONE persistent artifact (box + session). Close it
1677
+ * when done — `close()` tears the box down. */
1678
+ interface SandboxRun<Out> {
1679
+ readonly box: SandboxInstance;
1680
+ readonly sessionId: string;
1681
+ /** First turn over the fresh box (mints the session). Throws if already started. */
1682
+ start(prompt: string): Promise<TurnResult<Out>>;
1683
+ /** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
1684
+ resume(prompt: string): Promise<TurnResult<Out>>;
1685
+ close(): Promise<void>;
1686
+ }
1687
+ /** @experimental */
1688
+ interface OpenSandboxRunOptions {
1689
+ /** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
1690
+ agentRun: AgentRunSpec<string>;
1691
+ signal: AbortSignal;
1692
+ /** Optional execution-scoped observers. Hook failures never fail the run. */
1693
+ hooks?: RuntimeHooks;
1694
+ /** Stable run id for trace joins. Defaults to a short runtime-minted id. */
1695
+ runId?: string;
1696
+ /** Optional benchmark/scenario id carried into emitted hook events. */
1697
+ scenarioId?: string;
1698
+ /** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
1699
+ now?: () => number;
1700
+ /** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
1701
+ maxConcurrency?: number;
1702
+ /** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
1703
+ * retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
1704
+ readRetryDelayMs?: number;
1705
+ }
1706
+ /**
1707
+ * @experimental
1708
+ * Open a sandbox run. Harness-agnostic: the harness lives in
1709
+ * `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
1710
+ * kimi-code all flow through this one entrypoint with identical env/auth wiring.
1711
+ */
1712
+ declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
1713
+
1553
1714
  /**
1554
1715
  * @experimental
1555
1716
  *
1556
1717
  * The conserved budget reservation pool — the invariant the whole instrument
1557
1718
  * rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
1558
- * quantities (tokens, usd, iterations) plus an absolute deadline. Children RESERVE
1559
- * atomically at spawn and RECONCILE at settle:
1719
+ * quantities (tokens, usd, iterations) plus an absolute deadline. Children reserve
1720
+ * atomically at spawn and reconcile at settle:
1560
1721
  *
1561
1722
  * total ≡ free + reserved + committed (invariant, always)
1562
1723
  *
1563
- * `reserve` moves a child's whole ceiling from `free` → `reserved` and FAILS CLOSED
1724
+ * `reserve` moves a child's whole ceiling from `free` → `reserved` and fails closed
1564
1725
  * when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
1565
1726
  * Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
1566
- * spend, and refunds the unspent remainder to `free`. Tokens and usd are SEPARATE
1727
+ * spend, and refunds the unspent remainder to `free`. Tokens and usd are separate
1567
1728
  * channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
1568
1729
  *
1569
1730
  * Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
@@ -1635,7 +1796,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
1635
1796
  /**
1636
1797
  * @experimental
1637
1798
  *
1638
- * The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
1799
+ * The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
1639
1800
  * open interface frozen in `./types`, plus the open resolver/registry that maps
1640
1801
  * an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
1641
1802
  *
@@ -1649,7 +1810,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
1649
1810
  * excluded from the equal-k arms by construction (streaming).
1650
1811
  * Every metered runtime reports through the SAME normalized `UsageEvent` channel
1651
1812
  * so the conserved budget pool meters them identically. A user's own agent is
1652
- * first-class the moment it implements `LeafExecutor` — register it by name or
1813
+ * first-class the moment it implements `Executor` — register it by name or
1653
1814
  * pass it as `AgentSpec.executor`.
1654
1815
  *
1655
1816
  * Layering: `estimateCost`/`isModelPriced` are substrate primitives from
@@ -1675,7 +1836,7 @@ interface RouterSeam {
1675
1836
  * checkpoint/fork.
1676
1837
  */
1677
1838
  interface SandboxSeam {
1678
- sandboxClient: LoopSandboxClient;
1839
+ sandboxClient: SandboxClient;
1679
1840
  /** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
1680
1841
  loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
1681
1842
  /** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
@@ -1694,40 +1855,38 @@ interface CliSeam {
1694
1855
  cwd?: string;
1695
1856
  }
1696
1857
  /**
1697
- * A direct OpenAI-compatible Router chat-completion. One-shot: resolves a
1698
- * `LeafResult` and reports its terminal usage as `UsageEvent`s through the
1699
- * conserved pool. Reports REAL token usage — when the provider omits `usage`,
1700
- * the spend records zero tokens but the call still counts one iteration (a
1701
- * phantom fabricated 0 is never emitted as a priced cost).
1702
- *
1703
- * NOTE for the Integrate phase: this duplicates the minimal body of
1704
- * `bench/src/router-client.ts#routerChatWithUsage`. `bench/` is a sub-package
1705
- * outside this package's `rootDir: "src"`, so it cannot be imported here without
1706
- * breaking the build. Integrate should lift that helper into `src/loops/` and
1707
- * have both call sites share it (do not re-copy a third time).
1708
- */
1709
- declare const routerInlineExecutor: LeafExecutorFactory<unknown>;
1710
- /**
1711
- * COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
1712
- * the seam's `maxIterations` (default 1), the spec's profile as the agent run.
1713
- * Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
1714
- * it drains, and yields one `iteration` event per loop iteration. Forwards the
1715
- * optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
1716
- * checkpoint/fork.
1717
- *
1718
- * Streaming shape: the loop runs to completion inside the first `next()`, then
1719
- * the recorded usage events are yielded; the terminal artifact is read from
1720
- * `resultArtifact()` after the stream drains.
1721
- */
1722
- declare const sandboxExecutor: LeafExecutorFactory<unknown>;
1858
+ * cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
1859
+ * (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
1860
+ * as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
1861
+ * `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
1862
+ * forwarded verbatim per request — how an arm disables native tools or injects
1863
+ * a provider search MCP.
1864
+ */
1865
+ interface BridgeSeam {
1866
+ bridgeUrl: string;
1867
+ bridgeBearer: string;
1868
+ model: string;
1869
+ agentProfile?: Record<string, unknown>;
1870
+ timeoutMs?: number;
1871
+ }
1723
1872
  /**
1724
- * Spawns a subprocess (`bin` + `args`). It cannot account tokens, so it is
1725
- * `budgetExempt: true`: its spend is NOT metered against the conserved pool and
1726
- * its iterations are EXCLUDED from the equal-k arms by construction (the
1727
- * resolver/equal-k path checks `budgetExempt`). teardown is SIGTERM → SIGKILL
1728
- * with a grace window. Streaming: yields one `iteration` event on clean exit.
1729
- */
1730
- declare const cliExecutor: LeafExecutorFactory<unknown>;
1873
+ * The single built-in executor entrypoint. The backend is DATA — the cost dial a
1874
+ * profile, an experiment config, or a replay journal can name — not an import
1875
+ * choice. Injects the matching seam and delegates to the built-in implementation;
1876
+ * the port stays OPEN: bring-your-own agents implement `Executor` directly and
1877
+ * never pass through here.
1878
+ */
1879
+ type ExecutorConfig = ({
1880
+ backend: 'router';
1881
+ } & RouterSeam) | ({
1882
+ backend: 'bridge';
1883
+ } & BridgeSeam) | ({
1884
+ backend: 'cli';
1885
+ } & CliSeam) | ({
1886
+ backend: 'sandbox';
1887
+ harness?: BackendType;
1888
+ } & SandboxSeam);
1889
+ declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
1731
1890
  /**
1732
1891
  * The open resolver/registry. Pre-registers the three built-ins under their
1733
1892
  * runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
@@ -1749,7 +1908,7 @@ declare function createExecutorRegistry(): ExecutorRegistry;
1749
1908
  * An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
1750
1909
  * them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
1751
1910
  * and is the single place that drives a child's lifecycle: reserve budget atomically,
1752
- * resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
1911
+ * resolve an `Executor` through the open registry, run it (one-shot OR streaming),
1753
1912
  * fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
1754
1913
  * (refunding the unspent remainder), persist the result blob + journal records, and
1755
1914
  * deliver the `Settled` through the `next()` cursor.
@@ -1857,4 +2016,32 @@ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
1857
2016
  */
1858
2017
  declare function createRootHandle<Out>(): RootHandle<Out>;
1859
2018
 
1860
- export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, LeafExecutorFactory, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, type SandboxLineage, type SandboxLineageHandle, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, 
createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
2019
+ /** Command runner seam. Host code can use `localShell`; sandbox code can wrap `box.exec`. */
2020
+ type Shell = (args: ReadonlyArray<string>, cwd?: string) => Promise<{
2021
+ stdout: string;
2022
+ stderr: string;
2023
+ code: number;
2024
+ }>;
2025
+ type WorkspaceCommit = {
2026
+ readonly ok: true;
2027
+ readonly rev: string;
2028
+ } | {
2029
+ readonly ok: false;
2030
+ readonly conflict: string;
2031
+ };
2032
+ interface Workspace {
2033
+ readonly ref: string;
2034
+ materialize(dir: string): Promise<void>;
2035
+ commit(dir: string, message: string): Promise<WorkspaceCommit>;
2036
+ head(): Promise<string>;
2037
+ }
2038
+ declare function localShell(): Shell;
2039
+ interface GitWorkspaceOptions {
2040
+ readonly ref: string;
2041
+ readonly shell?: Shell;
2042
+ readonly branch?: string;
2043
+ readonly noHooks?: boolean;
2044
+ }
2045
+ declare function gitWorkspace(opts: GitWorkspaceOptions): Workspace;
2046
+
2047
+ export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type 
WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, inlineSandboxClient, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
package/dist/runtime.js CHANGED
@@ -9,11 +9,11 @@ import {
9
9
  assertTraceDerivedFindings,
10
10
  buildSteerContext,
11
11
  builtinShapes,
12
- cliExecutor,
13
12
  completionAuthorizes,
14
13
  contentAddress,
15
14
  createBudgetPool,
16
- createDynamicDriver,
15
+ createDriver,
16
+ createExecutor,
17
17
  createExecutorRegistry,
18
18
  createRootHandle,
19
19
  createSandboxForSpec,
@@ -28,21 +28,25 @@ import {
28
28
  equalKOnCost,
29
29
  fanout,
30
30
  flatWidenGate,
31
+ gitWorkspace,
32
+ inlineSandboxClient,
33
+ localShell,
31
34
  loopDispatch,
32
35
  loopUntil,
33
36
  materializeTreeView,
37
+ observe,
38
+ openSandboxRun,
34
39
  panel,
35
40
  pipeline,
36
41
  probeSandboxCapabilities,
37
42
  registerShape,
38
43
  renderAnalyses,
39
44
  renderCorpusToInstructions,
45
+ renderReport,
40
46
  replaySpawnTree,
41
47
  reportLoopUsage,
42
- routerInlineExecutor,
43
48
  runLoop,
44
49
  runPersonified,
45
- sandboxExecutor,
46
50
  sentinelCompletion,
47
51
  settledToIteration,
48
52
  spendFromUsageEvents,
@@ -50,11 +54,11 @@ import {
50
54
  trajectoryReport,
51
55
  verify,
52
56
  widen
53
- } from "./chunk-KEWO4KI6.js";
57
+ } from "./chunk-72JQCHOZ.js";
54
58
  import {
55
59
  extractLlmCallEvent,
56
60
  mapSandboxEvent
57
- } from "./chunk-PRX45WE2.js";
61
+ } from "./chunk-GSUO5QS6.js";
58
62
  import "./chunk-DGUM43GV.js";
59
63
  export {
60
64
  FileCorpus,
@@ -67,11 +71,11 @@ export {
67
71
  assertTraceDerivedFindings,
68
72
  buildSteerContext,
69
73
  builtinShapes,
70
- cliExecutor,
71
74
  completionAuthorizes,
72
75
  contentAddress,
73
76
  createBudgetPool,
74
- createDynamicDriver,
77
+ createDriver,
78
+ createExecutor,
75
79
  createExecutorRegistry,
76
80
  createRootHandle,
77
81
  createSandboxForSpec,
@@ -87,22 +91,26 @@ export {
87
91
  extractLlmCallEvent,
88
92
  fanout,
89
93
  flatWidenGate,
94
+ gitWorkspace,
95
+ inlineSandboxClient,
96
+ localShell,
90
97
  loopDispatch,
91
98
  loopUntil,
92
99
  mapSandboxEvent,
93
100
  materializeTreeView,
101
+ observe,
102
+ openSandboxRun,
94
103
  panel,
95
104
  pipeline,
96
105
  probeSandboxCapabilities,
97
106
  registerShape,
98
107
  renderAnalyses,
99
108
  renderCorpusToInstructions,
109
+ renderReport,
100
110
  replaySpawnTree,
101
111
  reportLoopUsage,
102
- routerInlineExecutor,
103
112
  runLoop,
104
113
  runPersonified,
105
- sandboxExecutor,
106
114
  sentinelCompletion,
107
115
  settledToIteration,
108
116
  spendFromUsageEvents,