npm - @tangle-network/agent-runtime - Versions diffs - 0.45.0 → 0.47.0 - Mend

@tangle-network/agent-runtime 0.45.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/README.md +3 -3
package/dist/agent.d.ts +5 -5
package/dist/agent.js +2 -2
package/dist/agent.js.map +1 -1
package/dist/analyst-loop.d.ts +5 -40
package/dist/analyst-loop.js +2 -4
package/dist/{chunk-IJ6FGOPO.js → chunk-5YDS7BLC.js} +12 -7
package/dist/chunk-5YDS7BLC.js.map +1 -0
package/dist/{chunk-KEWO4KI6.js → chunk-72JQCHOZ.js} +850 -131
package/dist/chunk-72JQCHOZ.js.map +1 -0
package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
package/dist/chunk-GSUO5QS6.js.map +1 -0
package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
package/dist/chunk-HNUXAZIJ.js.map +1 -0
package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
package/dist/chunk-KADIJAD4.js.map +1 -0
package/dist/{chunk-NYN5RTLP.js → chunk-MGFEUYOH.js} +7 -7
package/dist/chunk-MGFEUYOH.js.map +1 -0
package/dist/{chunk-Z2QXVBA6.js → chunk-T4OQQEE3.js} +4 -4
package/dist/chunk-T4OQQEE3.js.map +1 -0
package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
package/dist/{coder-CczgMqFx.d.ts → coder-CVZNGbyg.d.ts} +1 -1
package/dist/{dynamic-BvllHV6M.d.ts → driver-DYU2sgHr.d.ts} +6 -6
package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
package/dist/improvement.d.ts +2 -2
package/dist/index.d.ts +8 -8
package/dist/index.js +8 -8
package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-51BlLlVM.d.ts} +13 -7
package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-DEm4roYF.d.ts} +11 -11
package/dist/loop-runner-bin.d.ts +6 -6
package/dist/loop-runner-bin.js +6 -6
package/dist/loops.d.ts +5 -5
package/dist/loops.js +18 -10
package/dist/mcp/bin.js +6 -6
package/dist/mcp/bin.js.map +1 -1
package/dist/mcp/index.d.ts +75 -74
package/dist/mcp/index.js +203 -31
package/dist/mcp/index.js.map +1 -1
package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
package/dist/profiles.d.ts +8 -8
package/dist/profiles.js +1 -1
package/dist/profiles.js.map +1 -1
package/dist/{run-loop--hSoIknW.d.ts → run-loop-DvD4aGiE.d.ts} +2 -2
package/dist/runtime.d.ts +244 -57
package/dist/runtime.js +18 -10
package/dist/{types-1HbsFa7H.d.ts → types-Cbx3dNK5.d.ts} +23 -23
package/dist/{types-DdzkffAm.d.ts → types-nBMuollC.d.ts} +34 -5
package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
package/dist/workflow.d.ts +3 -3
package/dist/workflow.js +2 -2
package/dist/workflow.js.map +1 -1
package/package.json +1 -1
package/skills/agent-runtime-adoption/SKILL.md +3 -3
package/skills/generate-eval/SKILL.md +60 -0
package/skills/loop-writer/SKILL.md +163 -0
package/dist/chunk-FK53TXOP.js.map +0 -1
package/dist/chunk-IJ6FGOPO.js.map +0 -1
package/dist/chunk-KEWO4KI6.js.map +0 -1
package/dist/chunk-NYN5RTLP.js.map +0 -1
package/dist/chunk-PRX45WE2.js.map +0 -1
package/dist/chunk-QR4UUC5P.js.map +0 -1
package/dist/chunk-Z2QXVBA6.js.map +0 -1
/package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
/package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0

package/dist/runtime.d.ts CHANGED Viewed

@@ -1,15 +1,15 @@
 import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
 export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
-import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, U as UsageEvent, L as LeafExecutorFactory, h as Supervisor } from './types-1HbsFa7H.js';
-export { i as ExecutorContext, H as Handle, j as LeafExecutor, k as LeafResult, l as NodeSnapshot, m as NodeStatus, n as Restart, o as RootSignal, p as Runtime, q as SpawnOpts, r as SupervisorOpts, W as WidenGate } from './types-1HbsFa7H.js';
-export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
-import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
+import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-Cbx3dNK5.js';
+export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-Cbx3dNK5.js';
+export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DYU2sgHr.js';
+import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-nBMuollC.js';
+export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-nBMuollC.js';
+import { AgentProfile, AnalystFinding, DefaultVerdict, ChatClient } from '@tangle-network/agent-eval';
 export { DefaultVerdict } from '@tangle-network/agent-eval';
 import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
-import { R as RunLoopOptions } from './run-loop--hSoIknW.js';
-export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
-import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
-export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
+import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
+export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-DvD4aGiE.js';
 import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
 /**
@@ -113,6 +113,13 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
  */
 declare function materializeTreeView(events: SpawnEvent[]): TreeView;
+/**
+ * Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
+ * instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
+ * run once on the prompt, emit the terminal result event, tear down.
+ */
+declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
 /**
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
  *
@@ -146,7 +153,7 @@ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
 type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
 interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
     /** Sandbox client used for every cell's `runLoop`. Supplied once. */
-    sandboxClient: LoopSandboxClient;
+    sandboxClient: SandboxClient;
     /** Build the per-cell runLoop options from the scenario (+ profile, when
      *  used with `runProfileMatrix`). */
     toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
@@ -317,7 +324,7 @@ interface ShapeContext<D = unknown> {
      * Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
      * `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
      * returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
-     * spec drives the resolved `LeafExecutor`; `act` exists only to satisfy the `Agent` shape.
+     * spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
      */
     spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
     /** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
@@ -892,6 +899,61 @@ interface EqualKOnCostOptions {
 /** `equalKOnCost(arms, opts)` — the cross-arm equal-compute check on conserved cost. */
 type EqualKOnCost = (arms: ReadonlyArray<EqualKArm>, options?: EqualKOnCostOptions) => EqualKVerdict;
+/**
+ * The third-person observer — the connective tissue that closes the loop.
+ *
+ * A driver spawns a worker; the worker can't see itself. `observe` reads the
+ * worker's TRACE (what it actually did — every tool call, cost, failure) and
+ * produces two streams:
+ *   - `findings` / `report` — fed back DOWN (a steer for the next attempt) and
+ *     OUT (the operator-facing "what I noticed + what to change").
+ *   - `learned` — durable facts written to the cross-run `Corpus` so the NEXT
+ *     run starts smarter (the continuous half of "continuous self-improvement").
+ *
+ * Findings are TRACE-derived, never JUDGE-derived (`derived_from_judge:false`):
+ * the observer reads behavior, never the acceptance verdict — the selector≠judge
+ * firewall (docs/learning-flywheel.md). The observer is harness-agnostic: it
+ * reads a trace + an output, so it watches opencode, codex, hermes, or a BYO
+ * agent identically.
+ */
+interface ObserveInput {
+    /** What the worker was asked to do. */
+    task: string;
+    /** What it produced (its final answer / artifact summary). */
+    output: string;
+    /** The worker's trace — any event array (sandbox events, tool-call records). */
+    trace: ReadonlyArray<unknown>;
+    /** Terminal status only (passed/failed/unknown) — NOT a judge score; the
+     *  observer never reads the verdict, it reads behavior. */
+    outcome?: 'passed' | 'failed' | 'unknown';
+    /** Provenance back to the run. */
+    runId?: string;
+}
+interface ObserveOptions {
+    /** The model-call seam (agent-eval `createChatClient`: router / cli-bridge / …). */
+    chat: ChatClient;
+    model?: string;
+    /** When set, learned facts are appended (idempotent) for the next run to read. */
+    corpus?: Corpus;
+    /** Tags written onto learned facts + used by the next run's corpus query. */
+    tags?: ReadonlyArray<string>;
+    signal?: AbortSignal;
+    /** Cap the trace lines fed to the observer (keeps the call cheap). Default 80. */
+    maxTraceLines?: number;
+}
+interface Observation {
+    findings: AnalystFinding[];
+    /** Facts persisted to the corpus (empty when no corpus was supplied). */
+    learned: CorpusRecord[];
+    /** Operator-facing markdown: what the observer noticed + what to change. */
+    report: string;
+}
+declare function observe(input: ObserveInput, opts: ObserveOptions): Promise<Observation>;
+/** Operator-facing report, split by who should act. The agent block is the
+ *  steer; the operator block is the advice. */
+declare function renderReport(findings: ReadonlyArray<AnalystFinding>): string;
 /**
  * @experimental
  *
@@ -1299,7 +1361,7 @@ interface AcquireOptions {
     sleep?: (ms: number) => Promise<void>;
 }
 /** @experimental */
-declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
+declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
 /**
  * @experimental
@@ -1340,9 +1402,9 @@ interface SandboxCapabilities {
  *
  * @experimental
  */
-declare function probeSandboxCapabilities(client: LoopSandboxClient): Promise<SandboxCapabilities>;
+declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
 /**
- * Narrowed view of the optional CRIU probe. The loop-side `LoopSandboxClient`
+ * Narrowed view of the optional CRIU probe. The loop-side `SandboxClient`
  * does not require `criuStatus`; this widens it optionally so the probe can be
  * read without importing sandbox-backend specifics. @experimental
  */
@@ -1514,12 +1576,13 @@ interface SandboxLineage {
  *
  * @experimental
  */
-declare function createSandboxLineage(client: LoopSandboxClient, capabilities: SandboxCapabilities, options?: {
+declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
     maxConcurrency?: number;
+    streaming?: 'sse' | 'poll';
 }): SandboxLineage;
 /**
  * Loop-side widening of the box's optional checkpoint method. The
- * `LoopSandboxClient`/`SandboxInstance` surface the kernel relies on does not
+ * `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
  * require checkpointing; this reads it optionally so the lineage can probe-gate
  * without importing sandbox-backend specifics. @experimental
  */
@@ -1550,20 +1613,118 @@ interface SessionCapableBox {
     };
 }
+/**
+ * `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
+ * sandbox over a persistent artifact: run it, stream it, RESUME the same session
+ * across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
+ * agent — all flow through this; the domain lives only in the `Deliverable<Out>`
+ * the caller supplies, never in a per-domain copy of this function.
+ *
+ * It is a thin facade (NOT a new layer) over code that already exists and is
+ * already hardened:
+ *   - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
+ *   - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
+ *     claude-code / kimi-code / hermes / pi); the only "which agent" knob,
+ *   - `createSandboxLineage` — `start` mints a session; `resume` continues the
+ *     SAME server-side session with a fail-loud `assertSessionLive`.
+ *
+ * The one genuinely-new piece is {@link Deliverable}: it widens the pure
+ * `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
+ * the structural gap that made the bench gates hand-roll `box.fs.read`, because a
+ * large produced file (a git diff, a generated document) truncates in the chat
+ * stream and a pure events-parser cannot reach the workspace. Per the SDK, a
+ * RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
+ * (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
+ * valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
+ *
+ * What this deliberately does NOT do (so it stays a facade, not slop): no custom
+ * reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
+ * no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
+ * `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
+ * It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
+ * kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
+ * artifact-or-events deliverable + resume over ONE persistent box.
+ */
+/**
+ * @experimental
+ * How a typed deliverable `Out` is materialized from a finished turn.
+ * - `events`   — pure parse over the event array (identical to `OutputAdapter`).
+ * - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
+ *                events). For diffs/codebases/documents that don't fit the chat
+ *                stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
+ */
+type Deliverable<Out> = {
+    kind: 'events';
+    fromEvents: (events: SandboxEvent[]) => Out;
+} | {
+    kind: 'artifact';
+    path: string;
+    fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
+};
+/**
+ * @experimental
+ * One finished turn over the artifact. A failed FS read is surfaced in `readError`
+ * (never masked as an empty deliverable) so a caller distinguishes "agent produced
+ * nothing" from a transport/FS fault.
+ */
+interface TurnResult<Out> {
+    out: Out;
+    events: SandboxEvent[];
+    readError?: string;
+}
+/** @experimental A live run over ONE persistent artifact (box + session). Close it
+ *  when done — `close()` tears the box down. */
+interface SandboxRun<Out> {
+    readonly box: SandboxInstance;
+    readonly sessionId: string;
+    /** First turn over the fresh box (mints the session). Throws if already started. */
+    start(prompt: string): Promise<TurnResult<Out>>;
+    /** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
+    resume(prompt: string): Promise<TurnResult<Out>>;
+    close(): Promise<void>;
+}
+/** @experimental */
+interface OpenSandboxRunOptions {
+    /** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
+    agentRun: AgentRunSpec<string>;
+    signal: AbortSignal;
+    /** Optional execution-scoped observers. Hook failures never fail the run. */
+    hooks?: RuntimeHooks;
+    /** Stable run id for trace joins. Defaults to a short runtime-minted id. */
+    runId?: string;
+    /** Optional benchmark/scenario id carried into emitted hook events. */
+    scenarioId?: string;
+    /** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
+    now?: () => number;
+    /** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
+    maxConcurrency?: number;
+    /** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
+     *  retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
+    readRetryDelayMs?: number;
+}
+/**
+ * @experimental
+ * Open a sandbox run. Harness-agnostic: the harness lives in
+ * `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
+ * kimi-code all flow through this one entrypoint with identical env/auth wiring.
+ */
+declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
 /**
  * @experimental
  *
  * The conserved budget reservation pool — the invariant the whole instrument
  * rests on (critique M5/B3). One root `Budget` becomes a conserved pool of three
- * quantities (tokens, usd, iterations) plus an absolute deadline. Children RESERVE
- * atomically at spawn and RECONCILE at settle:
+ * quantities (tokens, usd, iterations) plus an absolute deadline. Children reserve
+ * atomically at spawn and reconcile at settle:
  *
  *   total ≡ free + reserved + committed          (invariant, always)
  *
- * `reserve` moves a child's whole ceiling from `free` → `reserved` and FAILS CLOSED
+ * `reserve` moves a child's whole ceiling from `free` → `reserved` and fails closed
  * when `free` can't cover it (never read-then-spawn overcommit, so `Σk(treatment) ≡
  * Σk(blind)` by construction). `reconcile` releases the reservation, commits ACTUAL
- * spend, and refunds the unspent remainder to `free`. Tokens and usd are SEPARATE
+ * spend, and refunds the unspent remainder to `free`. Tokens and usd are separate
  * channels (`LoopTokenUsage` has no `usd`); iterations are conserved alongside them.
  *
  * Pure and deterministic: `now()` is injected, there is no I/O, and no wall-clock or
@@ -1635,7 +1796,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
 /**
  * @experimental
  *
- * The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
+ * The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
  * open interface frozen in `./types`, plus the open resolver/registry that maps
  * an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
  *
@@ -1649,7 +1810,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
  *                     excluded from the equal-k arms by construction (streaming).
  * Every metered runtime reports through the SAME normalized `UsageEvent` channel
  * so the conserved budget pool meters them identically. A user's own agent is
- * first-class the moment it implements `LeafExecutor` — register it by name or
+ * first-class the moment it implements `Executor` — register it by name or
  * pass it as `AgentSpec.executor`.
  *
  * Layering: `estimateCost`/`isModelPriced` are substrate primitives from
@@ -1675,7 +1836,7 @@ interface RouterSeam {
  * checkpoint/fork.
  */
 interface SandboxSeam {
-    sandboxClient: LoopSandboxClient;
+    sandboxClient: SandboxClient;
     /** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
     loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
     /** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
@@ -1694,40 +1855,38 @@ interface CliSeam {
     cwd?: string;
 }
 /**
- * A direct OpenAI-compatible Router chat-completion. One-shot: resolves a
- * `LeafResult` and reports its terminal usage as `UsageEvent`s through the
- * conserved pool. Reports REAL token usage — when the provider omits `usage`,
- * the spend records zero tokens but the call still counts one iteration (a
- * phantom fabricated 0 is never emitted as a priced cost).
- *
- * NOTE for the Integrate phase: this duplicates the minimal body of
- * `bench/src/router-client.ts#routerChatWithUsage`. `bench/` is a sub-package
- * outside this package's `rootDir: "src"`, so it cannot be imported here without
- * breaking the build. Integrate should lift that helper into `src/loops/` and
- * have both call sites share it (do not re-copy a third time).
- */
-declare const routerInlineExecutor: LeafExecutorFactory<unknown>;
-/**
- * COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
- * the seam's `maxIterations` (default 1), the spec's profile as the agent run.
- * Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
- * it drains, and yields one `iteration` event per loop iteration. Forwards the
- * optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
- * checkpoint/fork.
- *
- * Streaming shape: the loop runs to completion inside the first `next()`, then
- * the recorded usage events are yielded; the terminal artifact is read from
- * `resultArtifact()` after the stream drains.
- */
-declare const sandboxExecutor: LeafExecutorFactory<unknown>;
+ * cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
+ * (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
+ * as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
+ * `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
+ * forwarded verbatim per request — how an arm disables native tools or injects
+ * a provider search MCP.
+ */
+interface BridgeSeam {
+    bridgeUrl: string;
+    bridgeBearer: string;
+    model: string;
+    agentProfile?: Record<string, unknown>;
+    timeoutMs?: number;
+}
 /**
- * Spawns a subprocess (`bin` + `args`). It cannot account tokens, so it is
- * `budgetExempt: true`: its spend is NOT metered against the conserved pool and
- * its iterations are EXCLUDED from the equal-k arms by construction (the
- * resolver/equal-k path checks `budgetExempt`). teardown is SIGTERM → SIGKILL
- * with a grace window. Streaming: yields one `iteration` event on clean exit.
- */
-declare const cliExecutor: LeafExecutorFactory<unknown>;
+ * The single built-in executor entrypoint. The backend is DATA — the cost dial a
+ * profile, an experiment config, or a replay journal can name — not an import
+ * choice. Injects the matching seam and delegates to the built-in implementation;
+ * the port stays OPEN: bring-your-own agents implement `Executor` directly and
+ * never pass through here.
+ */
+type ExecutorConfig = ({
+    backend: 'router';
+} & RouterSeam) | ({
+    backend: 'bridge';
+} & BridgeSeam) | ({
+    backend: 'cli';
+} & CliSeam) | ({
+    backend: 'sandbox';
+    harness?: BackendType;
+} & SandboxSeam);
+declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
 /**
  * The open resolver/registry. Pre-registers the three built-ins under their
  * runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
@@ -1749,7 +1908,7 @@ declare function createExecutorRegistry(): ExecutorRegistry;
  * An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
  * them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
  * and is the single place that drives a child's lifecycle: reserve budget atomically,
- * resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
+ * resolve an `Executor` through the open registry, run it (one-shot OR streaming),
  * fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
  * (refunding the unspent remainder), persist the result blob + journal records, and
  * deliver the `Settled` through the `next()` cursor.
@@ -1857,4 +2016,32 @@ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
  */
 declare function createRootHandle<Out>(): RootHandle<Out>;
-export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, LeafExecutorFactory, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, type SandboxLineage, type SandboxLineageHandle, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, 
createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
+/** Command runner seam. Host code can use `localShell`; sandbox code can wrap `box.exec`. */
+type Shell = (args: ReadonlyArray<string>, cwd?: string) => Promise<{
+    stdout: string;
+    stderr: string;
+    code: number;
+}>;
+type WorkspaceCommit = {
+    readonly ok: true;
+    readonly rev: string;
+} | {
+    readonly ok: false;
+    readonly conflict: string;
+};
+interface Workspace {
+    readonly ref: string;
+    materialize(dir: string): Promise<void>;
+    commit(dir: string, message: string): Promise<WorkspaceCommit>;
+    head(): Promise<string>;
+}
+declare function localShell(): Shell;
+interface GitWorkspaceOptions {
+    readonly ref: string;
+    readonly shell?: Shell;
+    readonly branch?: string;
+    readonly noHooks?: boolean;
+}
+declare function gitWorkspace(opts: GitWorkspaceOptions): Workspace;
+export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type 
WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, inlineSandboxClient, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };

package/dist/runtime.js CHANGED Viewed

@@ -9,11 +9,11 @@ import {
   assertTraceDerivedFindings,
   buildSteerContext,
   builtinShapes,
-  cliExecutor,
   completionAuthorizes,
   contentAddress,
   createBudgetPool,
-  createDynamicDriver,
+  createDriver,
+  createExecutor,
   createExecutorRegistry,
   createRootHandle,
   createSandboxForSpec,
@@ -28,21 +28,25 @@ import {
   equalKOnCost,
   fanout,
   flatWidenGate,
+  gitWorkspace,
+  inlineSandboxClient,
+  localShell,
   loopDispatch,
   loopUntil,
   materializeTreeView,
+  observe,
+  openSandboxRun,
   panel,
   pipeline,
   probeSandboxCapabilities,
   registerShape,
   renderAnalyses,
   renderCorpusToInstructions,
+  renderReport,
   replaySpawnTree,
   reportLoopUsage,
-  routerInlineExecutor,
   runLoop,
   runPersonified,
-  sandboxExecutor,
   sentinelCompletion,
   settledToIteration,
   spendFromUsageEvents,
@@ -50,11 +54,11 @@ import {
   trajectoryReport,
   verify,
   widen
-} from "./chunk-KEWO4KI6.js";
+} from "./chunk-72JQCHOZ.js";
 import {
   extractLlmCallEvent,
   mapSandboxEvent
-} from "./chunk-PRX45WE2.js";
+} from "./chunk-GSUO5QS6.js";
 import "./chunk-DGUM43GV.js";
 export {
   FileCorpus,
@@ -67,11 +71,11 @@ export {
   assertTraceDerivedFindings,
   buildSteerContext,
   builtinShapes,
-  cliExecutor,
   completionAuthorizes,
   contentAddress,
   createBudgetPool,
-  createDynamicDriver,
+  createDriver,
+  createExecutor,
   createExecutorRegistry,
   createRootHandle,
   createSandboxForSpec,
@@ -87,22 +91,26 @@ export {
   extractLlmCallEvent,
   fanout,
   flatWidenGate,
+  gitWorkspace,
+  inlineSandboxClient,
+  localShell,
   loopDispatch,
   loopUntil,
   mapSandboxEvent,
   materializeTreeView,
+  observe,
+  openSandboxRun,
   panel,
   pipeline,
   probeSandboxCapabilities,
   registerShape,
   renderAnalyses,
   renderCorpusToInstructions,
+  renderReport,
   replaySpawnTree,
   reportLoopUsage,
-  routerInlineExecutor,
   runLoop,
   runPersonified,
-  sandboxExecutor,
   sentinelCompletion,
   settledToIteration,
   spendFromUsageEvents,