npm - @tangle-network/agent-runtime - Versions diffs - 0.44.0 → 0.46.0 - Mend

@tangle-network/agent-runtime 0.44.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/README.md +95 -203
package/dist/agent.d.ts +3 -2
package/dist/agent.js +5 -7
package/dist/agent.js.map +1 -1
package/dist/analyst-loop.d.ts +28 -2
package/dist/analyst-loop.js +4 -1
package/dist/audit.d.ts +93 -0
package/dist/audit.js +312 -0
package/dist/audit.js.map +1 -0
package/dist/chunk-4B6U4CVQ.js +15 -0
package/dist/chunk-4B6U4CVQ.js.map +1 -0
package/dist/chunk-65FQLI4V.js +4089 -0
package/dist/chunk-65FQLI4V.js.map +1 -0
package/dist/{chunk-GFKVVRQ7.js → chunk-GN75RGM6.js} +13 -12
package/dist/chunk-GN75RGM6.js.map +1 -0
package/dist/chunk-GSUO5QS6.js +146 -0
package/dist/chunk-GSUO5QS6.js.map +1 -0
package/dist/chunk-HNUXAZIJ.js +580 -0
package/dist/chunk-HNUXAZIJ.js.map +1 -0
package/dist/{chunk-SKUZZCHE.js → chunk-I42NHLKX.js} +5 -5
package/dist/chunk-I42NHLKX.js.map +1 -0
package/dist/{chunk-HVYOHJHK.js → chunk-JNPK46YH.js} +2 -2
package/dist/chunk-JNPK46YH.js.map +1 -0
package/dist/{chunk-3HMHSN22.js → chunk-KADIJAD4.js} +38 -24
package/dist/chunk-KADIJAD4.js.map +1 -0
package/dist/{chunk-KDMRUD2P.js → chunk-KPN7OQ64.js} +296 -8
package/dist/chunk-KPN7OQ64.js.map +1 -0
package/dist/{chunk-NRZOXCJK.js → chunk-VR4JIC5H.js} +2 -2
package/dist/chunk-WIR4HOOJ.js +27 -0
package/dist/chunk-WIR4HOOJ.js.map +1 -0
package/dist/coder-DCWFQpmJ.d.ts +114 -0
package/dist/driver-C-mtBo7h.d.ts +221 -0
package/dist/improvement.d.ts +0 -1
package/dist/improvement.js +0 -5
package/dist/improvement.js.map +1 -1
package/dist/index.d.ts +122 -9
package/dist/index.js +398 -10
package/dist/index.js.map +1 -1
package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-2Gwpz_27.d.ts} +86 -9
package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +17 -13
package/dist/loop-runner-bin.d.ts +8 -6
package/dist/loop-runner-bin.js +6 -8
package/dist/loops.d.ts +7 -393
package/dist/loops.js +96 -27
package/dist/mcp/bin.js +7 -7
package/dist/mcp/bin.js.map +1 -1
package/dist/mcp/index.d.ts +286 -13
package/dist/mcp/index.js +341 -9
package/dist/mcp/index.js.map +1 -1
package/dist/{otel-export-wFDmmurL.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
package/dist/profiles.d.ts +385 -86
package/dist/profiles.js +549 -4
package/dist/profiles.js.map +1 -1
package/dist/{run-loop-C4L1Sted.d.ts → run-loop-CU2Y00Si.d.ts} +36 -13
package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
package/dist/runtime.d.ts +1964 -0
package/dist/runtime.js +114 -0
package/dist/runtime.js.map +1 -0
package/dist/substrate-CUgk7F7s.d.ts +77 -0
package/dist/topology.d.ts +73 -0
package/dist/topology.js +111 -0
package/dist/topology.js.map +1 -0
package/dist/types-BfoeiQRZ.d.ts +438 -0
package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} +110 -4
package/dist/workflow.d.ts +4 -3
package/dist/workflow.js +4 -5
package/dist/workflow.js.map +1 -1
package/package.json +37 -28
package/skills/agent-runtime-adoption/SKILL.md +32 -29
package/skills/generate-eval/SKILL.md +60 -0
package/dist/chunk-3HMHSN22.js.map +0 -1
package/dist/chunk-GFKVVRQ7.js.map +0 -1
package/dist/chunk-HVYOHJHK.js.map +0 -1
package/dist/chunk-KDMRUD2P.js.map +0 -1
package/dist/chunk-PY6NMZYX.js +0 -52
package/dist/chunk-PY6NMZYX.js.map +0 -1
package/dist/chunk-S7JXV32P.js +0 -947
package/dist/chunk-S7JXV32P.js.map +0 -1
package/dist/chunk-SKUZZCHE.js.map +0 -1
package/dist/chunk-SQSCRJ7U.js +0 -65
package/dist/chunk-SQSCRJ7U.js.map +0 -1
package/dist/chunk-VOX6Z3II.js +0 -90
package/dist/chunk-VOX6Z3II.js.map +0 -1
package/dist/chunk-XBUG326M.js +0 -261
package/dist/chunk-XBUG326M.js.map +0 -1
package/dist/dynamic-wUgp6UKs.d.ts +0 -108
package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
/package/dist/{chunk-NRZOXCJK.js.map → chunk-VR4JIC5H.js.map} +0 -0

package/dist/types-BfoeiQRZ.d.ts ADDED Viewed

@@ -0,0 +1,438 @@
+import { DefaultVerdict } from '@tangle-network/agent-eval';
+import { AgentProfile, BackendType } from '@tangle-network/sandbox';
+import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
+import { c as LoopTokenUsage } from './types-DnYoHvvZ.js';
+/**
+ * @experimental
+ *
+ * Recursive execution atom — the FROZEN type surface (the keystone contract).
+ *
+ * One self-similar `Agent` atom runs inside a budget-conserving reactive `Scope`,
+ * orchestrated by a `Supervisor` over an event-sourced `SpawnJournal`. A leaf is an
+ * `Agent` that never calls `scope.spawn`; a driver is an `Agent` that spawns and runs
+ * a policy over its children's streaming results.
+ *
+ * Two invariants the surface exists to make enforceable:
+ *  - Budget is an atomically-reserved CONSERVED pool, so `Σk(treatment) ≡ Σk(blind)` by
+ *    construction (reserve-on-spawn, refund-unspent-on-settle, fail-closed admission).
+ *  - The journal records a content-addressed `outRef` per child result, so replay
+ *    rehydrates the exact `Settled` the driver branched on (the replay invariant below).
+ *
+ * The leaf RUNTIME is one OPEN `Executor` interface, not a closed `inline|sandbox|cli`
+ * union the call site switches on. The built-ins (router/inline, sandbox, cli) are the
+ * initial IMPLEMENTATIONS; any user agent is first-class the moment it implements the
+ * interface. The interface IS the extension point — no per-vendor adapters live here.
+ *
+ * Layering: substrate types (`DefaultVerdict`) come from `@tangle-network/agent-eval`;
+ * runtime-shaped types (everything else) live here. Pure types/interfaces only — this
+ * module typechecks standalone and is imported by every keystone impl.
+ */
+/**
+ * One self-similar atom. A leaf is an `Agent` that never calls `scope.spawn`; a driver
+ * is an `Agent` whose `act` spawns children and reacts to them via `scope.next()`. An
+ * analyst is an `Agent` whose task is "read these traces → findings" — `where` it runs
+ * is its executor, not a separate type.
+ *
+ * `act` MUST be replay-safe: it may read `verdict`, `spent`, and `out` (rehydrated by
+ * `outRef`) off each `Settled`; it MUST NOT read `Date.now`, `Math.random`, or any
+ * unordered collection. `scope.next()` delivers strictly in recorded `seq` order.
+ */
+interface Agent<Task, Out> {
+    readonly name: string;
+    act(task: Task, scope: Scope<Out>): Promise<Out>;
+}
+/**
+ * The leaf runtime — ONE open interface, not a closed union. `execute` returns a
+ * `Promise<ExecutorResult>` for one-shot executors OR an `AsyncIterable<UsageEvent>` for
+ * streaming ones; a streaming executor reports incremental normalized usage as it runs
+ * (the budget pool reconciles against it) and exposes its terminal artifact via
+ * `resultArtifact()`. Both shapes normalize usage to `UsageEvent` so the conserved pool
+ * meters every runtime identically.
+ *
+ * Built-in implementations (in `runtime.ts`, NOT variants here): router/inline (a direct
+ * Router/HTTP inference call, no box), sandbox (COMPOSES `runLoop` as a leaf, forwarding
+ * PR #150's optional `lineage` passthrough — does NOT reinvent checkpoint/fork), cli
+ * (Halo/RLM subprocess; `budgetExempt`, excluded from equal-k by construction). A user's
+ * own agent (mastra/agno/raw HTTP/anything) is first-class by implementing this interface.
+ */
+interface Executor<Out> {
+    /** Stable runtime tag for traces + the equal-k exemption check. */
+    readonly runtime: Runtime;
+    /**
+     * When true, this executor's spend is NOT metered against the conserved pool and its
+     * iterations are excluded from the equal-k assertion (a `cli` subprocess without
+     * token accounting). Fail-loud everywhere else: a metered executor MUST report usage.
+     */
+    readonly budgetExempt?: boolean;
+    /**
+     * One-shot → resolves an `ExecutorResult`; streaming → yields incremental `UsageEvent`s and
+     * the terminal artifact is read from `resultArtifact()` after the stream drains.
+     * `signal` is the spawn-scoped abort (chains the acquire lifecycle for sandbox).
+     */
+    execute(task: unknown, signal: AbortSignal): Promise<ExecutorResult<Out>> | AsyncIterable<UsageEvent>;
+    /**
+     * Optional inbox: receive an out-of-band message from the driver mid-run (the `send`/`steer_worker`
+     * verb). A streaming executor drains pending messages between turns and folds them into the next
+     * step (a steer / interrupt / resume). A one-shot executor that can't be steered mid-flight omits
+     * this; `Scope.send` then returns `false` for it. Never throws — a malformed message is the
+     * executor's to ignore.
+     */
+    deliver?(msg: unknown): void;
+    /**
+     * Tear the executor's resources down. `grace` mirrors the OTP shutdown spec
+     * (`'brutalKill'` = immediate, a number = ms grace, `'infinity'` = await clean exit).
+     */
+    teardown(grace: number | 'brutalKill' | 'infinity'): Promise<{
+        destroyed: boolean;
+    }>;
+    /**
+     * The replay source (B1): the content-addressed `outRef` + the materialized output the
+     * driver branched on, its verdict, and the conserved spend. Read once, after settle.
+     */
+    resultArtifact(): {
+        outRef: string;
+        out: Out;
+        verdict?: DefaultVerdict;
+        spent: Spend;
+    };
+}
+/** Terminal artifact of a one-shot `Executor.execute`. */
+interface ExecutorResult<Out> {
+    outRef: string;
+    out: Out;
+    verdict?: DefaultVerdict;
+    spent: Spend;
+}
+/**
+ * Normalized usage event — the single channel every executor reports through, so the
+ * conserved pool meters all runtimes identically. `tokens` carries `LoopTokenUsage`'s
+ * `{ input, output }`; `usd` is a SEPARATE channel (never folded into tokens).
+ */
+type UsageEvent = {
+    kind: 'tokens';
+    input: number;
+    output: number;
+} | {
+    kind: 'cost';
+    usd: number;
+} | {
+    kind: 'iteration';
+};
+/** The runtime tag of an `Executor` impl. Open by intent — `string` so a BYO executor
+ *  names its own runtime; the built-ins use these literals. */
+type Runtime = 'router' | 'inline' | 'sandbox' | 'cli' | (string & {});
+/**
+ * `AgentProfile` does NOT carry a `harness`/backend field — `harness` lives on the
+ * sandbox SDK's `BackendConfig`, not the portable profile. So an agent is mapped to its
+ * executor through this MINIMAL wrapper, never by fabricating a field onto `AgentProfile`.
+ *
+ * Resolution (in `runtime.ts`):
+ *  - `executor` present        → BYO: use it verbatim (a user's own `Executor`).
+ *  - `harness === null`        → router/inline: a direct Router call, no box.
+ *  - `harness` is a `BackendType` → sandbox: compose `runLoop` against `profile` on that backend.
+ * Fail loud on an unresolvable spec (no executor and an unknown harness).
+ */
+interface AgentSpec {
+    readonly profile: AgentProfile;
+    /** `null` selects router/inline; a `BackendType` selects the sandboxed harness. */
+    readonly harness: BackendType | null;
+    /** Bring-your-own executor: when set, overrides harness-based resolution entirely. */
+    readonly executor?: Executor<unknown>;
+}
+/**
+ * Builds a fresh `Executor` for one spawn from the resolved spec. Per-spawn (not
+ * shared) so each child owns its own box/abort/teardown lifecycle. A BYO factory lets a
+ * user supply construction args without pre-instantiating.
+ */
+type ExecutorFactory<Out> = (spec: AgentSpec, ctx: ExecutorContext) => Executor<Out>;
+/** Construction context handed to an `ExecutorFactory` — the seams a built-in needs
+ *  (sandbox client for the sandbox executor, router config for router/inline) without
+ *  the factory reaching into module globals. */
+interface ExecutorContext {
+    readonly signal: AbortSignal;
+    /** Opaque seams the registry threads through; a built-in narrows what it needs. */
+    readonly seams: Readonly<Record<string, unknown>>;
+}
+/**
+ * The OPEN resolver: maps an `AgentSpec` to an `ExecutorFactory`. The default
+ * registry resolves the three built-ins AND accepts a BYO `executor`/factory; callers
+ * register more runtimes by name. NOT a closed switch — registration is the extension
+ * point, mirroring the open `Executor` interface.
+ */
+interface ExecutorRegistry {
+    /** Register a factory for a named runtime. Throws on a duplicate name (fail loud). */
+    register<Out>(runtime: Runtime, factory: ExecutorFactory<Out>): void;
+    /**
+     * Resolve a spec to a factory. Precedence: a BYO `spec.executor` → a trivial factory
+     * returning it; else `harness === null` → the `'router'` factory; else a registered
+     * factory for the harness-derived runtime. Returns a typed outcome — the caller
+     * inspects `succeeded` before `value` (no silent fallback).
+     */
+    resolve<Out>(spec: AgentSpec): {
+        succeeded: true;
+        value: ExecutorFactory<Out>;
+    } | {
+        succeeded: false;
+        error: string;
+    };
+}
+/** A budget envelope on a spawn or the root. All ceilings; the pool reserves against them. */
+interface Budget {
+    readonly maxIterations: number;
+    readonly maxTokens: number;
+    readonly maxUsd?: number;
+    readonly deadlineMs?: number;
+}
+/** Conserved spend, reconciled from the normalized `UsageEvent` stream. Tokens and usd
+ *  are separate channels (never folded). */
+interface Spend {
+    iterations: number;
+    tokens: LoopTokenUsage;
+    usd: number;
+    ms: number;
+}
+/** OTP child-spec restart class. */
+type Restart = 'temporary' | 'transient' | 'permanent';
+/** `'acquiring'` is first-class (M1): a node spends real time + reaps an orphan box
+ *  during sandbox acquire BEFORE it is `running`, so abort must be defined over it. */
+type NodeStatus = 'pending' | 'acquiring' | 'running' | 'done' | 'failed' | 'cancelled';
+/** Deterministic node id — `${parent}:s${seq}` from the cursor order, never wall-clock. */
+type NodeId = string;
+interface SpawnOpts {
+    readonly budget: Budget;
+    readonly label: string;
+    readonly restart?: Restart;
+    /** Teardown grace handed to the executor when this node is reaped. */
+    readonly shutdown?: number | 'brutalKill' | 'infinity';
+}
+/**
+ * A live child handle. `abort()` is defined over the ACQUIRE lifecycle: it chains into
+ * the `acquireSandbox` signal and reaps a find-by-name orphan box, so a node aborted
+ * mid-acquire never leaks (M1).
+ */
+interface Handle<Out> {
+    readonly id: NodeId;
+    readonly label: string;
+    readonly status: NodeStatus;
+    abort(reason?: string): void;
+    /** Phantom: binds the handle to the child's output type so `spawn<C>` returns a
+     *  `Handle<C>` distinct from a `Handle<other>`. Type-only — never present at runtime. */
+    readonly __out?: Out;
+}
+/**
+ * A settled child, delivered by `scope.next()`. `seq` is the monotonic cursor order
+ * in which `next()` yielded this settlement (B2) — NOT wall-clock — and replay delivers strictly
+ * in `seq` order. `outRef` rehydrates `out` from the `ResultBlobStore` on replay.
+ */
+type Settled<Out> = {
+    kind: 'done';
+    handle: Handle<Out>;
+    out: Out;
+    outRef: string;
+    verdict?: DefaultVerdict;
+    spent: Spend;
+    seq: number;
+} | {
+    kind: 'down';
+    handle: Handle<Out>;
+    reason: string;
+    /** True = infrastructure failure (excluded from merge `n` / equal-k), not a bad result. */
+    infra: boolean;
+    restartCount: number;
+    seq: number;
+};
+/**
+ * The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
+ * budget atomically from the shared pool and FAILS CLOSED when the pool can't cover it;
+ * `next()` is a ray.wait cursor (n=1) over THIS scope's IN-MEMORY live set; `view` reads
+ * the in-memory nursery (NOT the log), O(live).
+ */
+interface Scope<Out> {
+    /**
+     * Spawn a child. Reserves `opts.budget` from the conserved pool atomically; refunds the
+     * unspent remainder on settle. Returns a typed outcome — fail-closed on an exhausted
+     * pool or an exceeded depth ceiling (the caller inspects `ok` before `handle`).
+     */
+    spawn<C extends Out>(agent: Agent<unknown, C>, task: unknown, opts: SpawnOpts): {
+        ok: true;
+        handle: Handle<C>;
+    } | {
+        ok: false;
+        reason: 'budget-exhausted' | 'depth-exceeded';
+    };
+    /** ray.wait n=1 over this scope's in-memory live set; resolves as each child settles;
+     *  `null` when the live set is empty. */
+    next(): Promise<Settled<Out> | null>;
+    /**
+     * Steer a RUNNING child out-of-band — deliver a message to its executor's inbox (the driver's
+     * `send` verb: next-instruction, interrupt, or resume). Returns `true` if the message was
+     * delivered to a live child whose executor accepts delivery, `false` otherwise (unknown id,
+     * already settled, or an executor with no inbox). The executor drains its inbox between turns;
+     * a leaf that does not implement `deliver` simply cannot be steered mid-flight. In-process this
+     * is a direct call; the sandbox/Agent-Bus transports surface the SAME verb as an MCP tool.
+     */
+    send(nodeId: NodeId, msg: unknown): boolean;
+    /** The live tree — reads the in-memory nursery, not the journal. */
+    readonly view: TreeView;
+    /** Conserved-pool readouts (post-reservation). */
+    readonly budget: Readonly<{
+        tokensLeft: number;
+        usdLeft: number;
+        deadlineMs: number;
+        reservedTokens: number;
+    }>;
+}
+interface NodeSnapshot {
+    readonly id: NodeId;
+    readonly parent?: NodeId;
+    readonly label: string;
+    readonly status: NodeStatus;
+    readonly runtime: Runtime;
+    readonly budget: Budget;
+    /** Conserved spend so far for this node. */
+    readonly spent: Spend;
+    /** `outRef` once the node is `done` (the replay/result pointer). */
+    readonly outRef?: string;
+}
+/** The live tree — what `scope.view` / `RootHandle.view()` materialize for a viewer. */
+interface TreeView {
+    readonly root: NodeId;
+    readonly nodes: ReadonlyArray<NodeSnapshot>;
+    /** Count of nodes in `running` or `acquiring` — the "what's in flow?" answer. */
+    readonly inFlight: number;
+}
+/** Journaled spawn-tree events (B1/B2). `seq` is the cursor order; `at` is an ISO
+ *  timestamp for human inspection only (NOT a replay input). */
+type SpawnEvent = {
+    kind: 'spawned';
+    id: NodeId;
+    parent?: NodeId;
+    label: string;
+    budget: Budget;
+    runtime: Runtime;
+    seq: number;
+    at: string;
+} | {
+    kind: 'settled';
+    id: NodeId;
+    status: 'done' | 'down';
+    /** Content-addressed result pointer; rehydrates `out` from `ResultBlobStore`. */
+    outRef?: string;
+    verdict?: DefaultVerdict;
+    spent: Spend;
+    infra?: boolean;
+    seq: number;
+    at: string;
+} | {
+    kind: 'cancelled';
+    id: NodeId;
+    reason: string;
+    seq: number;
+    at: string;
+};
+/**
+ * The spawn-tree event source (mirrors `ConversationJournal`'s begin/append/load shape).
+ * `loadTree` replays the full ordered event list for resume/replay; `appendEvent` is
+ * called only AFTER the event is observed-committed (never speculative).
+ */
+interface SpawnJournal {
+    loadTree(root: NodeId): Promise<SpawnEvent[] | undefined>;
+    beginTree(root: NodeId, at: string): Promise<void>;
+    appendEvent(root: NodeId, ev: SpawnEvent): Promise<void>;
+}
+/** Content-addressed result blobs (the `outRef` → artifact map) backing the replay
+ *  invariant. Split from the journal so the journal stays small (decisions) and the
+ *  payloads (evidence) live where a viewer/replayer rehydrates them. */
+interface ResultBlobStore {
+    put(outRef: string, artifact: unknown): Promise<void>;
+    get(outRef: string): Promise<unknown | undefined>;
+}
+/**
+ * Owns the conserved pool, the spawn log, the abort cascade, the OTP intensity breaker,
+ * and the root handle. `run` executes the root `Agent` to completion; `attach` wires a
+ * live `RootHandle` (the Q2 substrate the chat/pi-viz client later consumes).
+ */
+interface Supervisor<Task, Out> {
+    run(root: Agent<Task, Out>, task: Task, opts: SupervisorOpts): Promise<SupervisedResult<Out>>;
+    attach(h: RootHandle<Out>): void;
+}
+interface SupervisorOpts {
+    /** The root conserved-pool ceiling (tokens + usd + iterations + deadline). */
+    readonly budget: Budget;
+    /** Trace-correlation root + the journal/blob root key. */
+    readonly runId: NodeId;
+    /** Event source — defaults to the in-memory journal in the impl; pass JSONL/FS for durability. */
+    readonly journal: SpawnJournal;
+    /** Result payload store backing `outRef` rehydration. */
+    readonly blobs: ResultBlobStore;
+    /** Executor resolution — the open registry mapping `AgentSpec` → `Executor`. */
+    readonly executors: ExecutorRegistry;
+    /** Runtime recursion-depth ceiling (paired with the conserved pool per R3). */
+    readonly maxDepth?: number;
+    /**
+     * OTP intensity breaker: more than `maxRestarts` child restarts within `withinMs`
+     * trips the supervisor to `no-winner` rather than restarting forever.
+     */
+    readonly maxRestarts?: number;
+    readonly withinMs?: number;
+    readonly now?: () => number;
+    readonly signal?: AbortSignal;
+    /** Lifecycle stream sink, threaded into the root `Scope` so every `spawn`/settle emits on the
+     *  same `agent.spawn`/`agent.child` stream `runLoop` feeds — one observable recursive tree. */
+    readonly hooks?: RuntimeHooks;
+}
+/** Typed terminal result (M2) — a no-winner is NEVER coerced to a best-effort output. */
+type SupervisedResult<Out> = {
+    kind: 'winner';
+    out: Out;
+    outRef: string;
+    verdict?: DefaultVerdict;
+    tree: TreeView;
+    spentTotal: Spend;
+} | {
+    kind: 'no-winner';
+    reason: 'all-children-down' | 'budget-exhausted' | 'aborted';
+    tree: TreeView;
+    downCount: number;
+};
+/** Live root handle — the substrate a chat/pi-viz client attaches to (Q2). `signal`
+ *  delivers an out-of-band message to the running root; `view()` materializes the tree. */
+interface RootHandle<Out> {
+    view(): TreeView;
+    signal(msg: RootSignal): void;
+    abort(reason?: string): void;
+    /** Phantom: binds the handle to the supervised run's output type. Type-only — never
+     *  present at runtime; lets `attach(h: RootHandle<Out>)` stay output-typed. */
+    readonly __out?: Out;
+}
+/** Out-of-band message to a running root. Open by intent — a client extends it. */
+type RootSignal = {
+    kind: 'pause';
+} | {
+    kind: 'resume';
+} | {
+    kind: 'cancel';
+    reason?: string;
+} | {
+    kind: 'ask';
+    question: string;
+};
+/**
+ * The progressive-widening gate (MCTS-PW). Decides whether a settled child is
+ * `promising` enough to spawn another under the remaining pool. DEFAULTS TO FLAT
+ * (`shouldWiden` always false) so a gate run never widens and the selector≠judge
+ * firewall conflict (R2) stays dormant. When widening IS enabled, `promising` MUST be
+ * derived from TRACE findings (`analyses`), never raw `verdict` — or the gate carries
+ * an explicit, argued `judgeExempt: true` (the documented escape hatch, off by default).
+ */
+interface WidenGate<Out> {
+    /** Default impl returns false for every settlement (flat — never widens). */
+    shouldWiden(settled: Settled<Out>, budget: Scope<Out>['budget']): boolean;
+    /** When true, widening may read `verdict` directly (collides with the steer firewall —
+     *  must be explicitly argued per cell, never defaulted on). */
+    readonly judgeExempt?: boolean;
+}
+export type { Agent as A, Budget as B, ExecutorFactory as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, ExecutorRegistry as e, RootHandle as f, SupervisedResult as g, Spend as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };

package/dist/{types-DbJzz2uf.d.ts → types-DnYoHvvZ.d.ts} RENAMED Viewed

@@ -1,5 +1,6 @@
 import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore, DefaultVerdict } from '@tangle-network/agent-eval';
 import { CreateSandboxOptions, SandboxInstance, SandboxEvent, AgentProfile } from '@tangle-network/sandbox';
+import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
 /**
  * @stable
@@ -744,9 +745,19 @@ interface Driver<Task, Output, Decision> {
      * move + rationale (not just the inferred fan-width). Drivers whose topology
      * is a pure function of count (refine/fanout-vote) omit it — the kernel
      * infers `moveKind` from the planned-task count. Agent-authored drivers
-     * (`createDynamicDriver`) return their chosen move's kind + rationale.
+     * (`createDriver`) return their chosen move's kind + rationale.
      */
     describePlan?(): LoopPlanDescription | undefined;
+    /**
+     * Optional: the driver AUTHORS the winner instead of the kernel's argmax. The
+     * kernel consults this at finalize ONLY when the caller did not pass an explicit
+     * `selectWinner` to runLoop. Return the driver-declared winner (e.g. from a
+     * `select` topology move) or `undefined` to fall through to the default
+     * (best-valid-score, earliest index). This is the SELECTOR role made
+     * agent-authorable — the planner runs the selection, not the kernel.
+     * @experimental
+     */
+    selectWinner?(history: ReadonlyArray<Iteration<Task, Output>>): LoopWinner<Task, Output> | undefined;
 }
 /** @experimental Driver-supplied description of the just-planned move. */
 interface LoopPlanDescription {
@@ -796,9 +807,89 @@ interface LoopResult<Task, Output, Decision> {
  *
  * @experimental
  */
-interface LoopSandboxClient {
+interface SandboxClient {
     create(options?: CreateSandboxOptions): Promise<SandboxInstance>;
     describePlacement?(box: SandboxInstance): LoopSandboxPlacement;
+    /**
+     * Optional CRIU capability probe. When present and it resolves
+     * `{ available: true }`, the loop's `lineage.fork` seam may checkpoint+fork a
+     * parent box so a fanout's branches inherit a shared context prefix; absent or
+     * `false`, the fanout degrades to independent fresh boxes. The kernel reads
+     * this ONLY through the capability probe — it never branches on backend kind.
+     * The raw `Sandbox` SDK class satisfies it; the loop's test fakes omit it
+     * (⇒ `canFork = false`).
+     * @experimental
+     */
+    criuStatus?(): Promise<{
+        available: boolean;
+        criuVersion?: string;
+        reason?: string;
+    }>;
+}
+/**
+ * Opt-in box-lineage controls for `runLoop`. Default OFF — with both flags
+ * unset the kernel's per-iteration behavior is byte-identical to acquiring a
+ * fresh box, streaming once, and tearing it down. The independence of N fresh
+ * boxes (e.g. `random@k`) is a compute-control invariant; these flags must
+ * never apply to it. Enable them ONLY on a steered loop (refine / planner-driven
+ * fanout) where reusing the parent's context is intended.
+ *
+ * Live-box footprint: the lineage keeps every box it starts or forks alive
+ * across rounds so a later round can descend from it, and tears them down at
+ * loop end. When the driver's branch point is kernel-inferred (no
+ * `describePlan` — refine, fanout-vote), the kernel prunes boxes no future
+ * round can reach after each round, so the live set tracks the active frontier.
+ * When the driver authors its own branch point (`describePlan().parentIndex` —
+ * `createDriver`), it may descend from any prior
+ * iteration, so no box is pruned and the live-box count rises to the total
+ * iterations across all rounds. Size `forkFanout` runs accordingly (CRIU forks
+ * are copy-on-write, but each is still a live box until loop end).
+ *
+ * @experimental
+ */
+interface LoopLineageOptions {
+    /**
+     * When true, a refine round (1 planned task) descending from a prior round
+     * CONTINUES the parent iteration's session on the SAME box
+     * (`streamPrompt({ sessionId })`) instead of acquiring a fresh box and
+     * re-injecting prior context as prompt text. Round 0 (no parent) always
+     * starts fresh. Usable on any single-task path, not just the refine driver.
+     *
+     * Requires a platform that honors a client-supplied `sessionId`. The lineage
+     * mints the id and `continue` asserts the session is still live
+     * (`box.session(id).status()`), failing loud if the platform dropped it — so a
+     * non-honoring platform errors instead of silently running contextless turns.
+     * Verify continuity against the live platform before enabling: the assertion
+     * proves the session EXISTS server-side, not that prior turns replay into it.
+     */
+    sessionContinuity?: boolean;
+    /**
+     * When true AND the platform reports CRIU fork support, a fanout round (N
+     * planned tasks) descending from a prior round FORKS the parent iteration's
+     * checkpoint so all N branches inherit a shared context prefix. Without fork
+     * support it degrades to N independent fresh boxes (same result, no prefix).
+     * Round 0 always starts fresh. NEVER set this for a `random@k` control arm —
+     * forking would couple the independent samples.
+     *
+     * A real fork inherits the parent's IMAGE/PROFILE: per-branch `AgentRunSpec`
+     * profiles are honored only on the degraded fresh-box path, so a
+     * heterogeneous-profile fanout silently homogenizes to the parent's profile
+     * when fork is available. Use this for same-profile branching; for
+     * different-per-branch profiles use the unforked fanout path.
+     */
+    forkFanout?: boolean;
+    /**
+     * Per-turn sandbox streaming mode. Default `'sse'` (live `streamPrompt` —
+     * low-latency, full per-token trace; best for interactive chat). `'poll'`
+     * fire-and-detaches via `dispatchPrompt` and awaits the terminal result by
+     * status-polling, so a long, quiet in-box turn (clone + build + test) never
+     * holds a live stream a proxy idle-timeout can drop mid-execution. Lower trace
+     * fidelity (one terminal event), so it is opt-in — intended for BATCH eval
+     * runs, which don't need live streaming and were losing long turns to the
+     * idle-drop. Applies to the default fresh-box path too, not only when
+     * `sessionContinuity`/`forkFanout` are on.
+     */
+    streaming?: 'sse' | 'poll';
 }
 /** @experimental */
 interface LoopSandboxPlacement {
@@ -847,6 +938,11 @@ type LoopTraceEvent = {
     runId: string;
     timestamp: number;
     payload: LoopEndedPayload;
+} | {
+    kind: 'loop.teardown.failed';
+    runId: string;
+    timestamp: number;
+    payload: LoopTeardownFailedPayload;
 };
 /** @experimental */
 interface LoopStartedPayload {
@@ -946,10 +1042,20 @@ interface LoopEndedPayload {
     durationMs: number;
     iterations: number;
 }
+/** Emitted when a box's `delete()` throws or times out during teardown — the
+ *  loop swallows the failure (platform reaps on expiry) but surfaces it here so
+ *  a real leak (e.g. mid-loop auth expiry) is observable. @experimental */
+interface LoopTeardownFailedPayload {
+    sandboxId?: string;
+    /** `'timeout'` or the delete error message. */
+    reason: string;
+}
 /** @experimental */
 interface ExecCtx {
     /** Sandbox SDK client — the kernel calls `.create()` per iteration. */
-    sandboxClient: LoopSandboxClient;
+    sandboxClient: SandboxClient;
+    /** Optional runtime hooks. Execution-scoped; never part of `AgentProfile`. */
+    hooks?: RuntimeHooks;
     /** Optional trace emitter. When set, the kernel emits `loop.*` events. */
     traceEmitter?: LoopTraceEmitter;
     /**
@@ -973,4 +1079,4 @@ interface ExecCtx {
     parentSpanId?: string;
 }
-export { type AgentRunSpec as A, type RuntimeSession as B, type AgentAdapter as C, type Driver as D, type ExecCtx as E, type AgentKnowledgeProvider as F, type AgentRuntimeEventSink as G, type AgentTaskContext as H, type Iteration as I, type AgentTaskSpec as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type BackendErrorDetail as M, type RuntimeRunHandle as N, type OutputAdapter as O, type RuntimeRunPersistenceAdapter as P, type RuntimeRunRow as Q, type RuntimeStreamEvent as R, startRuntimeRun as S, type Validator as V, type LoopSandboxClient as a, type LoopResult as b, type OpenAIChatTool as c, type LoopTraceEmitter as d, type LoopDecisionPayload as e, type LoopEndedPayload as f, type LoopIterationDispatchPayload as g, type LoopIterationEndedPayload as h, type LoopIterationStartedPayload as i, type LoopPlanDescription as j, type LoopPlanPayload as k, type LoopSandboxPlacement as l, type LoopStartedPayload as m, type LoopTokenUsage as n, type LoopTraceEvent as o, type ValidationCtx as p, type AgentBackendInput as q, type AgentExecutionBackend as r, type OpenAIChatToolChoice as s, type AgentBackendContext as t, type RunAgentTaskOptions as u, type AgentTaskRunResult as v, type RunAgentTaskStreamOptions as w, type AgentRuntimeEvent as x, type AgentTaskStatus as y, type RuntimeSessionStore as z };
+export { type AgentRunSpec as A, type BackendErrorDetail as B, type LoopDecisionPayload as C, type Driver as D, type ExecCtx as E, type LoopEndedPayload as F, type LoopIterationDispatchPayload as G, type LoopIterationEndedPayload as H, type Iteration as I, type LoopIterationStartedPayload as J, type KnowledgeReadinessDecision as K, type LoopWinner as L, type LoopPlanDescription as M, type LoopPlanPayload as N, type OutputAdapter as O, type LoopStartedPayload as P, type LoopTeardownFailedPayload as Q, type RuntimeStreamEvent as R, type SandboxClient as S, type LoopTraceEvent as T, type ValidationCtx as U, type Validator as V, type LoopLineageOptions as a, type LoopResult as b, type LoopTokenUsage as c, type OpenAIChatTool as d, type LoopTraceEmitter as e, type LoopSandboxPlacement as f, type AgentBackendInput as g, type AgentExecutionBackend as h, type OpenAIChatToolChoice as i, type AgentBackendContext as j, type RunAgentTaskOptions as k, type AgentTaskRunResult as l, type RunAgentTaskStreamOptions as m, type AgentRuntimeEvent as n, type AgentTaskStatus as o, type RuntimeSessionStore as p, type RuntimeSession as q, type AgentAdapter as r, type AgentKnowledgeProvider as s, type AgentRuntimeEventSink as t, type AgentTaskContext as u, type AgentTaskSpec as v, type RuntimeRunHandle as w, type RuntimeRunPersistenceAdapter as x, type RuntimeRunRow as y, startRuntimeRun as z };

package/dist/workflow.d.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import { AgentProfile, CreateSandboxOptions, PromptOptions, TaskOptions, SandboxEvent } from '@tangle-network/sandbox';
-import { a as LoopSandboxClient, O as OutputAdapter, l as LoopSandboxPlacement, b as LoopResult } from './types-DbJzz2uf.js';
-import { R as RunLoopOptions } from './run-loop-C4L1Sted.js';
+import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-DnYoHvvZ.js';
+import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
 import '@tangle-network/agent-eval';
+import './runtime-hooks-C7JwKb9E.js';
 /**
  * @experimental
@@ -436,7 +437,7 @@ interface WorkflowSandboxAgentTrace<TOutput = unknown> {
     tokenUsage: WorkflowTokenUsage;
 }
 interface CreateSandboxWorkflowAgentDelegateOptions<TOutput = unknown> {
-    client: LoopSandboxClient;
+    client: SandboxClient;
     profile: WorkflowSandboxAgentProfileResolver;
     output?: OutputAdapter<TOutput>;
     stream?: WorkflowSandboxAgentStream;

package/dist/workflow.js CHANGED Viewed

@@ -1,13 +1,12 @@
 import {
   createSandboxForSpec,
   describeSandboxPlacement,
-  extractLlmCallEvent,
   runLoop
-} from "./chunk-S7JXV32P.js";
-import "./chunk-PY6NMZYX.js";
+} from "./chunk-65FQLI4V.js";
 import {
-  ValidationError
-} from "./chunk-SQSCRJ7U.js";
+  ValidationError,
+  extractLlmCallEvent
+} from "./chunk-GSUO5QS6.js";
 import "./chunk-DGUM43GV.js";
 // src/workflow/agent-delegate.ts