npm - @tangle-network/agent-runtime - Versions diffs - 0.6.0 → 0.8.0 - Mend

@tangle-network/agent-runtime 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -1,6 +1,17 @@
-import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore } from '@tangle-network/agent-eval';
-export { ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, KnowledgeReadinessReport, KnowledgeRequirement, RunRecord, UserQuestion } from '@tangle-network/agent-eval';
+import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore, AgentEvalError, TraceEvent } from '@tangle-network/agent-eval';
+export { AgentEvalError, AgentEvalErrorCode, CaptureIntegrityError, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, ReplayError, RunRecord, UserQuestion, ValidationError, VerificationError } from '@tangle-network/agent-eval';
+/**
+ * @stable
+ *
+ * Core task, session, adapter, and stream-event types for the runtime.
+ *
+ * This module owns the public shape of every cross-cutting record
+ * (`AgentTaskSpec`, `RuntimeSession`, `RuntimeStreamEvent`). Everything else in
+ * the runtime imports from here so type-level changes ripple in one place.
+ */
+/** @stable */
 interface AgentTaskSpec {
     id: string;
     intent: string;
@@ -11,6 +22,7 @@ interface AgentTaskSpec {
     budget?: Partial<ControlBudget>;
     metadata?: Record<string, unknown>;
 }
+/** @stable */
 interface AgentKnowledgeProvider {
     buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
     answerQuestions?(questions: UserQuestion[], task: AgentTaskSpec): Promise<Record<string, string>> | Record<string, string>;
@@ -22,6 +34,7 @@ interface AgentKnowledgeProvider {
         acquiredEvidenceIds: string[];
     }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
 }
+/** @stable */
 interface AgentTaskContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
     task: AgentTaskSpec;
     knowledge: KnowledgeReadinessReport;
@@ -35,6 +48,7 @@ interface AgentTaskContext<TState, TAction, TActionResult, TEval extends Control
     remainingCostUsd?: number;
     abortSignal: AbortSignal;
 }
+/** @stable */
 interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
     observe(ctx: {
         task: AgentTaskSpec;
@@ -78,7 +92,9 @@ interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEval
     }): number | undefined;
     projectRunRecords?(result: ControlRunResult<TState, TAction, TActionResult, TEval>, task: AgentTaskSpec): RunRecord[];
 }
+/** @stable */
 type AgentTaskStatus = 'completed' | 'blocked' | 'failed' | 'aborted';
+/** @stable */
 type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = {
     type: 'task_start';
     task: AgentTaskSpec;
@@ -125,7 +141,9 @@ type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unkn
     status: AgentTaskStatus;
     reason: string;
 };
+/** @stable */
 type AgentRuntimeEventSink<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => Promise<void> | void;
+/** @stable */
 type RuntimeStreamEvent = {
     type: 'task_start';
     task: AgentTaskSpec;
@@ -206,6 +224,17 @@ type RuntimeStreamEvent = {
     toolCallId?: string;
     result?: unknown;
     timestamp?: string;
+} | {
+    type: 'llm_call';
+    task?: AgentTaskSpec;
+    session?: RuntimeSession;
+    model: string;
+    tokensIn?: number;
+    tokensOut?: number;
+    costUsd?: number;
+    latencyMs?: number;
+    finishReason?: string;
+    timestamp?: string;
 } | {
     type: 'artifact';
     task?: AgentTaskSpec;
@@ -246,6 +275,7 @@ type RuntimeStreamEvent = {
     metadata?: Record<string, unknown>;
     timestamp: string;
 };
+/** @stable */
 interface RuntimeSession {
     id: string;
     backend: string;
@@ -255,12 +285,14 @@ interface RuntimeSession {
     updatedAt: string;
     metadata?: Record<string, unknown>;
 }
+/** @stable */
 interface RuntimeSessionStore {
     get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined;
     put(session: RuntimeSession): Promise<void> | void;
     appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void;
     listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[];
 }
+/** @stable */
 interface AgentBackendInput {
     task: AgentTaskSpec;
     message?: string;
@@ -270,12 +302,14 @@ interface AgentBackendInput {
     }>;
     inputs?: Record<string, unknown>;
 }
+/** @stable */
 interface AgentBackendContext {
     task: AgentTaskSpec;
     knowledge: KnowledgeReadinessReport;
     session: RuntimeSession;
     signal?: AbortSignal;
 }
+/** @stable */
 interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
     kind: string;
     start?(input: TInput, context: Omit<AgentBackendContext, 'session'> & {
@@ -285,6 +319,7 @@ interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendI
     stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>;
     stop?(session: RuntimeSession, reason: string): Promise<void> | void;
 }
+/** @stable */
 interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
     task: AgentTaskSpec;
     backend: AgentExecutionBackend<TInput>;
@@ -296,6 +331,7 @@ interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBack
     signal?: AbortSignal;
     minimumReadinessScore?: number;
 }
+/** @stable */
 interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
     task: AgentTaskSpec;
     adapter: AgentAdapter<TState, TAction, TActionResult, TEval>;
@@ -308,6 +344,7 @@ interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends Cont
     variantId?: string;
     minimumReadinessScore?: number;
 }
+/** @stable */
 interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
     task: AgentTaskSpec;
     status: AgentTaskStatus;
@@ -319,10 +356,336 @@ interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends Contr
     control: ControlRunResult<TState, TAction, TActionResult, TEval>;
     runRecords: RunRecord[];
 }
+/** @stable */
+interface AgentTaskRunSummary {
+    taskId: string;
+    domain?: string;
+    status: AgentTaskStatus;
+    reason: string;
+    readinessStatus: KnowledgeReadinessDecision['status'];
+    readinessScore: number;
+    recommendedAction: KnowledgeReadinessReport['recommendedAction'];
+    blockingGapIds: string[];
+    nonBlockingGapIds: string[];
+    questionCount: number;
+    acquisitionPlanCount: number;
+    acquiredEvidenceCount: number;
+    controlStepCount: number;
+    pass: boolean;
+    failureClass?: string;
+    wallMs: number;
+    costUsd: number;
+}
+/** @stable */
+interface KnowledgeReadinessDecision {
+    passed: boolean;
+    status: 'ready' | 'blocked' | 'caveat';
+    reason: string;
+    readinessScore: number;
+    recommendedAction: KnowledgeReadinessReport['recommendedAction'];
+    severity: KnowledgeReadinessReport['severity'];
+    blockingGapIds: string[];
+    nonBlockingGapIds: string[];
+}
+/**
+ * @stable
+ *
+ * Backend factories for `runAgentTaskStream`. Three shapes ship in core:
+ *
+ *  - `createIterableBackend` — wrap any custom async iterable into a backend
+ *  - `createSandboxPromptBackend` — sandbox / sidecar `streamPrompt` clients
+ *  - `createOpenAICompatibleBackend` — OpenAI-style chat completions endpoints
+ *
+ * Adapters stay thin: domain repos own auth, model selection, and the concrete
+ * tool surface. The factories handle session creation, stream normalization,
+ * and graceful end-of-stream signalling.
+ */
+/** @stable */
+declare function createIterableBackend<TInput extends AgentBackendInput>(options: {
+    kind: string;
+    start?: AgentExecutionBackend<TInput>['start'];
+    resume?: AgentExecutionBackend<TInput>['resume'];
+    stream: AgentExecutionBackend<TInput>['stream'];
+    stop?: AgentExecutionBackend<TInput>['stop'];
+}): AgentExecutionBackend<TInput>;
+/** @stable */
+declare function createSandboxPromptBackend<TBox, TInput extends AgentBackendInput = AgentBackendInput>(options: {
+    kind?: string;
+    getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox;
+    streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>;
+    mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined;
+    getSessionId?: (box: TBox, input: TInput) => string | undefined;
+}): AgentExecutionBackend<TInput>;
+/** @stable */
+declare function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(options: {
+    apiKey: string;
+    baseUrl: string;
+    model: string;
+    kind?: string;
+    fetchImpl?: typeof fetch;
+}): AgentExecutionBackend<TInput>;
+/**
+ * @stable
+ *
+ * Error taxonomy for `@tangle-network/agent-runtime`.
+ *
+ * Public contract: every error this package throws as part of its consumer-
+ * facing API either extends `AgentEvalError` (re-exported here for ergonomic
+ * `instanceof` checks at the runtime boundary) or extends one of the
+ * runtime-specific subclasses below.
+ *
+ * Internal invariant guards (`throw new Error('this should never happen')`)
+ * remain plain `Error` — they are programmer-mistake assertions, not
+ * consumer-catchable contract failures.
+ *
+ * Subclassing strategy: where a runtime-specific failure maps cleanly to an
+ * agent-eval code (validation, config, not_found), we re-use the agent-eval
+ * subclass. Runtime-only failure modes (session resume against the wrong
+ * backend, backend transport errors) get fresh subclasses that still carry an
+ * `AgentEvalErrorCode` so cross-package handlers can pattern-match without
+ * importing the runtime.
+ */
+/**
+ * @stable
+ *
+ * Caller asked to resume a session against a backend whose `kind` does not
+ * match the session's recorded backend. This is a routing bug — the same
+ * session id was reused across two different backend implementations — and
+ * is not retryable without picking the right backend.
+ */
+declare class SessionMismatchError extends AgentEvalError {
+    readonly sessionBackend: string;
+    readonly requestedBackend: string;
+    constructor(sessionBackend: string, requestedBackend: string, options?: {
+        cause?: unknown;
+    });
+}
+/**
+ * @stable
+ *
+ * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success
+ * status. Distinct from `JudgeError` (which is structural / unrecoverable)
+ * because backend failures are sometimes retryable and consumers may want to
+ * branch on the upstream status code.
+ */
+declare class BackendTransportError extends AgentEvalError {
+    readonly backend: string;
+    readonly status?: number;
+    constructor(backend: string, message: string, options?: {
+        cause?: unknown;
+        status?: number;
+    });
+}
+/**
+ * @stable
+ *
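+ * A runtime-run lifecycle method was called in an order the state machine does
+ * not allow: `persist()` before `complete()`, a second `complete()` with a
+ * different terminal status, etc. (Repeating `complete()` with the same status
+ * is documented as an idempotent no-op and is not an error.)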
+ */
+declare class RuntimeRunStateError extends AgentEvalError {
+    constructor(message: string, options?: {
+        cause?: unknown;
+    });
+}
+/**
+ * @stable
+ *
+ * Pure readiness-decision helper. Maps a `KnowledgeReadinessReport` from
+ * `@tangle-network/agent-eval` to a three-state branch (`ready` / `blocked` /
+ * `caveat`) the runtime, route handlers, and UI shells can all switch on.
+ *
+ * Default `minimumScore` of 0.7 mirrors the readiness scoring scale in
+ * agent-eval; callers tightening or loosening this should keep it consistent
+ * across all entry points for the same product so the UI / metrics agree on
+ * what "caveat" means.
+ */
+/** @stable */
+declare function decideKnowledgeReadiness(report: KnowledgeReadinessReport, options?: {
+    minimumScore?: number;
+}): KnowledgeReadinessDecision;
+/**
+ * @stable
+ *
+ * The two top-level entry points:
+ *
+ *  - `runAgentTask` — single-shot lifecycle for adapter-driven tasks.
+ *  - `runAgentTaskStream` — streaming lifecycle that delegates execution to an
+ *    `AgentExecutionBackend` (model API, sandbox, or custom iterable).
+ *
+ * Both gate the run on `KnowledgeReadinessReport` from `agent-eval`, emit the
+ * same lifecycle event vocabulary (under different shapes — see `types.ts`),
+ * and route session lifecycle through a pluggable `RuntimeSessionStore`.
+ */
+/** @stable */
+declare function runAgentTask<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>>;
+/** @stable */
+declare function summarizeAgentTaskRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>): AgentTaskRunSummary;
+/** @stable */
+declare function runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(options: RunAgentTaskStreamOptions<TInput>): AsyncIterable<RuntimeStreamEvent>;
+/**
+ * @stable
+ *
+ * Canonical production-run lifecycle. ONE abstraction for "the agent did a
+ * thing on behalf of a customer; record what it did, what it cost, and how it
+ * ended." Consumer agents (legal, tax, gtm, creative, agent-builder) reach for
+ * `startRuntimeRun` instead of inventing their own `agentRuns`-row helpers.
+ *
+ * Three concerns live in this module:
+ *
+ *  1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
+ *     enforced by `RuntimeRunStateError`. Completion is idempotent (a second
+ *     `complete()` call with the same status is a no-op so retries / cleanup
+ *     paths don't double-fire side effects). A different terminal status is a
+ *     state error.
+ *
+ *  2. **Cost ledger** — every `llm_call` event the handle observes contributes
+ *     `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
+ *     measured from `startRuntimeRun()` to `complete()`. Surface via
+ *     `handle.cost()` for "cost per customer task" dashboards.
+ *
+ *  3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
+ *     consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
+ *     KV store. The adapter receives a sanitized row shape; no telemetry
+ *     payload bytes flow through it unless the consumer opts in via
+ *     `RuntimeRunOptions.telemetryEvents`.
+ *     NOTE(review): `RuntimeRunOptions` as declared in this file has no
+ *     `telemetryEvents` field — confirm the intended opt-in mechanism.
+ *
+ * The pattern replaces legal-agent's bespoke `completeProductionAgentRun` /
+ * `persistRuntimeRun` pair from `eval-evidence.ts` + `api.chat.ts`. Both are
+ * marked `@deprecated` in this release; consumers ditch them on their own
+ * version bumps.
+ */
+/** @stable */
+type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
+/** @stable */
+interface RuntimeRunCost {
+    /** Cumulative input tokens across every observed `llm_call` event. */
+    tokensIn: number;
+    /** Cumulative output tokens across every observed `llm_call` event. */
+    tokensOut: number;
+    /** Sum of `costUsd` from every observed `llm_call` event. */
+    costUsd: number;
+    /** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
+    wallMs: number;
+    /** Count of `llm_call` events observed during the run. */
+    llmCalls: number;
+}
+/** @stable */
+interface RuntimeRunCompleteInput {
+    status: Exclude<RuntimeRunStatus, 'running'>;
+    resultSummary?: string;
+    /** Optional explicit cost override; if omitted, the accumulated ledger is used. */
+    cost?: Partial<RuntimeRunCost>;
+    /** Stable error message when `status === 'failed'`. */
+    error?: string;
+    /** Additional adapter-specific fields merged into the persisted row. */
+    metadata?: Record<string, unknown>;
+}
+/** @stable */
+interface RuntimeRunRow {
+    /** Stable runtime-side identifier. Adapters may translate to their own primary key. */
+    id: string;
+    workspaceId: string;
+    sessionId?: string;
+    agentId?: string;
+    domain?: string;
+    taskId: string;
+    scenarioId?: string;
+    status: RuntimeRunStatus;
+    resultSummary?: string;
+    error?: string;
+    cost: RuntimeRunCost;
+    startedAt: string;
+    completedAt?: string;
+    metadata?: Record<string, unknown>;
+}
+/** @stable */
+interface RuntimeRunPersistenceAdapter {
+    /**
+     * Called once when `handle.persist()` runs. Implementations write `row` to
+     * their durable store (D1, postgres, KV). The declared return type is
+     * `Promise<void> | void` and `persist()` itself resolves to `void`, so no
+     * value (such as a storage-side row id) is passed back to the caller.
+     * Errors thrown here propagate out of `persist()` so the caller can decide
+     * whether to retry or log-and-continue.
+     */
+    upsert(row: RuntimeRunRow): Promise<void> | void;
+}
+/** @stable */
+interface RuntimeRunOptions {
+    workspaceId: string;
+    sessionId?: string;
+    agentId?: string;
+    taskSpec: AgentTaskSpec;
+    scenarioId?: string;
+    /** Optional persistence adapter; if omitted, `persist()` is a no-op. */
+    adapter?: RuntimeRunPersistenceAdapter;
+    /** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
+    id?: string;
+    /** Override the clock; default = `Date.now()`. Useful for deterministic tests. */
+    now?: () => number;
+}
+/** @stable */
+interface RuntimeRunHandle {
+    /** Stable id assigned at start. */
+    readonly id: string;
+    readonly workspaceId: string;
+    readonly sessionId: string | undefined;
+    readonly taskSpec: AgentTaskSpec;
+    readonly status: RuntimeRunStatus;
+    /**
+     * Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
+     * (text deltas, tool calls) silently so consumers can pipe the whole stream
+     * through `handle.observe`. `llm_call` events update the ledger.
+     */
+    observe(event: RuntimeStreamEvent): void;
+    /** Snapshot of the current cost ledger. Safe to call at any time. */
+    cost(): RuntimeRunCost;
+    /**
+     * Transition to a terminal state. Idempotent for the same status; throws
+     * `RuntimeRunStateError` for a different terminal status (state machines
+     * don't time-travel).
+     */
+    complete(input: RuntimeRunCompleteInput): void;
+    /** Build the current row without writing it. Useful for tests + dry runs. */
+    toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
+    /**
+     * Persist the current row via the configured adapter. Must be called after
+     * `complete()`. Idempotent for the same terminal state (the adapter sees
+     * the same row on retry).
+     */
+    persist(metadata?: Record<string, unknown>): Promise<void>;
+}
+/**
+ * @stable
+ *
+ * Construct a runtime-run handle. The returned handle is mutable across its
+ * lifetime; consumers should not share it across requests.
+ */
+declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
+/**
+ * @stable
+ *
+ * Sanitization for runtime telemetry. The rule: nothing user-controlled leaks
+ * unless the caller opts in with a `RuntimeTelemetryOptions` flag. This is the
+ * envelope that ends up in `agent_run.metadata.runtimeEvents` on every
+ * consumer, so the default must be safe.
+ */
+/** @stable */
 interface RuntimeTelemetryOptions {
     /**
-     * Include raw task inputs. Off by default because task inputs often
-     * contain customer facts, credentials, source text, or internal IDs.
+     * Include raw task inputs. Off by default because task inputs often contain
+     * customer facts, credentials, source text, or internal IDs.
      */
     includeInputs?: boolean;
     /** Include requirement descriptions. Secret requirements are always redacted. */
@@ -338,6 +701,7 @@ interface RuntimeTelemetryOptions {
     /** Include eval detail/evidence strings. Off by default because validators may echo private input. */
     includeEvalDetails?: boolean;
 }
+/** @stable */
 interface SanitizedKnowledgeRequirement {
     id: string;
     description?: string;
@@ -353,6 +717,7 @@ interface SanitizedKnowledgeRequirement {
     evidenceIds?: string[];
     fallbackPolicy: KnowledgeRequirement['fallbackPolicy'];
 }
+/** @stable */
 interface SanitizedKnowledgeReadinessReport {
     taskId: string;
     readinessScore: number;
@@ -365,40 +730,20 @@ interface SanitizedKnowledgeReadinessReport {
     evidenceIds?: string[];
     missingRequirementIds: string[];
 }
-interface AgentTaskRunSummary {
-    taskId: string;
-    domain?: string;
-    status: AgentTaskStatus;
-    reason: string;
-    readinessStatus: KnowledgeReadinessDecision['status'];
-    readinessScore: number;
-    recommendedAction: KnowledgeReadinessReport['recommendedAction'];
-    blockingGapIds: string[];
-    nonBlockingGapIds: string[];
-    questionCount: number;
-    acquisitionPlanCount: number;
-    acquiredEvidenceCount: number;
-    controlStepCount: number;
-    pass: boolean;
-    failureClass?: string;
-    wallMs: number;
-    costUsd: number;
-}
-interface KnowledgeReadinessDecision {
-    passed: boolean;
-    status: 'ready' | 'blocked' | 'caveat';
-    reason: string;
-    readinessScore: number;
-    recommendedAction: KnowledgeReadinessReport['recommendedAction'];
-    severity: KnowledgeReadinessReport['severity'];
-    blockingGapIds: string[];
-    nonBlockingGapIds: string[];
-}
+/** @stable */
+declare function sanitizeKnowledgeReadinessReport(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions): SanitizedKnowledgeReadinessReport;
+/** @stable */
+declare function sanitizeAgentRuntimeEvent<TState, TAction, TActionResult, TEval extends ControlEvalResult>(event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>, options?: RuntimeTelemetryOptions): Record<string, unknown>;
+/** @stable */
+declare function sanitizeRuntimeStreamEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions): Record<string, unknown>;
+/** @stable */
 interface RuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> {
-    onEvent: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>;
+    onEvent: (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => void;
     events: Array<Record<string, unknown>>;
 }
+/** @stable */
 type RuntimeStreamEventSink = (event: RuntimeStreamEvent) => void;
+/** @stable */
 interface RuntimeStreamEventSummary {
     /** Total count of sanitized events collected. */
     eventCount: number;
@@ -413,36 +758,18 @@ interface RuntimeStreamEventSummary {
     /** Concatenated `text_delta.text` across the stream, even when payloads are redacted. */
     finalText: string;
 }
+/** @stable */
 interface RuntimeStreamEventCollector {
     onEvent: RuntimeStreamEventSink;
     events: Array<Record<string, unknown>>;
     /** Snapshot of a small streaming-flavored summary derived from collected events. */
     summary(): RuntimeStreamEventSummary;
 }
-interface ServerSentEventOptions {
-    event?: string;
-    id?: string;
-    retry?: number;
-}
-declare class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
-    private readonly sessions;
-    private readonly events;
-    get(sessionId: string): RuntimeSession | undefined;
-    put(session: RuntimeSession): void;
-    appendEvent(sessionId: string, event: RuntimeStreamEvent): void;
-    listEvents(sessionId: string): RuntimeStreamEvent[];
-}
-declare function runAgentTask<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>>;
-declare function summarizeAgentTaskRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>): AgentTaskRunSummary;
-declare function runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(options: RunAgentTaskStreamOptions<TInput>): AsyncIterable<RuntimeStreamEvent>;
-declare function decideKnowledgeReadiness(report: KnowledgeReadinessReport, options?: {
-    minimumScore?: number;
-}): KnowledgeReadinessDecision;
-declare function sanitizeKnowledgeReadinessReport(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions): SanitizedKnowledgeReadinessReport;
-declare function sanitizeAgentRuntimeEvent<TState, TAction, TActionResult, TEval extends ControlEvalResult>(event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>, options?: RuntimeTelemetryOptions): Record<string, unknown>;
-declare function sanitizeRuntimeStreamEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions): Record<string, unknown>;
+/** @stable */
 declare function createRuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult>(options?: RuntimeTelemetryOptions): RuntimeEventCollector<TState, TAction, TActionResult, TEval>;
 /**
+ * @stable
+ *
  * Streaming-event counterpart of `createRuntimeEventCollector`. Use this with
  * `runAgentTaskStream` — pass each yielded event through `onEvent` and read
  * the sanitized copies off `events`. The same `RuntimeTelemetryOptions`
@@ -455,29 +782,112 @@ declare function createRuntimeEventCollector<TState = unknown, TAction = unknown
  * events whose `type` literals overlap (`task_start`, `readiness_end`, etc.).
  */
 declare function createRuntimeStreamEventCollector(options?: RuntimeTelemetryOptions): RuntimeStreamEventCollector;
+/**
+ * @stable
+ *
+ * Session helpers + an in-memory `RuntimeSessionStore` implementation suitable
+ * for tests, scratch processes, and per-request scratch storage in serverless
+ * runtimes. Durable stores (D1, postgres, Durable Objects) implement the same
+ * interface from `./types`.
+ */
+/** @stable */
+declare class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
+    private readonly sessions;
+    private readonly events;
+    get(sessionId: string): RuntimeSession | undefined;
+    put(session: RuntimeSession): void;
+    appendEvent(sessionId: string, event: RuntimeStreamEvent): void;
+    listEvents(sessionId: string): RuntimeStreamEvent[];
+}
+/**
+ * @stable
+ *
+ * Server-Sent Events serialization for runtime telemetry streams.
+ *
+ * Newline-safe by construction: any newline in `id` or `event` is collapsed to
+ * a space (browsers terminate fields on newline), and multi-line `data`
+ * payloads are split into one `data:` line per source line so JSON.stringify
+ * output transports cleanly.
+ */
+/** @stable */
+interface ServerSentEventOptions {
+    event?: string;
+    id?: string;
+    retry?: number;
+}
+/** @stable */
 declare function encodeServerSentEvent(data: unknown, options?: ServerSentEventOptions): string;
+/** @stable */
 declare function readinessServerSentEvent(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
+/** @stable */
 declare function runtimeStreamServerSentEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
-declare function createIterableBackend<TInput extends AgentBackendInput>(options: {
-    kind: string;
-    start?: AgentExecutionBackend<TInput>['start'];
-    resume?: AgentExecutionBackend<TInput>['resume'];
-    stream: AgentExecutionBackend<TInput>['stream'];
-    stop?: AgentExecutionBackend<TInput>['stop'];
-}): AgentExecutionBackend<TInput>;
-declare function createSandboxPromptBackend<TBox, TInput extends AgentBackendInput = AgentBackendInput>(options: {
-    kind?: string;
-    getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox;
-    streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>;
-    mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined;
-    getSessionId?: (box: TBox, input: TInput) => string | undefined;
-}): AgentExecutionBackend<TInput>;
-declare function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(options: {
-    apiKey: string;
-    baseUrl: string;
-    model: string;
-    kind?: string;
-    fetchImpl?: typeof fetch;
-}): AgentExecutionBackend<TInput>;
-export { type AgentAdapter, type AgentBackendContext, type AgentBackendInput, type AgentExecutionBackend, type AgentKnowledgeProvider, type AgentRuntimeEvent, type AgentRuntimeEventSink, type AgentTaskContext, type AgentTaskRunResult, type AgentTaskRunSummary, type AgentTaskSpec, type AgentTaskStatus, InMemoryRuntimeSessionStore, type KnowledgeReadinessDecision, type RunAgentTaskOptions, type RunAgentTaskStreamOptions, type RuntimeEventCollector, type RuntimeSession, type RuntimeSessionStore, type RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, decideKnowledgeReadiness, encodeServerSentEvent, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, summarizeAgentTaskRun };
+/**
+ * @stable
+ *
+ * Bridge from runtime stream events to the agent-eval trace schema.
+ *
+ * Before this module, consumers (legal-agent's chat.ts, gtm-agent's runtime
+ * route) hand-rolled an adapter from `RuntimeStreamEvent` -> `TraceEvent` per
+ * repo. The mapping is mechanical and the destination schema is owned by
+ * agent-eval, so the adapter belongs in runtime, not in N consumer repos.
+ *
+ * The bridge is intentionally one-way (runtime -> agent-eval). The reverse
+ * mapping is degenerate (agent-eval events have no session / task affinity)
+ * and would invite consumers to round-trip through agent-eval, defeating the
+ * point of the runtime-specific shape.
+ */
+/** @stable */
+interface TraceBridgeOptions {
+    /**
+     * Stable `runId` to stamp on every emitted `TraceEvent`. Required because
+     * agent-eval's `TraceEvent.runId` is non-optional.
+     */
+    runId: string;
+    /**
+     * Optional `spanId` to attach when an event maps to a known span (for
+     * example, an outer runtime-task span the consumer is already emitting).
+     */
+    spanId?: string;
+    /**
+     * Optional id generator; default = monotonic counter scoped to this bridge
+     * instance. Override for deterministic tests or to integrate with a wider
+     * id-allocator (uuid, ksuid).
+     */
+    newEventId?: () => string;
+}
+/** @stable */
+interface TraceBridge {
+    /**
+     * Map a single `RuntimeStreamEvent` to a `TraceEvent`. Returns `undefined`
+     * for events that have no useful trace projection (text deltas, reasoning
+     * deltas — these belong inside an `LlmSpan.output`, not as separate trace
+     * events).
+     */
+    toTraceEvent(event: RuntimeStreamEvent): TraceEvent | undefined;
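+    /**
+     * Convenience: drain a synchronous iterable of stream events into trace
+     * events. `runAgentTaskStream` yields an `AsyncIterable`, so collect its
+     * events first (or call `toTraceEvent` per event while iterating).
+     */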
+    drain(events: Iterable<RuntimeStreamEvent>): TraceEvent[];
+}
+/**
+ * @stable
+ *
+ * Build a stateful bridge. State is intentionally minimal — only the event-id
+ * counter — because the runtime stream already carries timestamps and the
+ * caller already knows the `runId`.
+ */
+declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
+/**
+ * @stable
+ *
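+ * One-shot convenience for callers who don't want to hold a bridge instance.
+ * Internally allocates a single-use bridge so id-generation stays consistent
+ * within the call.
+ * NOTE(review): the default id generator is documented as a counter scoped to
+ * one bridge instance, so separate calls presumably do not share a counter;
+ * supply `newEventId` (or use `createTraceBridge`) when ids must be unique
+ * across events — confirm against the implementation.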
+ */
+declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
+export { type AgentAdapter, type AgentBackendContext, type AgentBackendInput, type AgentExecutionBackend, type AgentKnowledgeProvider, type AgentRuntimeEvent, type AgentRuntimeEventSink, type AgentTaskContext, type AgentTaskRunResult, type AgentTaskRunSummary, type AgentTaskSpec, type AgentTaskStatus, BackendTransportError, InMemoryRuntimeSessionStore, type KnowledgeReadinessDecision, type RunAgentTaskOptions, type RunAgentTaskStreamOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, type RuntimeSession, type RuntimeSessionStore, type RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type TraceBridge, type TraceBridgeOptions, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, encodeServerSentEvent, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, summarizeAgentTaskRun, toAgentEvalTrace };