@tangle-network/agent-runtime 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,17 @@
1
- import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore } from '@tangle-network/agent-eval';
2
- export { ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, KnowledgeReadinessReport, KnowledgeRequirement, RunRecord, UserQuestion } from '@tangle-network/agent-eval';
1
+ import { ControlEvalResult, KnowledgeRequirement, ControlBudget, KnowledgeReadinessReport, ControlStep, ControlDecision, UserQuestion, DataAcquisitionPlan, ControlRunResult, RunRecord, TraceStore, AgentEvalError, TraceEvent } from '@tangle-network/agent-eval';
2
+ export { AgentEvalError, AgentEvalErrorCode, CaptureIntegrityError, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, ReplayError, RunRecord, UserQuestion, ValidationError, VerificationError } from '@tangle-network/agent-eval';
3
3
 
4
+ /**
5
+ * @stable
6
+ *
7
+ * Core task, session, adapter, and stream-event types for the runtime.
8
+ *
9
+ * This module owns the public shape of every cross-cutting record (`TaskSpec`,
10
+ * `RuntimeSession`, `RuntimeStreamEvent`). Everything else in the runtime
11
+ * imports from here so type-level changes ripple in one place.
12
+ */
13
+
14
+ /** @stable */
4
15
  interface AgentTaskSpec {
5
16
  id: string;
6
17
  intent: string;
@@ -11,6 +22,7 @@ interface AgentTaskSpec {
11
22
  budget?: Partial<ControlBudget>;
12
23
  metadata?: Record<string, unknown>;
13
24
  }
25
+ /** @stable */
14
26
  interface AgentKnowledgeProvider {
15
27
  buildReadiness?(task: AgentTaskSpec): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
16
28
  answerQuestions?(questions: UserQuestion[], task: AgentTaskSpec): Promise<Record<string, string>> | Record<string, string>;
@@ -22,6 +34,7 @@ interface AgentKnowledgeProvider {
22
34
  acquiredEvidenceIds: string[];
23
35
  }): Promise<KnowledgeReadinessReport> | KnowledgeReadinessReport;
24
36
  }
37
+ /** @stable */
25
38
  interface AgentTaskContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
26
39
  task: AgentTaskSpec;
27
40
  knowledge: KnowledgeReadinessReport;
@@ -35,6 +48,7 @@ interface AgentTaskContext<TState, TAction, TActionResult, TEval extends Control
35
48
  remainingCostUsd?: number;
36
49
  abortSignal: AbortSignal;
37
50
  }
51
+ /** @stable */
38
52
  interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
39
53
  observe(ctx: {
40
54
  task: AgentTaskSpec;
@@ -78,7 +92,9 @@ interface AgentAdapter<TState, TAction, TActionResult, TEval extends ControlEval
78
92
  }): number | undefined;
79
93
  projectRunRecords?(result: ControlRunResult<TState, TAction, TActionResult, TEval>, task: AgentTaskSpec): RunRecord[];
80
94
  }
95
+ /** @stable */
81
96
  type AgentTaskStatus = 'completed' | 'blocked' | 'failed' | 'aborted';
97
+ /** @stable */
82
98
  type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = {
83
99
  type: 'task_start';
84
100
  task: AgentTaskSpec;
@@ -125,7 +141,9 @@ type AgentRuntimeEvent<TState = unknown, TAction = unknown, TActionResult = unkn
125
141
  status: AgentTaskStatus;
126
142
  reason: string;
127
143
  };
144
+ /** @stable */
128
145
  type AgentRuntimeEventSink<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> = (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => Promise<void> | void;
146
+ /** @stable */
129
147
  type RuntimeStreamEvent = {
130
148
  type: 'task_start';
131
149
  task: AgentTaskSpec;
@@ -206,6 +224,17 @@ type RuntimeStreamEvent = {
206
224
  toolCallId?: string;
207
225
  result?: unknown;
208
226
  timestamp?: string;
227
+ } | {
228
+ type: 'llm_call';
229
+ task?: AgentTaskSpec;
230
+ session?: RuntimeSession;
231
+ model: string;
232
+ tokensIn?: number;
233
+ tokensOut?: number;
234
+ costUsd?: number;
235
+ latencyMs?: number;
236
+ finishReason?: string;
237
+ timestamp?: string;
209
238
  } | {
210
239
  type: 'artifact';
211
240
  task?: AgentTaskSpec;
@@ -246,6 +275,7 @@ type RuntimeStreamEvent = {
246
275
  metadata?: Record<string, unknown>;
247
276
  timestamp: string;
248
277
  };
278
+ /** @stable */
249
279
  interface RuntimeSession {
250
280
  id: string;
251
281
  backend: string;
@@ -255,12 +285,14 @@ interface RuntimeSession {
255
285
  updatedAt: string;
256
286
  metadata?: Record<string, unknown>;
257
287
  }
288
+ /** @stable */
258
289
  interface RuntimeSessionStore {
259
290
  get(sessionId: string): Promise<RuntimeSession | undefined> | RuntimeSession | undefined;
260
291
  put(session: RuntimeSession): Promise<void> | void;
261
292
  appendEvent?(sessionId: string, event: RuntimeStreamEvent): Promise<void> | void;
262
293
  listEvents?(sessionId: string): Promise<RuntimeStreamEvent[]> | RuntimeStreamEvent[];
263
294
  }
295
+ /** @stable */
264
296
  interface AgentBackendInput {
265
297
  task: AgentTaskSpec;
266
298
  message?: string;
@@ -270,12 +302,14 @@ interface AgentBackendInput {
270
302
  }>;
271
303
  inputs?: Record<string, unknown>;
272
304
  }
305
+ /** @stable */
273
306
  interface AgentBackendContext {
274
307
  task: AgentTaskSpec;
275
308
  knowledge: KnowledgeReadinessReport;
276
309
  session: RuntimeSession;
277
310
  signal?: AbortSignal;
278
311
  }
312
+ /** @stable */
279
313
  interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendInput> {
280
314
  kind: string;
281
315
  start?(input: TInput, context: Omit<AgentBackendContext, 'session'> & {
@@ -285,6 +319,7 @@ interface AgentExecutionBackend<TInput extends AgentBackendInput = AgentBackendI
285
319
  stream(input: TInput, context: AgentBackendContext): AsyncIterable<RuntimeStreamEvent>;
286
320
  stop?(session: RuntimeSession, reason: string): Promise<void> | void;
287
321
  }
322
+ /** @stable */
288
323
  interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBackendInput> {
289
324
  task: AgentTaskSpec;
290
325
  backend: AgentExecutionBackend<TInput>;
@@ -296,6 +331,7 @@ interface RunAgentTaskStreamOptions<TInput extends AgentBackendInput = AgentBack
296
331
  signal?: AbortSignal;
297
332
  minimumReadinessScore?: number;
298
333
  }
334
+ /** @stable */
299
335
  interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
300
336
  task: AgentTaskSpec;
301
337
  adapter: AgentAdapter<TState, TAction, TActionResult, TEval>;
@@ -308,6 +344,7 @@ interface RunAgentTaskOptions<TState, TAction, TActionResult, TEval extends Cont
308
344
  variantId?: string;
309
345
  minimumReadinessScore?: number;
310
346
  }
347
+ /** @stable */
311
348
  interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
312
349
  task: AgentTaskSpec;
313
350
  status: AgentTaskStatus;
@@ -319,10 +356,336 @@ interface AgentTaskRunResult<TState, TAction, TActionResult, TEval extends Contr
319
356
  control: ControlRunResult<TState, TAction, TActionResult, TEval>;
320
357
  runRecords: RunRecord[];
321
358
  }
359
+ /** @stable */
360
+ interface AgentTaskRunSummary {
361
+ taskId: string;
362
+ domain?: string;
363
+ status: AgentTaskStatus;
364
+ reason: string;
365
+ readinessStatus: KnowledgeReadinessDecision['status'];
366
+ readinessScore: number;
367
+ recommendedAction: KnowledgeReadinessReport['recommendedAction'];
368
+ blockingGapIds: string[];
369
+ nonBlockingGapIds: string[];
370
+ questionCount: number;
371
+ acquisitionPlanCount: number;
372
+ acquiredEvidenceCount: number;
373
+ controlStepCount: number;
374
+ pass: boolean;
375
+ failureClass?: string;
376
+ wallMs: number;
377
+ costUsd: number;
378
+ }
379
+ /** @stable */
380
+ interface KnowledgeReadinessDecision {
381
+ passed: boolean;
382
+ status: 'ready' | 'blocked' | 'caveat';
383
+ reason: string;
384
+ readinessScore: number;
385
+ recommendedAction: KnowledgeReadinessReport['recommendedAction'];
386
+ severity: KnowledgeReadinessReport['severity'];
387
+ blockingGapIds: string[];
388
+ nonBlockingGapIds: string[];
389
+ }
390
+
391
+ /**
392
+ * @stable
393
+ *
394
+ * Backend factories for `runAgentTaskStream`. Three shapes ship in core:
395
+ *
396
+ * - `createIterableBackend` — wrap any custom async iterable into a backend
397
+ * - `createSandboxPromptBackend` — sandbox / sidecar `streamPrompt` clients
398
+ * - `createOpenAICompatibleBackend` — OpenAI-style chat completions endpoints
399
+ *
400
+ * Adapters stay thin: domain repos own auth, model selection, and the concrete
401
+ * tool surface. The factories handle session creation, stream normalization,
402
+ * and graceful end-of-stream signalling.
403
+ */
404
+
405
+ /** @stable */
406
+ declare function createIterableBackend<TInput extends AgentBackendInput>(options: {
407
+ kind: string;
408
+ start?: AgentExecutionBackend<TInput>['start'];
409
+ resume?: AgentExecutionBackend<TInput>['resume'];
410
+ stream: AgentExecutionBackend<TInput>['stream'];
411
+ stop?: AgentExecutionBackend<TInput>['stop'];
412
+ }): AgentExecutionBackend<TInput>;
413
+ /** @stable */
414
+ declare function createSandboxPromptBackend<TBox, TInput extends AgentBackendInput = AgentBackendInput>(options: {
415
+ kind?: string;
416
+ getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox;
417
+ streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>;
418
+ mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined;
419
+ getSessionId?: (box: TBox, input: TInput) => string | undefined;
420
+ }): AgentExecutionBackend<TInput>;
421
+ /** @stable */
422
+ declare function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(options: {
423
+ apiKey: string;
424
+ baseUrl: string;
425
+ model: string;
426
+ kind?: string;
427
+ fetchImpl?: typeof fetch;
428
+ }): AgentExecutionBackend<TInput>;
429
+
430
+ /**
431
+ * @stable
432
+ *
433
+ * Error taxonomy for `@tangle-network/agent-runtime`.
434
+ *
435
+ * Public contract: every error this package throws as part of its consumer-
436
+ * facing API either extends `AgentEvalError` (re-exported here for ergonomic
437
+ * `instanceof` checks at the runtime boundary) or extends one of the
438
+ * runtime-specific subclasses below.
439
+ *
440
+ * Internal invariant guards (`throw new Error('this should never happen')`)
441
+ * remain plain `Error` — they are programmer-mistake assertions, not
442
+ * consumer-catchable contract failures.
443
+ *
444
+ * Subclassing strategy: where a runtime-specific failure maps cleanly to an
445
+ * agent-eval code (validation, config, not_found), we re-use the agent-eval
446
+ * subclass. Runtime-only failure modes (session resume against the wrong
447
+ * backend, backend transport errors) get fresh subclasses that still carry an
448
+ * `AgentEvalErrorCode` so cross-package handlers can pattern-match without
449
+ * importing the runtime.
450
+ */
451
+
452
+ /**
453
+ * @stable
454
+ *
455
+ * Caller asked to resume a session against a backend whose `kind` does not
456
+ * match the session's recorded backend. This is a routing bug — the same
457
+ * session id was reused across two different backend implementations — and
458
+ * is not retryable without picking the right backend.
459
+ */
460
+ declare class SessionMismatchError extends AgentEvalError {
461
+ readonly sessionBackend: string;
462
+ readonly requestedBackend: string;
463
+ constructor(sessionBackend: string, requestedBackend: string, options?: {
464
+ cause?: unknown;
465
+ });
466
+ }
467
+ /**
468
+ * @stable
469
+ *
470
+ * A backend transport call (HTTP, gRPC, sidecar IPC) failed with a non-success
471
+ * status. Distinct from `JudgeError` (which is structural / unrecoverable)
472
+ * because backend failures are sometimes retryable and consumers may want to
473
+ * branch on the upstream status code.
474
+ */
475
+ declare class BackendTransportError extends AgentEvalError {
476
+ readonly backend: string;
477
+ readonly status?: number;
478
+ constructor(backend: string, message: string, options?: {
479
+ cause?: unknown;
480
+ status?: number;
481
+ });
482
+ }
483
+ /**
484
+ * @stable
485
+ *
486
+ * A runtime-run lifecycle method was called in an order the state machine does
487
+ * not allow: `persist()` before `complete()`, `complete()` again with a different terminal status, etc.
488
+ */
489
+ declare class RuntimeRunStateError extends AgentEvalError {
490
+ constructor(message: string, options?: {
491
+ cause?: unknown;
492
+ });
493
+ }
494
+
495
+ /**
496
+ * @stable
497
+ *
498
+ * Pure readiness-decision helper. Maps a `KnowledgeReadinessReport` from
499
+ * `@tangle-network/agent-eval` to a three-state branch (`ready` / `blocked` /
500
+ * `caveat`) the runtime, route handlers, and UI shells can all switch on.
501
+ *
502
+ * Default `minimumScore` of 0.7 mirrors the readiness scoring scale in
503
+ * agent-eval; callers tightening or loosening this should keep it consistent
504
+ * across all entry points for the same product so the UI / metrics agree on
505
+ * what "caveat" means.
506
+ */
507
+
508
+ /** @stable */
509
+ declare function decideKnowledgeReadiness(report: KnowledgeReadinessReport, options?: {
510
+ minimumScore?: number;
511
+ }): KnowledgeReadinessDecision;
512
+
513
+ /**
514
+ * @stable
515
+ *
516
+ * The two top-level entry points:
517
+ *
518
+ * - `runAgentTask` — single-shot lifecycle for adapter-driven tasks.
519
+ * - `runAgentTaskStream` — streaming lifecycle that delegates execution to an
520
+ * `AgentExecutionBackend` (model API, sandbox, or custom iterable).
521
+ *
522
+ * Both gate the run on `KnowledgeReadinessReport` from `agent-eval`, emit the
523
+ * same lifecycle event vocabulary (under different shapes — see `types.ts`),
524
+ * and route session lifecycle through a pluggable `RuntimeSessionStore`.
525
+ */
526
+
527
+ /** @stable */
528
+ declare function runAgentTask<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>>;
529
+ /** @stable */
530
+ declare function summarizeAgentTaskRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>): AgentTaskRunSummary;
531
+ /** @stable */
532
+ declare function runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(options: RunAgentTaskStreamOptions<TInput>): AsyncIterable<RuntimeStreamEvent>;
533
+
534
+ /**
535
+ * @stable
536
+ *
537
+ * Canonical production-run lifecycle. ONE abstraction for "the agent did a
538
+ * thing on behalf of a customer; record what it did, what it cost, and how it
539
+ * ended." Consumer agents (legal, tax, gtm, creative, agent-builder) reach for
540
+ * `startRuntimeRun` instead of inventing their own `agentRuns`-row helpers.
541
+ *
542
+ * Three concerns live in this module:
543
+ *
544
+ * 1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
545
+ * enforced by `RuntimeRunStateError`. Completion is idempotent (a second
546
+ * `complete()` call with the same status is a no-op so retries / cleanup
547
+ * paths don't double-fire side effects). A different terminal status is a
548
+ * state error.
549
+ *
550
+ * 2. **Cost ledger** — every `llm_call` event the handle observes contributes
551
+ * `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
552
+ * measured from `startRuntimeRun()` to `complete()`. Surface via
553
+ * `handle.cost()` for "cost per customer task" dashboards.
554
+ *
555
+ * 3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
556
+ * consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
557
+ * KV store. The adapter receives a sanitized row shape; no telemetry
558
+ * payload bytes flow through it unless the consumer opts in via
559
+ * `RuntimeRunOptions.telemetryEvents`.
560
+ *
561
+ * The pattern replaces legal-agent's bespoke `completeProductionAgentRun` /
562
+ * `persistRuntimeRun` pair from `eval-evidence.ts` + `api.chat.ts`. Both are
563
+ * marked `@deprecated` in this release; consumers can drop them on their own
564
+ * version bumps.
565
+ */
566
+
567
+ /** @stable */
568
+ type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
569
+ /** @stable */
570
+ interface RuntimeRunCost {
571
+ /** Cumulative input tokens across every observed `llm_call` event. */
572
+ tokensIn: number;
573
+ /** Cumulative output tokens across every observed `llm_call` event. */
574
+ tokensOut: number;
575
+ /** Sum of `costUsd` from every observed `llm_call` event. */
576
+ costUsd: number;
577
+ /** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
578
+ wallMs: number;
579
+ /** Count of `llm_call` events observed during the run. */
580
+ llmCalls: number;
581
+ }
582
+ /** @stable */
583
+ interface RuntimeRunCompleteInput {
584
+ status: Exclude<RuntimeRunStatus, 'running'>;
585
+ resultSummary?: string;
586
+ /** Optional explicit cost override; if omitted, the accumulated ledger is used. */
587
+ cost?: Partial<RuntimeRunCost>;
588
+ /** Stable error message when `status === 'failed'`. */
589
+ error?: string;
590
+ /** Additional adapter-specific fields merged into the persisted row. */
591
+ metadata?: Record<string, unknown>;
592
+ }
593
+ /** @stable */
594
+ interface RuntimeRunRow {
595
+ /** Stable runtime-side identifier. Adapters may translate to their own primary key. */
596
+ id: string;
597
+ workspaceId: string;
598
+ sessionId?: string;
599
+ agentId?: string;
600
+ domain?: string;
601
+ taskId: string;
602
+ scenarioId?: string;
603
+ status: RuntimeRunStatus;
604
+ resultSummary?: string;
605
+ error?: string;
606
+ cost: RuntimeRunCost;
607
+ startedAt: string;
608
+ completedAt?: string;
609
+ metadata?: Record<string, unknown>;
610
+ }
611
+ /** @stable */
612
+ interface RuntimeRunPersistenceAdapter {
613
+ /**
614
+ * Called once when `handle.persist()` runs. Implementations write `row` to
615
+ * their durable store (D1, postgres, KV). The declared return type is
616
+ * `Promise<void> | void`, so no value is handed back to the caller. Errors thrown
617
+ * here propagate out of `persist()` so the caller can decide whether to
618
+ * retry or log-and-continue.
619
+ */
620
+ upsert(row: RuntimeRunRow): Promise<void> | void;
621
+ }
622
+ /** @stable */
623
+ interface RuntimeRunOptions {
624
+ workspaceId: string;
625
+ sessionId?: string;
626
+ agentId?: string;
627
+ taskSpec: AgentTaskSpec;
628
+ scenarioId?: string;
629
+ /** Optional persistence adapter; if omitted, `persist()` is a no-op. */
630
+ adapter?: RuntimeRunPersistenceAdapter;
631
+ /** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
632
+ id?: string;
633
+ /** Override the clock; default = `Date.now()`. Useful for deterministic tests. */
634
+ now?: () => number;
635
+ }
636
+ /** @stable */
637
+ interface RuntimeRunHandle {
638
+ /** Stable id assigned at start. */
639
+ readonly id: string;
640
+ readonly workspaceId: string;
641
+ readonly sessionId: string | undefined;
642
+ readonly taskSpec: AgentTaskSpec;
643
+ readonly status: RuntimeRunStatus;
644
+ /**
645
+ * Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
646
+ * (text deltas, tool calls) silently so consumers can pipe the whole stream
647
+ * through `handle.observe`. `llm_call` events update the ledger.
648
+ */
649
+ observe(event: RuntimeStreamEvent): void;
650
+ /** Snapshot of the current cost ledger. Safe to call at any time. */
651
+ cost(): RuntimeRunCost;
652
+ /**
653
+ * Transition to a terminal state. Idempotent for the same status; throws
654
+ * `RuntimeRunStateError` for a different terminal status (state machines
655
+ * don't time-travel).
656
+ */
657
+ complete(input: RuntimeRunCompleteInput): void;
658
+ /** Build the current row without writing it. Useful for tests + dry runs. */
659
+ toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
660
+ /**
661
+ * Persist the current row via the configured adapter. Must be called after
662
+ * `complete()`. Idempotent for the same terminal state (the adapter sees
663
+ * the same row on retry).
664
+ */
665
+ persist(metadata?: Record<string, unknown>): Promise<void>;
666
+ }
667
+ /**
668
+ * @stable
669
+ *
670
+ * Construct a runtime-run handle. The returned handle is mutable across its
671
+ * lifetime; consumers should not share it across requests.
672
+ */
673
+ declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
674
+
675
+ /**
676
+ * @stable
677
+ *
678
+ * Sanitization for runtime telemetry. The rule: nothing user-controlled leaks
679
+ * unless the caller opts in with a `RuntimeTelemetryOptions` flag. This is the
680
+ * envelope that ends up in `agent_run.metadata.runtimeEvents` on every
681
+ * consumer, so the default must be safe.
682
+ */
683
+
684
+ /** @stable */
322
685
  interface RuntimeTelemetryOptions {
323
686
  /**
324
- * Include raw task inputs. Off by default because task inputs often
325
- * contain customer facts, credentials, source text, or internal IDs.
687
+ * Include raw task inputs. Off by default because task inputs often contain
688
+ * customer facts, credentials, source text, or internal IDs.
326
689
  */
327
690
  includeInputs?: boolean;
328
691
  /** Include requirement descriptions. Secret requirements are always redacted. */
@@ -338,6 +701,7 @@ interface RuntimeTelemetryOptions {
338
701
  /** Include eval detail/evidence strings. Off by default because validators may echo private input. */
339
702
  includeEvalDetails?: boolean;
340
703
  }
704
+ /** @stable */
341
705
  interface SanitizedKnowledgeRequirement {
342
706
  id: string;
343
707
  description?: string;
@@ -353,6 +717,7 @@ interface SanitizedKnowledgeRequirement {
353
717
  evidenceIds?: string[];
354
718
  fallbackPolicy: KnowledgeRequirement['fallbackPolicy'];
355
719
  }
720
+ /** @stable */
356
721
  interface SanitizedKnowledgeReadinessReport {
357
722
  taskId: string;
358
723
  readinessScore: number;
@@ -365,40 +730,20 @@ interface SanitizedKnowledgeReadinessReport {
365
730
  evidenceIds?: string[];
366
731
  missingRequirementIds: string[];
367
732
  }
368
- interface AgentTaskRunSummary {
369
- taskId: string;
370
- domain?: string;
371
- status: AgentTaskStatus;
372
- reason: string;
373
- readinessStatus: KnowledgeReadinessDecision['status'];
374
- readinessScore: number;
375
- recommendedAction: KnowledgeReadinessReport['recommendedAction'];
376
- blockingGapIds: string[];
377
- nonBlockingGapIds: string[];
378
- questionCount: number;
379
- acquisitionPlanCount: number;
380
- acquiredEvidenceCount: number;
381
- controlStepCount: number;
382
- pass: boolean;
383
- failureClass?: string;
384
- wallMs: number;
385
- costUsd: number;
386
- }
387
- interface KnowledgeReadinessDecision {
388
- passed: boolean;
389
- status: 'ready' | 'blocked' | 'caveat';
390
- reason: string;
391
- readinessScore: number;
392
- recommendedAction: KnowledgeReadinessReport['recommendedAction'];
393
- severity: KnowledgeReadinessReport['severity'];
394
- blockingGapIds: string[];
395
- nonBlockingGapIds: string[];
396
- }
733
+ /** @stable */
734
+ declare function sanitizeKnowledgeReadinessReport(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions): SanitizedKnowledgeReadinessReport;
735
+ /** @stable */
736
+ declare function sanitizeAgentRuntimeEvent<TState, TAction, TActionResult, TEval extends ControlEvalResult>(event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>, options?: RuntimeTelemetryOptions): Record<string, unknown>;
737
+ /** @stable */
738
+ declare function sanitizeRuntimeStreamEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions): Record<string, unknown>;
739
+ /** @stable */
397
740
  interface RuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult> {
398
- onEvent: AgentRuntimeEventSink<TState, TAction, TActionResult, TEval>;
741
+ onEvent: (event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>) => void;
399
742
  events: Array<Record<string, unknown>>;
400
743
  }
744
+ /** @stable */
401
745
  type RuntimeStreamEventSink = (event: RuntimeStreamEvent) => void;
746
+ /** @stable */
402
747
  interface RuntimeStreamEventSummary {
403
748
  /** Total count of sanitized events collected. */
404
749
  eventCount: number;
@@ -413,36 +758,18 @@ interface RuntimeStreamEventSummary {
413
758
  /** Concatenated `text_delta.text` across the stream, even when payloads are redacted. */
414
759
  finalText: string;
415
760
  }
761
+ /** @stable */
416
762
  interface RuntimeStreamEventCollector {
417
763
  onEvent: RuntimeStreamEventSink;
418
764
  events: Array<Record<string, unknown>>;
419
765
  /** Snapshot of a small streaming-flavored summary derived from collected events. */
420
766
  summary(): RuntimeStreamEventSummary;
421
767
  }
422
- interface ServerSentEventOptions {
423
- event?: string;
424
- id?: string;
425
- retry?: number;
426
- }
427
- declare class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
428
- private readonly sessions;
429
- private readonly events;
430
- get(sessionId: string): RuntimeSession | undefined;
431
- put(session: RuntimeSession): void;
432
- appendEvent(sessionId: string, event: RuntimeStreamEvent): void;
433
- listEvents(sessionId: string): RuntimeStreamEvent[];
434
- }
435
- declare function runAgentTask<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(options: RunAgentTaskOptions<TState, TAction, TActionResult, TEval>): Promise<AgentTaskRunResult<TState, TAction, TActionResult, TEval>>;
436
- declare function summarizeAgentTaskRun<TState, TAction, TActionResult, TEval extends ControlEvalResult>(result: AgentTaskRunResult<TState, TAction, TActionResult, TEval>): AgentTaskRunSummary;
437
- declare function runAgentTaskStream<TInput extends AgentBackendInput = AgentBackendInput>(options: RunAgentTaskStreamOptions<TInput>): AsyncIterable<RuntimeStreamEvent>;
438
- declare function decideKnowledgeReadiness(report: KnowledgeReadinessReport, options?: {
439
- minimumScore?: number;
440
- }): KnowledgeReadinessDecision;
441
- declare function sanitizeKnowledgeReadinessReport(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions): SanitizedKnowledgeReadinessReport;
442
- declare function sanitizeAgentRuntimeEvent<TState, TAction, TActionResult, TEval extends ControlEvalResult>(event: AgentRuntimeEvent<TState, TAction, TActionResult, TEval>, options?: RuntimeTelemetryOptions): Record<string, unknown>;
443
- declare function sanitizeRuntimeStreamEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions): Record<string, unknown>;
768
+ /** @stable */
444
769
  declare function createRuntimeEventCollector<TState = unknown, TAction = unknown, TActionResult = unknown, TEval extends ControlEvalResult = ControlEvalResult>(options?: RuntimeTelemetryOptions): RuntimeEventCollector<TState, TAction, TActionResult, TEval>;
445
770
  /**
771
+ * @stable
772
+ *
446
773
  * Streaming-event counterpart of `createRuntimeEventCollector`. Use this with
447
774
  * `runAgentTaskStream` — pass each yielded event through `onEvent` and read
448
775
  * the sanitized copies off `events`. The same `RuntimeTelemetryOptions`
@@ -455,29 +782,112 @@ declare function createRuntimeEventCollector<TState = unknown, TAction = unknown
455
782
  * events whose `type` literals overlap (`task_start`, `readiness_end`, etc.).
456
783
  */
457
784
  declare function createRuntimeStreamEventCollector(options?: RuntimeTelemetryOptions): RuntimeStreamEventCollector;
785
+
786
+ /**
787
+ * @stable
788
+ *
789
+ * Session helpers + an in-memory `RuntimeSessionStore` implementation suitable
790
+ * for tests, scratch processes, and per-request scratch storage in serverless
791
+ * runtimes. Durable stores (D1, postgres, Durable Objects) implement the same
792
+ * interface from `./types`.
793
+ */
794
+
795
+ /** @stable */
796
+ declare class InMemoryRuntimeSessionStore implements RuntimeSessionStore {
797
+ private readonly sessions;
798
+ private readonly events;
799
+ get(sessionId: string): RuntimeSession | undefined;
800
+ put(session: RuntimeSession): void;
801
+ appendEvent(sessionId: string, event: RuntimeStreamEvent): void;
802
+ listEvents(sessionId: string): RuntimeStreamEvent[];
803
+ }
804
+
805
+ /**
806
+ * @stable
807
+ *
808
+ * Server-Sent Events serialization for runtime telemetry streams.
809
+ *
810
+ * Newline-safe by construction: any newline in `id` or `event` is collapsed to
811
+ * a space (browsers terminate fields on newline), and multi-line `data`
812
+ * payloads are split into one `data:` line per source line so JSON.stringify
813
+ * output transports cleanly.
814
+ */
815
+
816
+ /** @stable */
817
+ interface ServerSentEventOptions {
818
+ event?: string;
819
+ id?: string;
820
+ retry?: number;
821
+ }
822
+ /** @stable */
458
823
  declare function encodeServerSentEvent(data: unknown, options?: ServerSentEventOptions): string;
824
+ /** @stable */
459
825
  declare function readinessServerSentEvent(report: KnowledgeReadinessReport, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
826
+ /** @stable */
460
827
  declare function runtimeStreamServerSentEvent(event: RuntimeStreamEvent, options?: RuntimeTelemetryOptions & ServerSentEventOptions): string;
461
- declare function createIterableBackend<TInput extends AgentBackendInput>(options: {
462
- kind: string;
463
- start?: AgentExecutionBackend<TInput>['start'];
464
- resume?: AgentExecutionBackend<TInput>['resume'];
465
- stream: AgentExecutionBackend<TInput>['stream'];
466
- stop?: AgentExecutionBackend<TInput>['stop'];
467
- }): AgentExecutionBackend<TInput>;
468
- declare function createSandboxPromptBackend<TBox, TInput extends AgentBackendInput = AgentBackendInput>(options: {
469
- kind?: string;
470
- getBox(input: TInput, context: Omit<AgentBackendContext, 'session'>): Promise<TBox> | TBox;
471
- streamPrompt(box: TBox, message: string, context: AgentBackendContext): AsyncIterable<unknown>;
472
- mapEvent?: (event: unknown, context: AgentBackendContext) => RuntimeStreamEvent | undefined;
473
- getSessionId?: (box: TBox, input: TInput) => string | undefined;
474
- }): AgentExecutionBackend<TInput>;
475
- declare function createOpenAICompatibleBackend<TInput extends AgentBackendInput = AgentBackendInput>(options: {
476
- apiKey: string;
477
- baseUrl: string;
478
- model: string;
479
- kind?: string;
480
- fetchImpl?: typeof fetch;
481
- }): AgentExecutionBackend<TInput>;
482
828
 
483
- export { type AgentAdapter, type AgentBackendContext, type AgentBackendInput, type AgentExecutionBackend, type AgentKnowledgeProvider, type AgentRuntimeEvent, type AgentRuntimeEventSink, type AgentTaskContext, type AgentTaskRunResult, type AgentTaskRunSummary, type AgentTaskSpec, type AgentTaskStatus, InMemoryRuntimeSessionStore, type KnowledgeReadinessDecision, type RunAgentTaskOptions, type RunAgentTaskStreamOptions, type RuntimeEventCollector, type RuntimeSession, type RuntimeSessionStore, type RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, decideKnowledgeReadiness, encodeServerSentEvent, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, summarizeAgentTaskRun };
829
+ /**
830
+ * @stable
831
+ *
832
+ * Bridge from runtime stream events to the agent-eval trace schema.
833
+ *
834
+ * Before this module, consumers (legal-agent's chat.ts, gtm-agent's runtime
835
+ * route) hand-rolled an adapter from `RuntimeStreamEvent` -> `TraceEvent` per
836
+ * repo. The mapping is mechanical and the destination schema is owned by
837
+ * agent-eval, so the adapter belongs in runtime, not in N consumer repos.
838
+ *
839
+ * The bridge is intentionally one-way (runtime -> agent-eval). The reverse
840
+ * mapping is degenerate (agent-eval events have no session / task affinity)
841
+ * and would invite consumers to round-trip through agent-eval, defeating the
842
+ * point of the runtime-specific shape.
843
+ */
844
+
845
+ /** @stable */
846
+ interface TraceBridgeOptions {
847
+ /**
848
+ * Stable `runId` to stamp on every emitted `TraceEvent`. Required because
849
+ * agent-eval's `TraceEvent.runId` is non-optional.
850
+ */
851
+ runId: string;
852
+ /**
853
+ * Optional `spanId` to attach when an event maps to a known span (for
854
+ * example, an outer runtime-task span the consumer is already emitting).
855
+ */
856
+ spanId?: string;
857
+ /**
858
+ * Optional id generator; default = monotonic counter scoped to this bridge
859
+ * instance. Override for deterministic tests or to integrate with a wider
860
+ * id-allocator (uuid, ksuid).
861
+ */
862
+ newEventId?: () => string;
863
+ }
864
+ /** @stable */
865
+ interface TraceBridge {
866
+ /**
867
+ * Map a single `RuntimeStreamEvent` to a `TraceEvent`. Returns `undefined`
868
+ * for events that have no useful trace projection (text deltas, reasoning
869
+ * deltas — these belong inside an `LlmSpan.output`, not as separate trace
870
+ * events).
871
+ */
872
+ toTraceEvent(event: RuntimeStreamEvent): TraceEvent | undefined;
873
+ /** Convenience: drain an iterable of stream events into trace events. */
874
+ drain(events: Iterable<RuntimeStreamEvent>): TraceEvent[];
875
+ }
876
+ /**
877
+ * @stable
878
+ *
879
+ * Build a stateful bridge. State is intentionally minimal — only the event-id
880
+ * counter — because the runtime stream already carries timestamps and the
881
+ * caller already knows the `runId`.
882
+ */
883
+ declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
884
+ /**
885
+ * @stable
886
+ *
887
+ * One-shot convenience for callers who don't want to hold a bridge instance.
888
+ * Internally allocates a single-use bridge so id-generation stays consistent
889
+ * within the call.
890
+ */
891
+ declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
892
+
893
+ export { type AgentAdapter, type AgentBackendContext, type AgentBackendInput, type AgentExecutionBackend, type AgentKnowledgeProvider, type AgentRuntimeEvent, type AgentRuntimeEventSink, type AgentTaskContext, type AgentTaskRunResult, type AgentTaskRunSummary, type AgentTaskSpec, type AgentTaskStatus, BackendTransportError, InMemoryRuntimeSessionStore, type KnowledgeReadinessDecision, type RunAgentTaskOptions, type RunAgentTaskStreamOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, type RuntimeSession, type RuntimeSessionStore, type RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type TraceBridge, type TraceBridgeOptions, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, encodeServerSentEvent, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, summarizeAgentTaskRun, toAgentEvalTrace };