@tangle-network/agent-runtime 0.15.1 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { AgentEvalError, KnowledgeReadinessReport, ControlEvalResult, KnowledgeRequirement, TraceEvent } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, CaptureIntegrityError, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, ReplayError, RunRecord, UserQuestion, ValidationError, VerificationError } from '@tangle-network/agent-eval';
3
- import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent, c as AgentTaskSpec, K as KnowledgeReadinessDecision, d as RunAgentTaskOptions, e as AgentTaskRunResult, f as RunAgentTaskStreamOptions, g as AgentTaskRunSummary, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-CYxfw14J.js';
4
- export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-CYxfw14J.js';
3
+ import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, c as RunAgentTaskOptions, d as AgentTaskRunResult, e as RunAgentTaskStreamOptions, f as AgentTaskRunSummary, g as AgentTaskSpec, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-DmhXdAhu.js';
4
+ export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-DmhXdAhu.js';
5
5
  import { AgentProfilePrompt, AgentProfileResources, AgentSubagentProfile, AgentProfile, SandboxInstance } from '@tangle-network/sandbox';
6
6
 
7
7
  /**
@@ -177,402 +177,30 @@ declare function sandboxAsChatTurnTarget(instance: SandboxInstance, opts?: {
177
177
  }): ChatTurnSandbox;
178
178
 
179
179
  /**
180
- * Durable-run substrate: the typed contract for checkpointed agent runs that
181
- * survive worker crashes, deploy rolls, OOM, and transient transport errors.
180
+ * `handleChatTurn` — framework-neutral chat-turn HTTP orchestrator.
181
+ * Owns the NDJSON `ChatStreamEvent` line protocol, the `session.run.*`
182
+ * lifecycle vocabulary, and the persist / post-process / trace-flush
183
+ * hook order. Returns a `ReadableStream` body the product hands to its
184
+ * platform `Response`.
182
185
  *
183
- * The model — directly inspired by Absurd (Postgres-backed) and Cloudflare
184
- * Workflows — splits a run into ordered, idempotent **steps**. Each step's
185
- * result is persisted before the next step runs. On resume, the runner reads
186
- * the prior steps from a `DurableRunStore` and fast-replays them (returning
187
- * cached values) until it reaches the first unfinished step, where execution
188
- * actually resumes.
186
+ * Execution durability is the substrate's concern: `box.streamPrompt`
187
+ * auto-reconnects in-call; cross-process reconnect via `X-Execution-ID`
188
+ * is the product's job. The producer this engine wraps already speaks
189
+ * that protocol — the engine just frames the events.
189
190
  *
190
- * Three boundary disciplines:
191
+ * Hooks (`ChatTurnHooks`):
192
+ * - `produce` — build the backend event stream
193
+ * - `persistAssistantMessage` — write the assistant turn to the product DB
194
+ * - `onTurnComplete?` — post-process (proposals, citations, …)
195
+ * - `onEvent?` — per-event side channel (e.g. DO broadcast)
196
+ * - `transformFinalText?` — pre-persist transform (e.g. PII redact)
197
+ * - `traceFlush?` — handed to waitUntil so OTLP export lands
191
198
  *
192
- * 1. Step results MUST be JSON-serializable. No closures, no class
193
- * instances, no live streams. The store treats results as opaque JSON.
194
- *
195
- * 2. Step intents MUST be stable across replays. The runner derives a
196
- * stable step id from (runId, stepIndex, intent). Mismatched intent at
197
- * the same index = `DurableRunDivergenceError`.
198
- *
199
- * 3. Non-determinism (now / uuid / random) MUST flow through the
200
- * `DurableContext` helpers — `ctx.now()`, `ctx.uuid()` — so the values
201
- * are checkpointed and identical on replay. Bare `Date.now()` /
202
- * `crypto.randomUUID()` inside a task fn breaks replay equality.
199
+ * Framework neutrality: takes already-resolved values (`identity` tuple,
200
+ * a `waitUntil`), never a `Request` or a `Context`. The product's thin
201
+ * route adapter does auth + parse + access-control, then calls
202
+ * `handleChatTurn(...)` and returns `result.body` as its platform `Response`.
203
203
  */
204
-
205
- /** Caller-facing kinds. The runner uses these for telemetry + querying. */
206
- type StepKind =
207
- /** Logical step that ran user code (the default for ctx.step). */
208
- 'logic'
209
- /** A wrapped LLM call. */
210
- | 'llm'
211
- /** A wrapped tool call. */
212
- | 'tool'
213
- /** A wrapped readiness probe. */
214
- | 'readiness'
215
- /** A deterministic clock or uuid read. */
216
- | 'deterministic'
217
- /** A suspend-for-event boundary. */
218
- | 'event';
219
- type StepStatus = 'pending' | 'running' | 'completed' | 'failed';
220
- interface StepError {
221
- message: string;
222
- code?: string;
223
- /** Optional stack — stored for diagnostics, NEVER replayed as an exception. */
224
- stack?: string;
225
- }
226
- interface StepRecord<T = unknown> {
227
- runId: string;
228
- /** Monotonic 0-based index. Position is the load-bearing identifier — the
229
- * same intent string at different positions is a different step. */
230
- stepIndex: number;
231
- /** Caller-supplied label; intended for human reading + log correlation. */
232
- intent: string;
233
- kind: StepKind;
234
- /** sha256 of the canonical input fingerprint at begin-time. Used to detect
235
- * divergence (caller changed inputs across replays). Empty for steps where
236
- * the input cannot be canonicalized (e.g. ctx.now()). */
237
- inputHash: string;
238
- status: StepStatus;
239
- /** Re-entry count. Increments each time the step begins. */
240
- attempts: number;
241
- /** JSON-serializable result. Present when status === 'completed'. */
242
- result?: T;
243
- error?: StepError;
244
- startedAt?: string;
245
- completedAt?: string;
246
- }
247
- interface EventRecord {
248
- runId: string;
249
- key: string;
250
- payload: unknown;
251
- emittedAt: string;
252
- }
253
- /**
254
- * A pointer to a substrate run that outlives the worker isolate — the sandbox
255
- * container is orchestrator-managed and survives a Worker death or a Durable
256
- * Object migration. Persisted on the run row so a fresh supervisor re-attaches
257
- * to the in-flight run instead of re-prompting.
258
- */
259
- interface RunHandle {
260
- /** Which substrate owns the run. `sandbox` runs are reconnectable;
261
- * `tcloud` runs have no cross-process replay endpoint. */
262
- kind: 'sandbox' | 'tcloud';
263
- /** Orchestrator-managed sandbox id — stable across worker isolates. */
264
- sandboxId?: string;
265
- /** Sandbox conversation/session id. */
266
- sessionId?: string;
267
- /** The substrate run id (the sandbox SDK's `executionId`). The replay
268
- * endpoint keys on it. */
269
- runId?: string;
270
- /** Lifecycle of the substrate run as last observed. */
271
- status: 'running' | 'completed' | 'failed';
272
- /** Last substrate event id seen — the adapter's reconnect cursor. */
273
- cursor?: string;
274
- }
275
- /**
276
- * One event in a run's ordered, replayable stream log. The supervisor drains
277
- * a run's event stream into this log as it flows, so replay is guaranteed by
278
- * the substrate rather than by the sandbox runtime's own buffering.
279
- */
280
- interface StreamEventRecord {
281
- runId: string;
282
- /** Monotonic 0-based sequence — the store's ordering + cursor. */
283
- seq: number;
284
- /** Producer-supplied stable id — the dedup key and the substrate cursor. */
285
- eventId: string;
286
- payload: unknown;
287
- appendedAt: string;
288
- }
289
- type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'suspended';
290
- interface RunOutcome {
291
- pass?: boolean;
292
- score?: number;
293
- notes?: string;
294
- /** Free-form bag of run-level metrics — surfaced in OTLP / TraceStore. */
295
- metadata?: Record<string, unknown>;
296
- }
297
- interface DurableRunManifest {
298
- /** Stable per-product id (e.g. 'legal-agent', 'creative-agent'). */
299
- projectId: string;
300
- /** Optional scenario / persona / session id — surfaced in telemetry. */
301
- scenarioId?: string;
302
- task: AgentTaskSpec;
303
- /** Input payload. Hashed into the run identity so two runs with the same
304
- * runId but different inputs raise DurableRunInputMismatchError. */
305
- input: Record<string, unknown>;
306
- /** Free-form tags surfaced into RunRecord / OTLP. */
307
- tags?: Record<string, string>;
308
- }
309
- interface RunRecord {
310
- runId: string;
311
- manifestHash: string;
312
- projectId: string;
313
- scenarioId?: string;
314
- status: RunStatus;
315
- createdAt: string;
316
- updatedAt: string;
317
- completedAt?: string;
318
- /** Stable per-worker id holding the lease. */
319
- leaseHolderId?: string;
320
- leaseExpiresAt?: string;
321
- outcome?: RunOutcome;
322
- stepCount: number;
323
- /** Pointer to the in-flight substrate run, when one has been registered.
324
- * A fresh supervisor re-attaches by it. */
325
- handle?: RunHandle;
326
- }
327
- /**
328
- * The durable-run substrate. Implementations: in-memory (dev), file-system
329
- * (eval harness), D1 (Cloudflare prod). All stores share this exact contract
330
- * — swap by changing one factory call.
331
- *
332
- * Concurrency model: at most one worker holds a run's lease at a time. Lease
333
- * renewal happens on a heartbeat; on lease expiry, another worker can
334
- * `startOrResume` and pick up. Steps committed by the prior worker survive.
335
- */
336
- interface DurableRunStore {
337
- /**
338
- * Begin or resume a run. Returns the canonical RunRecord, all previously
339
- * completed steps (in order), and the lease deadline.
340
- *
341
- * If the run did not exist, creates it with status='running'. If it existed
342
- * with a different manifest hash, throws DurableRunInputMismatchError.
343
- * If it existed with a live lease held by a different worker, throws
344
- * DurableRunLeaseHeldError (caller can retry or back off).
345
- */
346
- startOrResume(input: {
347
- runId: string;
348
- manifest: DurableRunManifest;
349
- workerId: string;
350
- leaseMs?: number;
351
- }): Promise<{
352
- run: RunRecord;
353
- completedSteps: ReadonlyArray<StepRecord>;
354
- leaseExpiresAt: string;
355
- }>;
356
- /** Renew the lease. Returns false if another worker now holds it. */
357
- renewLease(input: {
358
- runId: string;
359
- workerId: string;
360
- leaseMs?: number;
361
- }): Promise<{
362
- ok: boolean;
363
- leaseExpiresAt?: string;
364
- }>;
365
- /** Load a step by position. Returns undefined if not yet begun. */
366
- loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
367
- /** Record step start (intent + input hash + kind). Bumps attempt count. */
368
- beginStep(input: {
369
- runId: string;
370
- stepIndex: number;
371
- intent: string;
372
- kind: StepKind;
373
- inputHash: string;
374
- }): Promise<StepRecord>;
375
- /** Mark step completed with a JSON-serializable result. */
376
- completeStep(input: {
377
- runId: string;
378
- stepIndex: number;
379
- result: unknown;
380
- }): Promise<StepRecord>;
381
- /** Mark step failed with a captured error. */
382
- failStep(input: {
383
- runId: string;
384
- stepIndex: number;
385
- error: StepError;
386
- }): Promise<StepRecord>;
387
- /** End the run; releases lease. */
388
- endRun(input: {
389
- runId: string;
390
- workerId: string;
391
- status: 'completed' | 'failed';
392
- outcome?: RunOutcome;
393
- }): Promise<RunRecord>;
394
- /**
395
- * Emit an event. First emit wins; subsequent emits return the existing
396
- * record under `existing` and accepted=false. Caller can treat that as
397
- * idempotency-by-design — never double-fire a downstream side effect.
398
- */
399
- emitEvent(input: {
400
- runId: string;
401
- key: string;
402
- payload: unknown;
403
- }): Promise<{
404
- accepted: boolean;
405
- record: EventRecord;
406
- }>;
407
- /** Load the cached event payload if it has been emitted. */
408
- loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
409
- /**
410
- * Append an event to the run's ordered stream log. The store assigns the
411
- * monotonic `seq`. Idempotent on `eventId`: re-appending a known id is a
412
- * no-op that returns the existing record under `accepted: false` — so an
413
- * adapter that re-yields a boundary event on reconnect cannot double-log.
414
- */
415
- appendStreamEvent(input: {
416
- runId: string;
417
- eventId: string;
418
- payload: unknown;
419
- }): Promise<{
420
- accepted: boolean;
421
- record: StreamEventRecord;
422
- }>;
423
- /**
424
- * Read the stream log in `seq` order. `afterSeq` (exclusive) resumes a
425
- * reader from a cursor; omit for the whole log.
426
- */
427
- readStreamEvents(runId: string, afterSeq?: number): Promise<ReadonlyArray<StreamEventRecord>>;
428
- /** Persist the run handle — the pointer a fresh supervisor re-attaches by.
429
- * One per run; overwrites. */
430
- setRunHandle(input: {
431
- runId: string;
432
- handle: RunHandle;
433
- }): Promise<void>;
434
- /** Cleanup hook for in-memory / fs stores; no-op for D1. Idempotent. */
435
- close(): Promise<void>;
436
- }
437
- /** Base class for durable-run errors. */
438
- declare class DurableRunError extends Error {
439
- readonly code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race';
440
- constructor(message: string, code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race');
441
- }
442
- /** Thrown when another worker holds the lease for this runId. */
443
- declare class DurableRunLeaseHeldError extends DurableRunError {
444
- constructor(message: string);
445
- }
446
- /** Thrown when the manifest hash differs from a prior run with the same id. */
447
- declare class DurableRunInputMismatchError extends DurableRunError {
448
- constructor(message: string);
449
- }
450
- /** Thrown when the same stepIndex re-runs with a different intent string. */
451
- declare class DurableRunDivergenceError extends DurableRunError {
452
- constructor(message: string);
453
- }
454
- /** Thrown when `awaitEvent` times out. */
455
- declare class DurableAwaitEventTimeoutError extends DurableRunError {
456
- constructor(message: string);
457
- }
458
-
459
- /**
460
- * `runDurableTurn` — a streaming, backend-agnostic, checkpoint+replay durable
461
- * turn. The single reusable primitive every product's chat handler routes
462
- * through, so per-product durability code drops to zero.
463
- *
464
- * A **turn** is one request→response unit: a producer yields a stream of
465
- * events and, once drained, exposes the turn's final text. `runDurableTurn`
466
- * wraps that with a `DurableRunStore`:
467
- *
468
- * - **Fresh run** — no completed step for this `(runId)`. The producer
469
- * runs; its events forward live to the caller (streaming preserved)
470
- * while final text accumulates; on drain the text is checkpointed.
471
- *
472
- * - **Replay** — a completed step already exists (the worker died after
473
- * the turn finished but before the response reached the client, and the
474
- * client retried the same turn). The cached text is emitted as a single
475
- * synthetic event; the producer is never constructed — no LLM call, no
476
- * double-billing.
477
- *
478
- * - **Mid-stream crash** — a turn that died *while streaming* leaves step 0
479
- * in `running`/`failed`. There is no partial-stream checkpoint (the
480
- * substrate checkpoints JSON values at step granularity), so the turn
481
- * re-runs from the top. This is the honest durability ceiling: a
482
- * *completed* turn is free to replay; an *interrupted* turn re-runs.
483
- *
484
- * Generic over the event type `TEvent` so a product can stream its own NDJSON
485
- * shape or the runtime's `RuntimeStreamEvent` — `runDurableTurn` never
486
- * inspects events, it only forwards them and reads `finalText()` after drain.
487
- *
488
- * Lease: a turn is a single step, fast enough that the heartbeat in
489
- * `runDurable` is unnecessary — `runDurableTurn` claims the lease once via
490
- * `startOrResume` and releases it on `endRun`. Concurrent workers on the same
491
- * `runId` are rejected with `DurableRunLeaseHeldError` (the client retried
492
- * before the first attempt finished); callers surface that as "turn already
493
- * in flight."
494
- */
495
-
496
- /** The live side of a turn — what a fresh run produces. */
497
- interface DurableTurnProducer<TEvent> {
498
- /** The turn's event stream. Forwarded verbatim to the caller. */
499
- stream: AsyncGenerator<TEvent, void, unknown>;
500
- /** The turn's final assistant text. Read once, after `stream` drains. */
501
- finalText(): string;
502
- }
503
- interface RunDurableTurnOptions<TEvent> {
504
- store: DurableRunStore;
505
- /** Stable per-turn run id. Convention: `chat:<threadId>:<turnIndex>`. The
506
- * same id on a retry is what enables replay. */
507
- runId: string;
508
- manifest: DurableRunManifest;
509
- /** Stable per-isolate worker id. Defaults to a fresh `deriveWorkerId()`
510
- * per call when omitted — fine for single-attempt turns. */
511
- workerId: string;
512
- /** Lease window in ms. Default 60_000 — a turn rarely runs longer. */
513
- leaseMs?: number;
514
- /** Human-readable step label. Default `turn`. */
515
- intent?: string;
516
- /** Builds the live producer. Called exactly once, on a fresh run; never
517
- * called on the replay path. */
518
- produce: () => DurableTurnProducer<TEvent>;
519
- /** Synthesizes the single event emitted on the replay path from the
520
- * cached final text (e.g. a product's `{ type: 'result', data: {...} }`). */
521
- replayEvent: (finalText: string) => TEvent;
522
- /** Optional live accumulator. When the producer's `finalText()` is only
523
- * valid after drain, this lets `runDurableTurn` also observe each event
524
- * to build the text — return the running text or `undefined` to ignore
525
- * an event. When omitted, `producer.finalText()` is the sole source. */
526
- accumulate?: (event: TEvent, current: string) => string | undefined;
527
- }
528
- interface DurableTurnHandle<TEvent> {
529
- /** Drop-in stream. Fresh runs forward producer events live; replays emit
530
- * exactly one `replayEvent(cachedText)`. */
531
- stream: AsyncGenerator<TEvent, void, unknown>;
532
- /** The turn's final text. Valid after `stream` drains. */
533
- finalText(): string;
534
- /** True iff this turn replayed a cached result (no producer ran). Valid
535
- * after `stream` drains. */
536
- replayed(): boolean;
537
- /** The durable `RunRecord` for this turn. Valid after `stream` drains. */
538
- record(): RunRecord | undefined;
539
- }
540
- declare function runDurableTurn<TEvent>(options: RunDurableTurnOptions<TEvent>): DurableTurnHandle<TEvent>;
541
-
542
- /**
543
- * `DurableChatTurnEngine` — the framework-neutral chat-turn orchestrator every
544
- * product chat handler routes through. It owns the parts that were copy-pasted
545
- * across legal / gtm / creative / tax: durable checkpointing, the NDJSON
546
- * `StreamEvent` line protocol, the `session.run.*` lifecycle vocabulary, the
547
- * runtime-run cost ledger, and trace flush. Everything genuinely
548
- * product-specific is a hook the product supplies.
549
- *
550
- * What the engine owns:
551
- * - durable turn (`runDurableTurn`): completed turns replay free, no re-bill
552
- * - the `session.run.started` / `session.run.completed` / `session.run.failed`
553
- * event envelope around the producer's events
554
- * - NDJSON encoding into a `ReadableStream<Uint8Array>` (the body every
555
- * product returns, React Router or Hono alike)
556
- * - calling the product's persist / post-process hooks in the right order,
557
- * after the stream drains, with the assembled final text
558
- * - never throwing into the HTTP layer — a producer failure becomes an
559
- * `error` + `session.run.failed` event pair, the stream still closes
560
- *
561
- * What the product supplies (`ChatTurnHooks`):
562
- * - `produce` — build the backend stream for this turn (sandbox / router
563
- * / tcloud / runtime — the engine does not care which)
564
- * - `persistAssistantMessage` — write the assistant turn to the product DB
565
- * - `onTurnComplete` (optional) — post-process (proposals, citations, …)
566
- * - `onEvent` (optional) — per-event side-channel (e.g. DO broadcast)
567
- * - `transformFinalText` (optional) — pre-persist transform (e.g. PII redact)
568
- *
569
- * Framework neutrality: the engine takes already-resolved values
570
- * (`userId`, identity tuple, parsed message, a `DurableRunStore`, a
571
- * `waitUntil`), never a `Request` or a `Context`. The product's thin route
572
- * adapter does auth + parse + access-control, then calls `engine.runTurn(...)`
573
- * and returns `result.body` as its platform `Response`.
574
- */
575
-
576
204
  /** The NDJSON line protocol every product chat client already speaks. */
577
205
  interface ChatStreamEvent {
578
206
  type: string;
@@ -582,76 +210,56 @@ interface ChatStreamEvent {
582
210
  * scoped products and the user id for session-scoped products. */
583
211
  interface ChatTurnIdentity {
584
212
  tenantId: string;
585
- /** Thread / session id — the durable run is keyed on this + `turnIndex`. */
213
+ /** Thread / session id. */
586
214
  sessionId: string;
587
215
  userId: string;
588
216
  /** Monotonic 0-based turn index within the session. */
589
217
  turnIndex: number;
590
218
  }
219
+ /** The live side of a turn — what the product's `produce` hook returns. */
220
+ interface ChatTurnProducer<TEvent extends ChatStreamEvent = ChatStreamEvent> {
221
+ /** The turn's event stream. Forwarded verbatim to the caller. */
222
+ stream: AsyncGenerator<TEvent, void, unknown>;
223
+ /** The turn's final assistant text. Read once, after `stream` drains. */
224
+ finalText(): string;
225
+ }
591
226
  interface ChatTurnHooks {
592
- /**
593
- * Build the backend stream for this turn. The engine never inspects which
594
- * backend this is — sandbox container, tcloud router, direct runtime, a
595
- * test double — it only forwards the events and reads `finalText()`.
596
- */
597
- produce(): DurableTurnProducer<ChatStreamEvent>;
598
- /**
599
- * Persist the completed assistant message to the product's own store.
600
- * Called once, after the stream drains, on a fresh (non-replay) run.
601
- * Receives the assembled (and `transformFinalText`-transformed) text.
602
- */
227
+ /** Build the backend stream. The engine forwards events verbatim and
228
+ * reads `finalText()` once the stream drains. */
229
+ produce(): ChatTurnProducer;
230
+ /** Persist the assistant message to the product's own store. Called
231
+ * once, after drain, with the assembled (transform-applied) text. */
603
232
  persistAssistantMessage(input: {
604
233
  identity: ChatTurnIdentity;
605
234
  finalText: string;
606
- record: RunRecord | undefined;
607
235
  }): Promise<void>;
608
- /**
609
- * Optional post-processing after persistence — proposal extraction,
610
- * citation validation, credit metering, etc. Product policy; the engine
611
- * has no shared logic here. Errors are swallowed + logged (post-process
612
- * must never fail the turn that already streamed successfully).
613
- */
236
+ /** Optional post-processing (proposals, citations, credit metering …).
237
+ * Errors are swallowed + logged — post-process must never fail a turn
238
+ * that already streamed successfully. */
614
239
  onTurnComplete?(input: {
615
240
  identity: ChatTurnIdentity;
616
241
  finalText: string;
617
242
  }): Promise<void>;
618
- /**
619
- * Optional per-event side channel (e.g. Durable Object broadcast). Runs
620
- * for every event the engine emits, lifecycle envelope included. Errors
621
- * are swallowed — a broadcast failure must not break the chat stream.
622
- */
243
+ /** Optional per-event side channel (e.g. DO broadcast). Runs for every
244
+ * emitted event, lifecycle envelope included. Errors swallowed — a
245
+ * broadcast failure must not break the chat stream. */
623
246
  onEvent?(event: ChatStreamEvent): void | Promise<void>;
624
- /**
625
- * Optional pre-persist transform of the final text (e.g. PII redaction).
626
- * Affects only what is persisted; the live stream is never altered.
627
- */
247
+ /** Optional pre-persist transform of the final text (e.g. PII
248
+ * redaction). Affects only what is persisted; the live stream is
249
+ * never altered. */
628
250
  transformFinalText?(text: string): string | Promise<string>;
629
- /**
630
- * Optional trace flush — resolves when OTLP export completes. The engine
631
- * hands it to `waitUntil` so the worker isolate stays alive for the POST.
632
- */
251
+ /** Optional trace flush — resolves when OTLP export completes. Handed
252
+ * to `waitUntil` so the worker isolate stays alive for the POST. */
633
253
  traceFlush?(): Promise<void>;
634
254
  }
635
255
  interface RunChatTurnInput {
636
256
  identity: ChatTurnIdentity;
637
- /** The user's message for this turn. Hashed into the durable run identity. */
638
- userMessage: string;
639
- /** Product id for telemetry / the durable manifest (`legal-agent`, …). */
640
- projectId: string;
641
- /** Domain tag for the task spec (`legal`, `gtm`, …). */
642
- domain: string;
643
- /** Model id, when known — recorded on the manifest. */
644
- model?: string;
645
- store: DurableRunStore;
646
257
  hooks: ChatTurnHooks;
647
- /** Worker liveness hook (`ctx.waitUntil` / `executionCtx.waitUntil`). When
648
- * omitted, trace flush is awaited inline before the stream closes. */
258
+ /** Worker liveness hook. When omitted, trace flush is awaited inline
259
+ * before the stream closes. */
649
260
  waitUntil?: (p: Promise<unknown>) => void;
650
- /** Stable per-isolate worker id. Defaults to a fresh `deriveWorkerId()`. */
651
- workerId?: string;
652
- /** Lease window in ms. Default 60_000. */
653
- leaseMs?: number;
654
- /** Optional structured logger for swallowed hook errors. */
261
+ /** Structured logger for swallowed hook errors. Defaults to
262
+ * `console.error` so failures surface without product wiring. */
655
263
  log?: (message: string, meta?: Record<string, unknown>) => void;
656
264
  }
657
265
  interface ChatTurnResult {
@@ -661,697 +269,36 @@ interface ChatTurnResult {
661
269
  contentType: 'application/x-ndjson';
662
270
  }
663
271
  /**
664
- * The engine. One instance is stateless and reusable across requests — all
665
- * per-turn state lives in `runTurn`'s closure.
666
- */
667
- declare class DurableChatTurnEngine {
668
- /**
669
- * Run one durable chat turn. Returns immediately with a `ReadableStream`
670
- * body; the turn executes as the body is pulled. Never rejects — backend
671
- * failures surface as `error` + `session.run.failed` events.
672
- */
673
- runTurn(input: RunChatTurnInput): ChatTurnResult;
674
- }
675
- /** Convenience singleton — the engine is stateless, one instance is enough. */
676
- declare const durableChatTurnEngine: DurableChatTurnEngine;
677
-
678
- /**
679
- * D1DurableRunStore — the production path for Cloudflare Workers. Backed by
680
- * a D1 (SQLite-compatible) database via the binding the worker already holds.
681
- *
682
- * Apply `./schema.sql` once before use; the store itself does not run DDL.
683
- * Migration version is recorded in `durable_schema_info`; consumers can
684
- * inspect `getSchemaVersion()` if they ship a migration tool.
685
- *
686
- * Why structural typing: agent-runtime stays Cloudflare-free at the dep
687
- * level. Consumers pass their `D1Database` binding — TypeScript matches the
688
- * minimal `D1DatabaseLike` surface below. Tests use the same interface with
689
- * a fake.
690
- */
691
-
692
- /**
693
- * Minimal D1 surface this store uses. Compatible with Cloudflare's
694
- * `D1Database` from `@cloudflare/workers-types`. Defined locally so
695
- * agent-runtime does not depend on workers-types at the package level.
696
- */
697
- interface D1DatabaseLike {
698
- prepare(query: string): D1PreparedStatementLike;
699
- batch(statements: D1PreparedStatementLike[]): Promise<unknown[]>;
700
- }
701
- interface D1PreparedStatementLike {
702
- bind(...values: unknown[]): D1PreparedStatementLike;
703
- first<T = unknown>(): Promise<T | null>;
704
- all<T = unknown>(): Promise<{
705
- results: T[];
706
- }>;
707
- run(): Promise<{
708
- success: boolean;
709
- meta?: {
710
- changes?: number;
711
- };
712
- }>;
713
- }
714
- declare class D1DurableRunStore implements DurableRunStore {
715
- private readonly db;
716
- constructor(db: D1DatabaseLike);
717
- /** Override for tests — defaults to Date.now(). */
718
- now: () => number;
719
- startOrResume(input: {
720
- runId: string;
721
- manifest: DurableRunManifest;
722
- workerId: string;
723
- leaseMs?: number;
724
- }): ReturnType<DurableRunStore['startOrResume']>;
725
- renewLease(input: {
726
- runId: string;
727
- workerId: string;
728
- leaseMs?: number;
729
- }): Promise<{
730
- ok: boolean;
731
- leaseExpiresAt?: string;
732
- }>;
733
- loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
734
- beginStep(input: {
735
- runId: string;
736
- stepIndex: number;
737
- intent: string;
738
- kind: StepKind;
739
- inputHash: string;
740
- }): Promise<StepRecord>;
741
- completeStep(input: {
742
- runId: string;
743
- stepIndex: number;
744
- result: unknown;
745
- }): Promise<StepRecord>;
746
- failStep(input: {
747
- runId: string;
748
- stepIndex: number;
749
- error: StepError;
750
- }): Promise<StepRecord>;
751
- endRun(input: {
752
- runId: string;
753
- workerId: string;
754
- status: 'completed' | 'failed';
755
- outcome?: RunOutcome;
756
- }): Promise<RunRecord>;
757
- emitEvent(input: {
758
- runId: string;
759
- key: string;
760
- payload: unknown;
761
- }): ReturnType<DurableRunStore['emitEvent']>;
762
- loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
763
- appendStreamEvent(input: {
764
- runId: string;
765
- eventId: string;
766
- payload: unknown;
767
- }): ReturnType<DurableRunStore['appendStreamEvent']>;
768
- readStreamEvents(runId: string, afterSeq?: number): Promise<ReadonlyArray<StreamEventRecord>>;
769
- setRunHandle(input: {
770
- runId: string;
771
- handle: RunHandle;
772
- }): Promise<void>;
773
- close(): Promise<void>;
774
- /** Inspect the currently-applied schema version. */
775
- getSchemaVersion(): Promise<number | undefined>;
776
- private readSteps;
777
- private bumpUpdated;
778
- }
779
-
780
- /**
781
- * FileSystemDurableRunStore — durable-run substrate backed by a directory
782
- * tree under a single root. One subdir per run:
783
- *
784
- * <root>/<runId>/
785
- * run.json — RunRecord (rewritten on every mutation; the only
786
- * scalar fields are status/lease, so this stays small)
787
- * steps.jsonl — append-only StepRecord stream; one JSON per line
788
- * events.jsonl — append-only EventRecord stream
789
- * lease.json — current leaseholder + deadline (separate from
790
- * run.json so renewLease writes one tiny file
791
- * instead of round-tripping the whole run record)
792
- *
793
- * Concurrency: the eval harness is single-process — we rely on Node's
794
- * append-mode semantics for atomicity of step / event writes (single-line
795
- * writes < PIPE_BUF are atomic on POSIX). For run.json / lease.json we write
796
- * to a `<file>.tmp` then `rename` to make replacement atomic. This is
797
- * sufficient for the single-process eval harness use case. Multi-process
798
- * concurrency on the SAME filesystem requires a flock-based extension;
799
- * for that path use D1DurableRunStore.
272
+ * Run one chat turn. Returns immediately with a `ReadableStream` body;
273
+ * the turn executes as the body is pulled. Never rejects — backend
274
+ * failures surface as `error` + `session.run.failed` events.
800
275
  */
801
-
802
- declare class FileSystemDurableRunStore implements DurableRunStore {
803
- private readonly root;
804
- constructor(root: string);
805
- /** Override for tests — defaults to Date.now(). */
806
- now: () => number;
807
- startOrResume(input: {
808
- runId: string;
809
- manifest: DurableRunManifest;
810
- workerId: string;
811
- leaseMs?: number;
812
- }): ReturnType<DurableRunStore['startOrResume']>;
813
- renewLease(input: {
814
- runId: string;
815
- workerId: string;
816
- leaseMs?: number;
817
- }): Promise<{
818
- ok: boolean;
819
- leaseExpiresAt?: string;
820
- }>;
821
- loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
822
- beginStep(input: {
823
- runId: string;
824
- stepIndex: number;
825
- intent: string;
826
- kind: StepKind;
827
- inputHash: string;
828
- }): Promise<StepRecord>;
829
- completeStep(input: {
830
- runId: string;
831
- stepIndex: number;
832
- result: unknown;
833
- }): Promise<StepRecord>;
834
- failStep(input: {
835
- runId: string;
836
- stepIndex: number;
837
- error: StepError;
838
- }): Promise<StepRecord>;
839
- endRun(input: {
840
- runId: string;
841
- workerId: string;
842
- status: 'completed' | 'failed';
843
- outcome?: RunOutcome;
844
- }): Promise<RunRecord>;
845
- emitEvent(input: {
846
- runId: string;
847
- key: string;
848
- payload: unknown;
849
- }): ReturnType<DurableRunStore['emitEvent']>;
850
- loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
851
- appendStreamEvent(input: {
852
- runId: string;
853
- eventId: string;
854
- payload: unknown;
855
- }): ReturnType<DurableRunStore['appendStreamEvent']>;
856
- readStreamEvents(runId: string, afterSeq?: number): Promise<ReadonlyArray<StreamEventRecord>>;
857
- setRunHandle(input: {
858
- runId: string;
859
- handle: RunHandle;
860
- }): Promise<void>;
861
- close(): Promise<void>;
862
- private readStreamEventsRaw;
863
- /** @internal — used by tests to list runs in the store. */
864
- _listRunIds(): Promise<string[]>;
865
- private runDir;
866
- private readRun;
867
- private writeRun;
868
- private readLeaseSafe;
869
- private writeLease;
870
- private readSteps;
871
- private appendStep;
872
- private bumpRunUpdated;
873
- }
276
+ declare function handleChatTurn(input: RunChatTurnInput): ChatTurnResult;
874
277
 
875
278
  /**
876
- * Identity + canonical-hash helpers for the durable-runs substrate.
877
- *
878
- * Two boundary disciplines:
879
- *
880
- * 1. **Manifest hash** — sha256 over a sorted-key JSON of (projectId,
881
- * scenarioId, task.id, task.intent, task.domain, input). Same hash =
882
- * same run identity. Used to detect "same runId, different inputs."
883
- *
884
- * 2. **Step input hash** — sha256 over a sorted-key JSON of the step's
885
- * input fingerprint. Used to detect drift across replays.
886
- *
887
- * Sorted-key JSON makes hashes deterministic regardless of object insertion
888
- * order. NaN / Infinity / undefined / functions / symbols / class instances
889
- * are rejected — pure JSON only at the boundary, so the hash matches whatever
890
- * the store round-trips.
891
- */
892
-
893
- /** sha256-hex over a JSON-canonicalized value. */
894
- declare function canonicalHash(value: unknown): string;
895
- /** Canonical JSON: object keys sorted lexicographically; arrays preserved. */
896
- declare function canonicalJson(value: unknown): string;
897
- /** Hash a DurableRunManifest into the run identity component. */
898
- declare function manifestHash(manifest: DurableRunManifest): string;
899
- /** Stable per-step identifier — hash of (runId, position, intent). */
900
- declare function stepId(runId: string, stepIndex: number, intent: string): string;
901
- /**
902
- * Stable worker id for a single process. Format: `host:pid:rand`. Random
903
- * suffix prevents collisions when the host/pid pair is short-lived (e.g.,
904
- * Cloudflare isolates that recycle fast).
905
- */
906
- declare function deriveWorkerId(): string;
907
-
908
- /**
909
- * In-memory DurableRunStore for dev + tests. Single-process. All state lives
910
- * in maps. Lease enforcement is real (Date.now() vs lease deadline) so the
911
- * crash-recovery + multi-worker race tests run identically against this and
912
- * the file-system / D1 stores.
913
- */
914
-
915
- declare class InMemoryDurableRunStore implements DurableRunStore {
916
- private readonly runs;
917
- /** Override for tests — defaults to Date.now(). */
918
- now: () => number;
919
- startOrResume(input: {
920
- runId: string;
921
- manifest: DurableRunManifest;
922
- workerId: string;
923
- leaseMs?: number;
924
- }): ReturnType<DurableRunStore['startOrResume']>;
925
- renewLease(input: {
926
- runId: string;
927
- workerId: string;
928
- leaseMs?: number;
929
- }): Promise<{
930
- ok: boolean;
931
- leaseExpiresAt?: string;
932
- }>;
933
- loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
934
- beginStep(input: {
935
- runId: string;
936
- stepIndex: number;
937
- intent: string;
938
- kind: StepKind;
939
- inputHash: string;
940
- }): Promise<StepRecord>;
941
- completeStep(input: {
942
- runId: string;
943
- stepIndex: number;
944
- result: unknown;
945
- }): Promise<StepRecord>;
946
- failStep(input: {
947
- runId: string;
948
- stepIndex: number;
949
- error: StepError;
950
- }): Promise<StepRecord>;
951
- endRun(input: {
952
- runId: string;
953
- workerId: string;
954
- status: 'completed' | 'failed';
955
- outcome?: RunOutcome;
956
- }): Promise<RunRecord>;
957
- emitEvent(input: {
958
- runId: string;
959
- key: string;
960
- payload: unknown;
961
- }): ReturnType<DurableRunStore['emitEvent']>;
962
- loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
963
- appendStreamEvent(input: {
964
- runId: string;
965
- eventId: string;
966
- payload: unknown;
967
- }): ReturnType<DurableRunStore['appendStreamEvent']>;
968
- readStreamEvents(runId: string, afterSeq?: number): Promise<ReadonlyArray<StreamEventRecord>>;
969
- setRunHandle(input: {
970
- runId: string;
971
- handle: RunHandle;
972
- }): Promise<void>;
973
- close(): Promise<void>;
974
- /** @internal — used by tests to inspect lease metadata. */
975
- _inspect(runId: string): RunRecord | undefined;
976
- /** @internal — used by tests to simulate lease expiry. */
977
- _expireLease(runId: string): void;
978
- private requireRun;
979
- }
980
-
981
- /**
982
- * Durable runner — wraps a user-supplied async function in checkpoint /
983
- * resume / lease semantics. The user writes plain async code, awaiting
984
- * `ctx.step(intent, fn)` boundaries. On worker crash, the next caller with
985
- * the same `runId` skips completed steps and resumes from the first unfinished
986
- * one.
987
- *
988
- * Invariants:
989
- *
990
- * - Step positions are derived from a monotonic counter on the ctx. The
991
- * same intent at position N is the same step across replays. If the user
992
- * reorders steps, position N changes intent and we raise
993
- * DurableRunDivergenceError fail-loud.
994
- *
995
- * - `ctx.now()` and `ctx.uuid()` are checkpointed as zero-input logic steps
996
- * with kind='deterministic'. On replay they return the recorded value.
279
+ * Derive a stable executionId from the run identity. The same
280
+ * `(projectId, sessionId, turnIndex)` tuple yields the same id — so a
281
+ * client retry of the same turn lands on the same substrate execution
282
+ * and the orchestrator's buffer replays instead of starting a second
283
+ * prompt.
997
284
  *
998
- * - `awaitEvent` writes a 'event' step that records the event payload on
999
- * first awaited completion. On replay, the cached payload returns
1000
- * synchronously. If the event has not been emitted and the runner is in
1001
- * a fresh execution, it polls the store until timeout.
1002
- *
1003
- * - Lease renewal happens on a wall-clock interval (every leaseMs/3). If
1004
- * the store reports a lost lease, the runner aborts the current step
1005
- * execution and throws — letting whichever worker holds the lease pick
1006
- * up. Committed steps survive.
1007
- */
1008
-
1009
- interface DurableContext {
1010
- readonly runId: string;
1011
- readonly projectId: string;
1012
- readonly scenarioId?: string;
1013
- /**
1014
- * Execute a checkpointed step. The step is identified by its **position**
1015
- * (monotonic counter on this ctx); `intent` is a human-readable label that
1016
- * must stay stable across replays.
1017
- *
1018
- * On first execution: runs `fn`, records the result, returns it.
1019
- * On replay: returns the recorded result WITHOUT calling `fn`.
1020
- *
1021
- * The `inputFingerprint` (optional) lets the runner detect "same intent,
1022
- * different inputs" — it gets hashed and compared. If you don't supply
1023
- * one, drift is allowed (input not checked).
1024
- */
1025
- step<T>(intent: string, fn: () => Promise<T>, opts?: {
1026
- kind?: StepKind;
1027
- inputFingerprint?: unknown;
1028
- }): Promise<T>;
1029
- /** Race-free first-emit-wins event wait. */
1030
- awaitEvent<T = unknown>(key: string, opts?: {
1031
- timeoutMs?: number;
1032
- pollMs?: number;
1033
- }): Promise<T>;
1034
- /** Emit an event. First emit wins. Subsequent emits no-op. */
1035
- emitEvent(key: string, payload: unknown): Promise<{
1036
- accepted: boolean;
1037
- }>;
1038
- /** Deterministic clock — checkpointed once per call. */
1039
- now(): Promise<Date>;
1040
- /** Deterministic uuid — checkpointed once per call. */
1041
- uuid(): Promise<string>;
1042
- }
1043
- interface RunDurableInput<TResult> {
1044
- runId: string;
1045
- manifest: DurableRunManifest;
1046
- store: DurableRunStore;
1047
- workerId?: string;
1048
- leaseMs?: number;
1049
- /** Total time budget for the run. Used for awaitEvent timeouts; runner
1050
- * itself doesn't kill long-running steps (the step fn must respect
1051
- * AbortSignal if it cares). */
1052
- signal?: AbortSignal;
1053
- taskFn: (ctx: DurableContext) => Promise<TResult>;
1054
- /** Default outcome on successful completion. */
1055
- defaultOutcome?: RunOutcome;
1056
- }
1057
- interface RunDurableResult<TResult> {
1058
- result: TResult;
1059
- record: RunRecord;
1060
- /** All steps captured this run (replayed + freshly executed). */
1061
- steps: ReadonlyArray<StepRecord>;
1062
- }
1063
- declare function runDurable<TResult>(input: RunDurableInput<TResult>): Promise<RunDurableResult<TResult>>;
1064
-
1065
- /**
1066
- * The durable-runs SQL schema as a string constant. Inlined so consumers
1067
- * (Cloudflare Workers via D1) can apply it without bundling a `.sql` file:
1068
- *
1069
- * import { DURABLE_SCHEMA_SQL } from '@tangle-network/agent-runtime'
1070
- * await env.DB.exec(DURABLE_SCHEMA_SQL)
1071
- *
1072
- * The canonical source is `src/durable/schema.sql` — this string MUST stay
1073
- * byte-identical to it. The build does not copy `.sql` files into `dist/`,
1074
- * so the constant is the only path consumers have. A unit test asserts the
1075
- * two stay in sync (`durable-schema.test.ts`).
1076
- *
1077
- * `DURABLE_SCHEMA_VERSION` reflects the latest migration version applied by
1078
- * this string. Bump it on every backwards-incompatible change AND add a new
1079
- * migration entry to durable_schema_info instead of mutating prior rows.
1080
- */
1081
- declare const DURABLE_SCHEMA_VERSION = 2;
1082
- declare const DURABLE_SCHEMA_SQL = "-- Durable-run substrate \u2014 versioned schema for D1 / SQLite.\n--\n-- Apply once per database. Subsequent migrations append; never rewrite a\n-- prior version. See `durable_schema_info` for the migration trail.\n--\n-- Concurrency notes for D1:\n-- - SQLite supports UNIQUE constraints for first-emit-wins (`durable_events`\n-- PK is (run_id, key) \u2014 duplicate insert raises, caller treats as \"already\n-- emitted\").\n-- - Lease takeover happens via a conditional UPDATE: we only claim the lease\n-- if (lease_holder_id IS NULL OR lease_expires_at < :now) \u2014 atomic under\n-- SQLite's row-level locking.\n-- - All timestamps stored as ISO-8601 TEXT for cross-platform consistency.\n-- - `result_json` / `error_json` / `outcome_json` / `payload_json` are\n-- JSON-encoded TEXT; the application enforces canonical-JSON discipline at\n-- the boundary so the store stays type-agnostic.\n\nCREATE TABLE IF NOT EXISTS durable_schema_info (\n version INTEGER PRIMARY KEY,\n applied_at TEXT NOT NULL\n);\n\nCREATE TABLE IF NOT EXISTS durable_runs (\n run_id TEXT PRIMARY KEY,\n manifest_hash TEXT NOT NULL,\n project_id TEXT NOT NULL,\n scenario_id TEXT,\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed','suspended')),\n created_at TEXT NOT NULL,\n updated_at TEXT NOT NULL,\n completed_at TEXT,\n lease_holder_id TEXT,\n lease_expires_at TEXT,\n outcome_json TEXT,\n step_count INTEGER NOT NULL DEFAULT 0\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_runs_project_status ON durable_runs(project_id, status);\nCREATE INDEX IF NOT EXISTS idx_durable_runs_lease_expires ON durable_runs(lease_expires_at);\n\nCREATE TABLE IF NOT EXISTS durable_steps (\n run_id TEXT NOT NULL,\n step_index INTEGER NOT NULL,\n intent TEXT NOT NULL,\n kind TEXT NOT NULL,\n input_hash TEXT NOT NULL DEFAULT '',\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed')),\n attempts INTEGER NOT NULL DEFAULT 0,\n 
result_json TEXT,\n error_json TEXT,\n started_at TEXT,\n completed_at TEXT,\n PRIMARY KEY (run_id, step_index)\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_steps_status ON durable_steps(run_id, status);\n\nCREATE TABLE IF NOT EXISTS durable_events (\n run_id TEXT NOT NULL,\n key TEXT NOT NULL,\n payload_json TEXT,\n emitted_at TEXT NOT NULL,\n PRIMARY KEY (run_id, key)\n);\n\nINSERT OR IGNORE INTO durable_schema_info (version, applied_at)\nVALUES (1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));\n\n-- \u2500\u2500 Migration v2 \u2014 durable event-stream log + run handle \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n-- Run once on a database created at v1. `ALTER TABLE` is not idempotent; the\n-- version trail in `durable_schema_info` is how migrations are sequenced \u2014\n-- never by blind re-execution of this block.\n--\n-- - `durable_stream_events` is the ordered, replayable per-run event log.\n-- `seq` is the store-assigned monotonic cursor; the UNIQUE index on\n-- (run_id, event_id) makes appends idempotent \u2014 a reconnecting adapter\n-- that re-yields a boundary event cannot double-log it.\n-- - `durable_runs.handle_json` is the pointer (sandbox + substrate run id +\n-- cursor) a fresh supervisor re-attaches by.\n\nALTER TABLE durable_runs ADD COLUMN handle_json TEXT;\n\nCREATE TABLE IF NOT EXISTS durable_stream_events (\n run_id TEXT NOT NULL,\n seq INTEGER NOT NULL,\n event_id TEXT NOT NULL,\n payload_json TEXT,\n appended_at TEXT NOT NULL,\n PRIMARY KEY (run_id, seq)\n);\n\nCREATE UNIQUE INDEX IF NOT EXISTS idx_durable_stream_events_event_id\n ON durable_stream_events(run_id, event_id);\n\nINSERT OR IGNORE INTO durable_schema_info (version, applied_at)\nVALUES (2, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));\n";
1083
-
1084
- /**
1085
- * `runSupervisedTurn` — relocates the durability boundary off the ephemeral
1086
- * worker isolate.
1087
- *
1088
- * `runDurableTurn` replays a *completed* turn; an interrupted turn re-runs.
1089
- * `runSupervisedTurn` closes that gap for sandbox-backed runs: the sandbox
1090
- * container is orchestrator-managed and outlives the worker, so instead of
1091
- * re-prompting, a fresh supervisor re-attaches to the in-flight substrate run
1092
- * and resumes draining its event stream.
1093
- *
1094
- * Durability is owned by the substrate, not hoped-for from the sandbox. The
1095
- * supervisor drains every event into the store's stream log as it flows
1096
- * (`appendStreamEvent`), persists the reconnect pointer the instant the
1097
- * substrate yields it (`setRunHandle`), and heartbeats the lease. A fresh
1098
- * supervisor reads the log for its cursor and calls `adapter.attach` to
1099
- * resume strictly after it — the append's idempotency on `eventId` dedups
1100
- * the reconnect seam, so no event is lost and none is delivered twice.
1101
- *
1102
- * The platform-agnostic core is here; `SessionSupervisorDO` hosts it on a
1103
- * Cloudflare Durable Object. The reconnect glue is one typed contract —
1104
- * `SandboxReconnectAdapter` — implemented once per substrate, never per
1105
- * product.
1106
- */
1107
-
1108
- /** One event drained from a supervised run. */
1109
- interface SupervisedEvent<TEvent> {
1110
- /** Stable substrate id — the dedup key and the reconnect cursor. */
1111
- eventId: string;
1112
- payload: TEvent;
1113
- /**
1114
- * The substrate run handle, carried on the first frame(s) once the run id
1115
- * is known. The supervisor persists it so a fresh supervisor can re-attach.
1116
- * Omit on later frames; the last non-undefined handle wins.
1117
- */
1118
- handle?: RunHandle;
1119
- }
1120
- /**
1121
- * Product-supplied glue to a reconnectable substrate run. The dangerous
1122
- * reconnect logic — re-attaching to a live distributed run — lives behind
1123
- * this one typed contract: implement it once per substrate (the sandbox SDK,
1124
- * etc.), never per product.
285
+ * Format is readable, not hashed: operators grepping orchestrator logs
286
+ * for `gtm-agent:thread-abc:3` find the run without translating an
287
+ * opaque id. Substrate executionIds are not a secrecy boundary.
1125
288
  *
1126
- * Conformance (asserted by `supervisor.test.ts`):
1127
- * - `start()` yields the run's events; at least one early event carries a
1128
- * `handle` with `status: 'running'` and a defined `runId`.
1129
- * - `attach(handle, afterEventId)` yields only events strictly after
1130
- * `afterEventId`, and terminates cleanly when the run has no more.
1131
- * - `eventId`s are unique within a run.
289
+ * Wire integration:
290
+ * - `@tangle-network/sandbox@0.1.x` PromptOptions does not yet expose
291
+ * `executionId`. The SDK auto-reconnects in-call by extracting it
292
+ * from the response `execution.started` event; products do nothing.
293
+ * - For cross-process reconnect today, bypass the SDK and POST to the
294
+ * orchestrator's `/agents/run/stream` directly with this id in the
295
+ * `X-Execution-ID` header (see tax-agent's `sessions.ts`).
1132
296
  */
1133
- interface SandboxReconnectAdapter<TEvent> {
1134
- /** Begin a fresh substrate run. */
1135
- start(): AsyncIterable<SupervisedEvent<TEvent>>;
1136
- /**
1137
- * Re-attach to an in-flight run, resuming strictly after `afterEventId`
1138
- * (`undefined` → from the first event).
1139
- */
1140
- attach(handle: RunHandle, afterEventId: string | undefined): AsyncIterable<SupervisedEvent<TEvent>>;
1141
- }
1142
- /** How the supervised turn resolved. */
1143
- type SupervisedRunMode = 'fresh' | 'resumed' | 'replayed';
1144
- interface RunSupervisorOptions<TEvent> {
1145
- store: DurableRunStore;
1146
- /** Stable per-turn run id — the same id on a retry is what enables both
1147
- * replay (completed turn) and resume (in-flight turn). */
1148
- runId: string;
1149
- manifest: DurableRunManifest;
1150
- /** Stable per-isolate worker id. */
1151
- workerId: string;
1152
- adapter: SandboxReconnectAdapter<TEvent>;
1153
- /** Lease window in ms. Default 60_000 — deliberately short: the heartbeat
1154
- * keeps an actively-draining supervisor's lease alive, so an abandoned
1155
- * supervisor's lease lapses fast and a fresh supervisor can take over. */
1156
- leaseMs?: number;
1157
- /** Renew the lease at most this often while draining. Default 30_000 —
1158
- * must be below `leaseMs` or an active drain loses its own lease. */
1159
- heartbeatMs?: number;
1160
- /** Human-readable step label. Default `turn`. */
1161
- intent?: string;
1162
- /** Time source override — tests pin this for deterministic heartbeats. */
1163
- now?: () => number;
1164
- }
1165
- interface SupervisedRunHandle<TEvent> {
1166
- /** Drop-in stream. Fresh forwards live events; resumed re-yields the logged
1167
- * prefix then forwards live events; replayed re-yields the full log. */
1168
- stream: AsyncGenerator<TEvent, void, unknown>;
1169
- /** Which path ran. Valid after `stream` drains. */
1170
- mode(): SupervisedRunMode;
1171
- /** The durable RunRecord for the turn. Valid after `stream` drains. */
1172
- record(): RunRecord | undefined;
1173
- }
1174
- declare function runSupervisedTurn<TEvent>(options: RunSupervisorOptions<TEvent>): SupervisedRunHandle<TEvent>;
1175
-
1176
- /**
1177
- * `SessionSupervisorDO` — the Cloudflare Durable Object host for
1178
- * `runSupervisedTurn`.
1179
- *
1180
- * A stateless Worker isolate is the wrong place to own a 15-minute run: it
1181
- * dies on a deploy roll or CPU limit. A Durable Object is addressable by
1182
- * session id and survives across requests — it is the right home for the
1183
- * supervisor. This host is deliberately thin: all the durability logic lives
1184
- * in the platform-agnostic `runSupervisedTurn`; the DO only hosts it and
1185
- * uses an `alarm()` to re-attach a run the response stream abandoned.
1186
- *
1187
- * - `fetch` resolves the run, records it, arms the orphan-check alarm, and
1188
- * streams the supervised events back. If the client disconnects, the
1189
- * supervisor stops being pulled and its short lease lapses.
1190
- * - `alarm()` is the recovery mechanism: it finds a recorded-but-unfinished
1191
- * run and re-drives `runSupervisedTurn` headlessly to completion (events
1192
- * land in the durable log; a later `fetch` replays them). A run still held
1193
- * by a live `fetch` raises `DurableRunLeaseHeldError` — not orphaned, so
1194
- * the alarm just re-arms.
1195
- *
1196
- * Structural CF types (`DurableObjectStateLike`) are defined locally so
1197
- * agent-runtime keeps no dependency on `@cloudflare/workers-types` — the same
1198
- * discipline as `D1DatabaseLike` in `d1-store.ts`.
1199
- */
1200
-
1201
- /** Minimal Durable Object storage surface this host uses. Compatible with
1202
- * Cloudflare's `DurableObjectStorage`. */
1203
- interface DurableObjectStorageLike {
1204
- get<T = unknown>(key: string): Promise<T | undefined>;
1205
- put<T = unknown>(key: string, value: T): Promise<void>;
1206
- delete(key: string): Promise<boolean>;
1207
- /** Schedule the next `alarm()` invocation at an epoch-ms time. */
1208
- setAlarm(scheduledTime: number): Promise<void>;
1209
- }
1210
- /** Minimal Durable Object state surface — the `state` ctor argument. */
1211
- interface DurableObjectStateLike {
1212
- storage: DurableObjectStorageLike;
1213
- }
1214
- /**
1215
- * Product-supplied wiring for the host. `resolveRun` / `resolveOrphan` build
1216
- * the supervisor inputs (store, adapter, manifest) — the host owns no
1217
- * product policy.
1218
- */
1219
- interface SupervisorHostConfig<TEvent, TEnv> {
1220
- /** Build supervisor inputs for an incoming request. `undefined` → 404. */
1221
- resolveRun(request: Request, env: TEnv, state: DurableObjectStateLike): Promise<RunSupervisorOptions<TEvent> | undefined>;
1222
- /** Rebuild supervisor inputs for an orphan re-attach, from the recorded
1223
- * runId. `undefined` → the run is untrackable; the host stops tracking it. */
1224
- resolveOrphan(runId: string, env: TEnv, state: DurableObjectStateLike): Promise<RunSupervisorOptions<TEvent> | undefined>;
1225
- /** Serialize one event into a response-stream chunk (an SSE or NDJSON
1226
- * line — the product owns the framing). */
1227
- encodeEvent(event: TEvent): string;
1228
- /** Delay before the orphan-check alarm fires. Default 60_000. */
1229
- orphanCheckMs?: number;
1230
- /** Time source — tests pin this. */
1231
- now?: () => number;
1232
- }
1233
- /** The host instance surface — what a Cloudflare DO runtime invokes. */
1234
- interface SessionSupervisorDO {
1235
- fetch(request: Request): Promise<Response>;
1236
- alarm(): Promise<void>;
1237
- }
1238
- /**
1239
- * Build the `SessionSupervisorDO` class for a product. Export the result from
1240
- * the Worker entrypoint and bind it in `wrangler.toml`:
1241
- *
1242
- * export const SessionSupervisor = createSessionSupervisorDO(config)
1243
- *
1244
- * # wrangler.toml
1245
- * [[durable_objects.bindings]]
1246
- * name = "SESSION_SUPERVISOR"
1247
- * class_name = "SessionSupervisor"
1248
- * [[migrations]]
1249
- * tag = "v1"
1250
- * new_classes = ["SessionSupervisor"]
1251
- */
1252
- declare function createSessionSupervisorDO<TEvent, TEnv>(config: SupervisorHostConfig<TEvent, TEnv>): new (state: DurableObjectStateLike, env: TEnv) => SessionSupervisorDO;
1253
-
1254
- /**
1255
- * Cloudflare Workflows integration for the durable-run substrate.
1256
- *
1257
- * Two valid deployment patterns on Cloudflare:
1258
- *
1259
- * A. **Plain Worker + D1DurableRunStore.** Each request invokes
1260
- * `runDurable(...)` directly against a D1 binding. Survives worker
1261
- * isolate restarts; lease takeover happens via D1 row-level
1262
- * conditional UPDATE. The default path; no Workflows binding needed.
1263
- *
1264
- * B. **Cloudflare Workflows entrypoint.** Wrap an entire `runDurable(...)`
1265
- * call inside a single Workflow `step.do(...)`. Workflows gives you
1266
- * retry-on-throw with platform-managed exponential backoff and
1267
- * survives full Workers deploy rolls. Use it when the task can take
1268
- * minutes to hours, or when you want the Workflows dashboard for
1269
- * observability. Inside the step, `runDurable` still uses D1 for
1270
- * step-level checkpoints — so a half-completed run resumes from
1271
- * its last checkpoint on retry rather than restarting from scratch.
1272
- *
1273
- * This module provides the surface for pattern B: a thin helper that
1274
- * converts a Workflows `WorkflowStep` into a `DurableContext`. We do not
1275
- * take a runtime dep on `cloudflare:workers` — the integration is purely
1276
- * structural typing.
1277
- *
1278
- * Example (pattern B):
1279
- *
1280
- * import { WorkflowEntrypoint } from 'cloudflare:workers'
1281
- * import { runOnWorkflowStep } from '@tangle-network/agent-runtime'
1282
- *
1283
- * export class LegalChatWorkflow extends WorkflowEntrypoint<Env, ChatParams> {
1284
- * async run(event, step) {
1285
- * return runOnWorkflowStep(step, {
1286
- * workflowName: 'legal-chat',
1287
- * taskFn: async (ctx) => {
1288
- * const ready = await ctx.step('readiness', () => probeKnowledge(...))
1289
- * const answer = await ctx.step('llm:turn-1', () => callLlm(...))
1290
- * const shipped = await ctx.awaitEvent('shipped', { timeoutMs: 5 * 60_000 })
1291
- * return { answer, shipped }
1292
- * },
1293
- * })
1294
- * }
1295
- * }
1296
- *
1297
- * Step ordering, replay semantics, and divergence detection inside the
1298
- * `taskFn` are inherited from Cloudflare's Workflows engine — we
1299
- * intentionally do NOT layer a second durable store inside this path.
1300
- * Pick pattern A or pattern B per agent; do not mix.
1301
- */
1302
-
1303
- /**
1304
- * Structural subset of Cloudflare's `WorkflowStep`. Mirrors the public surface
1305
- * documented at https://developers.cloudflare.com/workflows/build/. Defined
1306
- * here so this module imposes zero `cloudflare:workers` runtime dependency.
1307
- */
1308
- interface WorkflowStepLike {
1309
- do<T>(name: string, opts: WorkflowStepConfig, fn: () => Promise<T>): Promise<T>;
1310
- do<T>(name: string, fn: () => Promise<T>): Promise<T>;
1311
- sleep(name: string, duration: string | number): Promise<void>;
1312
- waitForEvent<T = unknown>(name: string, opts: {
1313
- type: string;
1314
- timeout?: string;
1315
- }): Promise<{
1316
- payload: T;
1317
- timestamp: number;
1318
- type: string;
1319
- }>;
1320
- }
1321
- interface WorkflowStepConfig {
1322
- retries?: {
1323
- limit: number;
1324
- delay: string | number;
1325
- backoff?: 'constant' | 'linear' | 'exponential';
1326
- };
1327
- timeout?: string | number;
1328
- }
1329
- interface RunOnWorkflowStepInput<TResult> {
1330
- /** Logical workflow name; used as a prefix on step ids for filtering. */
1331
- workflowName: string;
1332
- /** User task — same shape as runDurable's taskFn. */
1333
- taskFn: (ctx: DurableContext) => Promise<TResult>;
1334
- /** Optional per-step retry / timeout policy applied to ctx.step calls. */
1335
- stepConfig?: WorkflowStepConfig;
1336
- /** Optional clock — defaults to Date.now. */
1337
- now?: () => number;
1338
- }
1339
- /**
1340
- * Adapt a Cloudflare `WorkflowStep` into a `DurableContext` and run a task.
1341
- *
1342
- * Every `ctx.step(intent, fn)` becomes `step.do(<name>, fn)` with stable
1343
- * names — Workflows checkpoints + replays based on step name + position,
1344
- * matching our model.
1345
- *
1346
- * `ctx.awaitEvent(key)` becomes `step.waitForEvent(key, { type: key })`.
1347
- * Caller is responsible for emitting from the platform side (e.g. via the
1348
- * Workflows REST API or a sibling worker that publishes events).
1349
- *
1350
- * `ctx.now()` and `ctx.uuid()` go through `step.do` so the values are
1351
- * captured in the platform's checkpoint state and remain stable across
1352
- * replay — same invariant as our own stores.
1353
- */
1354
- declare function runOnWorkflowStep<TResult>(workflowStep: WorkflowStepLike, input: RunOnWorkflowStepInput<TResult>): Promise<TResult>;
297
+ declare function deriveExecutionId(input: {
298
+ projectId: string;
299
+ sessionId: string;
300
+ turnIndex: number;
301
+ }): string;
1355
302
 
1356
303
  /**
1357
304
  * @stable
@@ -2009,4 +956,4 @@ declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
2009
956
  */
2010
957
  declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
2011
958
 
2012
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, type ChatModelCandidate, type ChatModelValidation, type ChatStreamEvent, ChatTurnError, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnResult, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, type D1DatabaseLike, D1DurableRunStore, type D1PreparedStatementLike, DEFAULT_ROUTER_BASE_URL, DURABLE_SCHEMA_SQL, DURABLE_SCHEMA_VERSION, DurableAwaitEventTimeoutError, DurableChatTurnEngine, type DurableContext, type DurableObjectStateLike, type DurableObjectStorageLike, DurableRunDivergenceError, DurableRunError, DurableRunInputMismatchError, DurableRunLeaseHeldError, type DurableRunManifest, type DurableRunStore, type DurableTurnHandle, type DurableTurnProducer, type EventRecord, FileSystemDurableRunStore, InMemoryDurableRunStore, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, type ModelInfo, type ResolvedChatModel, type RouterEnv, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnInput, type RunChatTurnOptions, type RunDurableInput, type RunDurableResult, type RunDurableTurnOptions, type RunHandle, type RunOnWorkflowStepInput, type RunOutcome, type RunStatus, type RunSupervisorOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SandboxReconnectAdapter, type SanitizedKnowledgeReadinessReport, type 
SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type SessionSupervisorDO, type StepError, type StepKind, type StepRecord, type StepStatus, type StreamEventRecord, type SubagentMatcher, type SupervisedEvent, type SupervisedRunHandle, type SupervisedRunMode, type SupervisorHostConfig, type TraceBridge, type TraceBridgeOptions, type WorkflowStepConfig, type WorkflowStepLike, assertProfileConformance, canonicalHash, canonicalJson, classifyIntent, cleanModelId, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createSessionSupervisorDO, createTraceBridge, decideKnowledgeReadiness, deriveWorkerId, durableChatTurnEngine, encodeServerSentEvent, getModels, manifestHash, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runChatTurn, runDurable, runDurableTurn, runOnWorkflowStep, runSupervisedTurn, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, stepId, summarizeAgentTaskRun, toAgentEvalTrace, validateChatModelId, withConfiguredModels };
959
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, type ChatModelCandidate, type ChatModelValidation, type ChatStreamEvent, ChatTurnError, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnProducer, type ChatTurnResult, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, DEFAULT_ROUTER_BASE_URL, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, type ModelInfo, type ResolvedChatModel, type RouterEnv, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnInput, type RunChatTurnOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, assertProfileConformance, classifyIntent, cleanModelId, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, deriveExecutionId, encodeServerSentEvent, getModels, handleChatTurn, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runChatTurn, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, 
sanitizeRuntimeStreamEvent, startRuntimeRun, summarizeAgentTaskRun, toAgentEvalTrace, validateChatModelId, withConfiguredModels };