@tangle-network/agent-runtime 0.12.1 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { AgentEvalError, KnowledgeReadinessReport, ControlEvalResult, KnowledgeRequirement, TraceEvent } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, CaptureIntegrityError, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, ReplayError, RunRecord, UserQuestion, ValidationError, VerificationError } from '@tangle-network/agent-eval';
3
- import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, c as RunAgentTaskOptions, d as AgentTaskRunResult, e as RunAgentTaskStreamOptions, f as AgentTaskRunSummary, g as AgentTaskSpec, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-afLuHk1G.js';
4
- export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-afLuHk1G.js';
3
+ import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent, c as AgentTaskSpec, K as KnowledgeReadinessDecision, d as RunAgentTaskOptions, e as AgentTaskRunResult, f as RunAgentTaskStreamOptions, g as AgentTaskRunSummary, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-jr_EFhrD.js';
4
+ export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-jr_EFhrD.js';
5
5
  import { AgentProfilePrompt, AgentProfileResources, AgentSubagentProfile, AgentProfile, SandboxInstance } from '@tangle-network/sandbox';
6
6
 
7
7
  /**
@@ -168,6 +168,699 @@ declare function sandboxAsChatTurnTarget(instance: SandboxInstance, opts?: {
168
168
  };
169
169
  }): ChatTurnSandbox;
170
170
 
171
+ /**
172
+ * Durable-run substrate: the typed contract for checkpointed agent runs that
173
+ * survive worker crashes, deploy rolls, OOM, and transient transport errors.
174
+ *
175
+ * The model — directly inspired by Absurd (Postgres-backed) and Cloudflare
176
+ * Workflows — splits a run into ordered, idempotent **steps**. Each step's
177
+ * result is persisted before the next step runs. On resume, the runner reads
178
+ * the prior steps from a `DurableRunStore` and fast-replays them (returning
179
+ * cached values) until it reaches the first unfinished step, where execution
180
+ * actually resumes.
181
+ *
182
+ * Three boundary disciplines:
183
+ *
184
+ * 1. Step results MUST be JSON-serializable. No closures, no class
185
+ * instances, no live streams. The store treats results as opaque JSON.
186
+ *
187
+ * 2. Step intents MUST be stable across replays. The runner derives a
188
+ * stable step id from (runId, stepIndex, intent). Mismatched intent at
189
+ * the same index = `DurableRunDivergenceError`.
190
+ *
191
+ * 3. Non-determinism (now / uuid / random) MUST flow through the
192
+ * `DurableContext` helpers — `ctx.now()`, `ctx.uuid()` — so the values
193
+ * are checkpointed and identical on replay. Bare `Date.now()` /
194
+ * `crypto.randomUUID()` inside a task fn breaks replay equality.
195
+ */
196
+
197
+ /** Caller-facing kinds. The runner uses these for telemetry + querying. */
198
+ type StepKind =
199
+ /** Logical step that ran user code (the default for ctx.step). */
200
+ 'logic'
201
+ /** A wrapped LLM call. */
202
+ | 'llm'
203
+ /** A wrapped tool call. */
204
+ | 'tool'
205
+ /** A wrapped readiness probe. */
206
+ | 'readiness'
207
+ /** A deterministic clock or uuid read. */
208
+ | 'deterministic'
209
+ /** A suspend-for-event boundary. */
210
+ | 'event';
211
+ type StepStatus = 'pending' | 'running' | 'completed' | 'failed';
212
+ interface StepError {
213
+ message: string;
214
+ code?: string;
215
+ /** Optional stack — stored for diagnostics, NEVER replayed as an exception. */
216
+ stack?: string;
217
+ }
218
+ interface StepRecord<T = unknown> {
219
+ runId: string;
220
+ /** Monotonic 0-based index. Position is the load-bearing identifier — the
221
+ * same intent string at different positions is a different step. */
222
+ stepIndex: number;
223
+ /** Caller-supplied label; intended for human reading + log correlation. */
224
+ intent: string;
225
+ kind: StepKind;
226
+ /** sha256 of the canonical input fingerprint at begin-time. Used to detect
227
+ * divergence (caller changed inputs across replays). Empty for steps where
228
+ * the input cannot be canonicalized (e.g. ctx.now()). */
229
+ inputHash: string;
230
+ status: StepStatus;
231
+ /** Re-entry count. Increments each time the step begins. */
232
+ attempts: number;
233
+ /** JSON-serializable result. Present when status === 'completed'. */
234
+ result?: T;
235
+ error?: StepError;
236
+ startedAt?: string;
237
+ completedAt?: string;
238
+ }
239
+ interface EventRecord {
240
+ runId: string;
241
+ key: string;
242
+ payload: unknown;
243
+ emittedAt: string;
244
+ }
245
+ type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'suspended';
246
+ interface RunOutcome {
247
+ pass?: boolean;
248
+ score?: number;
249
+ notes?: string;
250
+ /** Free-form bag of run-level metrics — surfaced in OTLP / TraceStore. */
251
+ metadata?: Record<string, unknown>;
252
+ }
253
+ interface DurableRunManifest {
254
+ /** Stable per-product id (e.g. 'legal-agent', 'creative-agent'). */
255
+ projectId: string;
256
+ /** Optional scenario / persona / session id — surfaced in telemetry. */
257
+ scenarioId?: string;
258
+ task: AgentTaskSpec;
259
+ /** Input payload. Hashed into the run identity so two runs with the same
260
+ * runId but different inputs raise DurableRunInputMismatchError. */
261
+ input: Record<string, unknown>;
262
+ /** Free-form tags surfaced into RunRecord / OTLP. */
263
+ tags?: Record<string, string>;
264
+ }
265
+ interface RunRecord {
266
+ runId: string;
267
+ manifestHash: string;
268
+ projectId: string;
269
+ scenarioId?: string;
270
+ status: RunStatus;
271
+ createdAt: string;
272
+ updatedAt: string;
273
+ completedAt?: string;
274
+ /** Stable per-worker id holding the lease. */
275
+ leaseHolderId?: string;
276
+ leaseExpiresAt?: string;
277
+ outcome?: RunOutcome;
278
+ stepCount: number;
279
+ }
280
+ /**
281
+ * The durable-run substrate. Implementations: in-memory (dev), file-system
282
+ * (eval harness), D1 (Cloudflare prod). All stores share this exact contract
283
+ * — swap by changing one factory call.
284
+ *
285
+ * Concurrency model: at most one worker holds a run's lease at a time. Lease
286
+ * renewal happens on a heartbeat; on lease expiry, another worker can
287
+ * `startOrResume` and pick up. Steps committed by the prior worker survive.
288
+ */
289
+ interface DurableRunStore {
290
+ /**
291
+ * Begin or resume a run. Returns the canonical RunRecord, all previously
292
+ * completed steps (in order), and the lease deadline.
293
+ *
294
+ * If the run did not exist, creates it with status='running'. If it existed
295
+ * with a different manifest hash, throws DurableRunInputMismatchError.
296
+ * If it existed with a live lease held by a different worker, throws
297
+ * DurableRunLeaseHeldError (caller can retry or back off).
298
+ */
299
+ startOrResume(input: {
300
+ runId: string;
301
+ manifest: DurableRunManifest;
302
+ workerId: string;
303
+ leaseMs?: number;
304
+ }): Promise<{
305
+ run: RunRecord;
306
+ completedSteps: ReadonlyArray<StepRecord>;
307
+ leaseExpiresAt: string;
308
+ }>;
309
+ /** Renew the lease. Returns false if another worker now holds it. */
310
+ renewLease(input: {
311
+ runId: string;
312
+ workerId: string;
313
+ leaseMs?: number;
314
+ }): Promise<{
315
+ ok: boolean;
316
+ leaseExpiresAt?: string;
317
+ }>;
318
+ /** Load a step by position. Returns undefined if not yet begun. */
319
+ loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
320
+ /** Record step start (intent + input hash + kind). Bumps attempt count. */
321
+ beginStep(input: {
322
+ runId: string;
323
+ stepIndex: number;
324
+ intent: string;
325
+ kind: StepKind;
326
+ inputHash: string;
327
+ }): Promise<StepRecord>;
328
+ /** Mark step completed with a JSON-serializable result. */
329
+ completeStep(input: {
330
+ runId: string;
331
+ stepIndex: number;
332
+ result: unknown;
333
+ }): Promise<StepRecord>;
334
+ /** Mark step failed with a captured error. */
335
+ failStep(input: {
336
+ runId: string;
337
+ stepIndex: number;
338
+ error: StepError;
339
+ }): Promise<StepRecord>;
340
+ /** End the run; releases lease. */
341
+ endRun(input: {
342
+ runId: string;
343
+ workerId: string;
344
+ status: 'completed' | 'failed';
345
+ outcome?: RunOutcome;
346
+ }): Promise<RunRecord>;
347
+ /**
348
+ * Emit an event. First emit wins; subsequent emits return the existing
349
+ * record under `record` with accepted=false. Caller can treat that as
350
+ * idempotency-by-design — never double-fire a downstream side effect.
351
+ */
352
+ emitEvent(input: {
353
+ runId: string;
354
+ key: string;
355
+ payload: unknown;
356
+ }): Promise<{
357
+ accepted: boolean;
358
+ record: EventRecord;
359
+ }>;
360
+ /** Load the cached event payload if it has been emitted. */
361
+ loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
362
+ /** Cleanup hook for in-memory / fs stores; no-op for D1. Idempotent. */
363
+ close(): Promise<void>;
364
+ }
365
+ /** Base class for durable-run errors. */
366
+ declare class DurableRunError extends Error {
367
+ readonly code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race';
368
+ constructor(message: string, code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race');
369
+ }
370
+ /** Thrown when another worker holds the lease for this runId. */
371
+ declare class DurableRunLeaseHeldError extends DurableRunError {
372
+ constructor(message: string);
373
+ }
374
+ /** Thrown when the manifest hash differs from a prior run with the same id. */
375
+ declare class DurableRunInputMismatchError extends DurableRunError {
376
+ constructor(message: string);
377
+ }
378
+ /** Thrown when the same stepIndex re-runs with a different intent string. */
379
+ declare class DurableRunDivergenceError extends DurableRunError {
380
+ constructor(message: string);
381
+ }
382
+ /** Thrown when `awaitEvent` times out. */
383
+ declare class DurableAwaitEventTimeoutError extends DurableRunError {
384
+ constructor(message: string);
385
+ }
386
+
387
+ /**
388
+ * D1DurableRunStore — the production path for Cloudflare Workers. Backed by
389
+ * a D1 (SQLite-compatible) database via the binding the worker already holds.
390
+ *
391
+ * Apply `./schema.sql` once before use; the store itself does not run DDL.
392
+ * Migration version is recorded in `durable_schema_info`; consumers can
393
+ * inspect `getSchemaVersion()` if they ship a migration tool.
394
+ *
395
+ * Why structural typing: agent-runtime stays Cloudflare-free at the dep
396
+ * level. Consumers pass their `D1Database` binding — TypeScript matches the
397
+ * minimal `D1DatabaseLike` surface below. Tests use the same interface with
398
+ * a fake.
399
+ */
400
+
401
+ /**
402
+ * Minimal D1 surface this store uses. Compatible with Cloudflare's
403
+ * `D1Database` from `@cloudflare/workers-types`. Defined locally so
404
+ * agent-runtime does not depend on workers-types at the package level.
405
+ */
406
+ interface D1DatabaseLike {
407
+ prepare(query: string): D1PreparedStatementLike;
408
+ batch(statements: D1PreparedStatementLike[]): Promise<unknown[]>;
409
+ }
410
+ interface D1PreparedStatementLike {
411
+ bind(...values: unknown[]): D1PreparedStatementLike;
412
+ first<T = unknown>(): Promise<T | null>;
413
+ all<T = unknown>(): Promise<{
414
+ results: T[];
415
+ }>;
416
+ run(): Promise<{
417
+ success: boolean;
418
+ meta?: {
419
+ changes?: number;
420
+ };
421
+ }>;
422
+ }
423
+ declare class D1DurableRunStore implements DurableRunStore {
424
+ private readonly db;
425
+ constructor(db: D1DatabaseLike);
426
+ /** Override for tests — defaults to Date.now(). */
427
+ now: () => number;
428
+ startOrResume(input: {
429
+ runId: string;
430
+ manifest: DurableRunManifest;
431
+ workerId: string;
432
+ leaseMs?: number;
433
+ }): ReturnType<DurableRunStore['startOrResume']>;
434
+ renewLease(input: {
435
+ runId: string;
436
+ workerId: string;
437
+ leaseMs?: number;
438
+ }): Promise<{
439
+ ok: boolean;
440
+ leaseExpiresAt?: string;
441
+ }>;
442
+ loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
443
+ beginStep(input: {
444
+ runId: string;
445
+ stepIndex: number;
446
+ intent: string;
447
+ kind: StepKind;
448
+ inputHash: string;
449
+ }): Promise<StepRecord>;
450
+ completeStep(input: {
451
+ runId: string;
452
+ stepIndex: number;
453
+ result: unknown;
454
+ }): Promise<StepRecord>;
455
+ failStep(input: {
456
+ runId: string;
457
+ stepIndex: number;
458
+ error: StepError;
459
+ }): Promise<StepRecord>;
460
+ endRun(input: {
461
+ runId: string;
462
+ workerId: string;
463
+ status: 'completed' | 'failed';
464
+ outcome?: RunOutcome;
465
+ }): Promise<RunRecord>;
466
+ emitEvent(input: {
467
+ runId: string;
468
+ key: string;
469
+ payload: unknown;
470
+ }): ReturnType<DurableRunStore['emitEvent']>;
471
+ loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
472
+ close(): Promise<void>;
473
+ /** Inspect the currently-applied schema version. */
474
+ getSchemaVersion(): Promise<number | undefined>;
475
+ private readSteps;
476
+ private bumpUpdated;
477
+ }
478
+
479
+ /**
480
+ * FileSystemDurableRunStore — durable-run substrate backed by a directory
481
+ * tree under a single root. One subdir per run:
482
+ *
483
+ * <root>/<runId>/
484
+ * run.json — RunRecord (rewritten on every mutation; the only
485
+ * scalar fields are status/lease, so this stays small)
486
+ * steps.jsonl — append-only StepRecord stream; one JSON per line
487
+ * events.jsonl — append-only EventRecord stream
488
+ * lease.json — current leaseholder + deadline (separate from
489
+ * run.json so renewLease writes one tiny file
490
+ * instead of round-tripping the whole run record)
491
+ *
492
+ * Concurrency: the eval harness is single-process — we rely on Node's
493
+ * append-mode semantics for atomicity of step / event writes (single-line
494
+ * writes < PIPE_BUF are atomic on POSIX). For run.json / lease.json we write
495
+ * to a `<file>.tmp` then `rename` to make replacement atomic. This is
496
+ * sufficient for the single-process eval harness use case. Multi-process
497
+ * concurrency on the SAME filesystem requires a flock-based extension;
498
+ * for that path use D1DurableRunStore.
499
+ */
500
+
501
+ declare class FileSystemDurableRunStore implements DurableRunStore {
502
+ private readonly root;
503
+ constructor(root: string);
504
+ /** Override for tests — defaults to Date.now(). */
505
+ now: () => number;
506
+ startOrResume(input: {
507
+ runId: string;
508
+ manifest: DurableRunManifest;
509
+ workerId: string;
510
+ leaseMs?: number;
511
+ }): ReturnType<DurableRunStore['startOrResume']>;
512
+ renewLease(input: {
513
+ runId: string;
514
+ workerId: string;
515
+ leaseMs?: number;
516
+ }): Promise<{
517
+ ok: boolean;
518
+ leaseExpiresAt?: string;
519
+ }>;
520
+ loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
521
+ beginStep(input: {
522
+ runId: string;
523
+ stepIndex: number;
524
+ intent: string;
525
+ kind: StepKind;
526
+ inputHash: string;
527
+ }): Promise<StepRecord>;
528
+ completeStep(input: {
529
+ runId: string;
530
+ stepIndex: number;
531
+ result: unknown;
532
+ }): Promise<StepRecord>;
533
+ failStep(input: {
534
+ runId: string;
535
+ stepIndex: number;
536
+ error: StepError;
537
+ }): Promise<StepRecord>;
538
+ endRun(input: {
539
+ runId: string;
540
+ workerId: string;
541
+ status: 'completed' | 'failed';
542
+ outcome?: RunOutcome;
543
+ }): Promise<RunRecord>;
544
+ emitEvent(input: {
545
+ runId: string;
546
+ key: string;
547
+ payload: unknown;
548
+ }): ReturnType<DurableRunStore['emitEvent']>;
549
+ loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
550
+ close(): Promise<void>;
551
+ /** @internal — used by tests to list runs in the store. */
552
+ _listRunIds(): Promise<string[]>;
553
+ private runDir;
554
+ private readRun;
555
+ private writeRun;
556
+ private readLeaseSafe;
557
+ private writeLease;
558
+ private readSteps;
559
+ private appendStep;
560
+ private bumpRunUpdated;
561
+ }
562
+
563
+ /**
564
+ * Identity + canonical-hash helpers for the durable-runs substrate.
565
+ *
566
+ * Two boundary disciplines:
567
+ *
568
+ * 1. **Manifest hash** — sha256 over a sorted-key JSON of (projectId,
569
+ * scenarioId, task.id, task.intent, task.domain, input). Same hash =
570
+ * same run identity. Used to detect "same runId, different inputs."
571
+ *
572
+ * 2. **Step input hash** — sha256 over a sorted-key JSON of the step's
573
+ * input fingerprint. Used to detect drift across replays.
574
+ *
575
+ * Sorted-key JSON makes hashes deterministic regardless of object insertion
576
+ * order. NaN / Infinity / undefined / functions / symbols / class instances
577
+ * are rejected — pure JSON only at the boundary, so the hash matches whatever
578
+ * the store round-trips.
579
+ */
580
+
581
+ /** sha256-hex over a JSON-canonicalized value. */
582
+ declare function canonicalHash(value: unknown): string;
583
+ /** Canonical JSON: object keys sorted lexicographically; arrays preserved. */
584
+ declare function canonicalJson(value: unknown): string;
585
+ /** Hash a DurableRunManifest into the run identity component. */
586
+ declare function manifestHash(manifest: DurableRunManifest): string;
587
+ /** Stable per-step identifier — hash of (runId, position, intent). */
588
+ declare function stepId(runId: string, stepIndex: number, intent: string): string;
589
+ /**
590
+ * Stable worker id for a single process. Format: `host:pid:rand`. Random
591
+ * suffix prevents collisions when the host/pid pair is short-lived (e.g.,
592
+ * Cloudflare isolates that recycle fast).
593
+ */
594
+ declare function deriveWorkerId(): string;
595
+
596
+ /**
597
+ * In-memory DurableRunStore for dev + tests. Single-process. All state lives
598
+ * in maps. Lease enforcement is real (Date.now() vs lease deadline) so the
599
+ * crash-recovery + multi-worker race tests run identically against this and
600
+ * the file-system / D1 stores.
601
+ */
602
+
603
+ declare class InMemoryDurableRunStore implements DurableRunStore {
604
+ private readonly runs;
605
+ /** Override for tests — defaults to Date.now(). */
606
+ now: () => number;
607
+ startOrResume(input: {
608
+ runId: string;
609
+ manifest: DurableRunManifest;
610
+ workerId: string;
611
+ leaseMs?: number;
612
+ }): ReturnType<DurableRunStore['startOrResume']>;
613
+ renewLease(input: {
614
+ runId: string;
615
+ workerId: string;
616
+ leaseMs?: number;
617
+ }): Promise<{
618
+ ok: boolean;
619
+ leaseExpiresAt?: string;
620
+ }>;
621
+ loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
622
+ beginStep(input: {
623
+ runId: string;
624
+ stepIndex: number;
625
+ intent: string;
626
+ kind: StepKind;
627
+ inputHash: string;
628
+ }): Promise<StepRecord>;
629
+ completeStep(input: {
630
+ runId: string;
631
+ stepIndex: number;
632
+ result: unknown;
633
+ }): Promise<StepRecord>;
634
+ failStep(input: {
635
+ runId: string;
636
+ stepIndex: number;
637
+ error: StepError;
638
+ }): Promise<StepRecord>;
639
+ endRun(input: {
640
+ runId: string;
641
+ workerId: string;
642
+ status: 'completed' | 'failed';
643
+ outcome?: RunOutcome;
644
+ }): Promise<RunRecord>;
645
+ emitEvent(input: {
646
+ runId: string;
647
+ key: string;
648
+ payload: unknown;
649
+ }): ReturnType<DurableRunStore['emitEvent']>;
650
+ loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
651
+ close(): Promise<void>;
652
+ /** @internal — used by tests to inspect lease metadata. */
653
+ _inspect(runId: string): RunRecord | undefined;
654
+ /** @internal — used by tests to simulate lease expiry. */
655
+ _expireLease(runId: string): void;
656
+ private requireRun;
657
+ }
658
+
659
+ /**
660
+ * Durable runner — wraps a user-supplied async function in checkpoint /
661
+ * resume / lease semantics. The user writes plain async code, awaiting
662
+ * `ctx.step(intent, fn)` boundaries. On worker crash, the next caller with
663
+ * the same `runId` skips completed steps and resumes from the first unfinished
664
+ * one.
665
+ *
666
+ * Invariants:
667
+ *
668
+ * - Step positions are derived from a monotonic counter on the ctx. The
669
+ * same intent at position N is the same step across replays. If the user
670
+ * reorders steps, position N changes intent and we raise
671
+ * DurableRunDivergenceError, failing loudly.
672
+ *
673
+ * - `ctx.now()` and `ctx.uuid()` are checkpointed as zero-input steps
674
+ * with kind='deterministic'. On replay they return the recorded value.
675
+ *
676
+ * - `awaitEvent` writes an 'event' step that records the event payload on
677
+ * first awaited completion. On replay, the cached payload returns
678
+ * synchronously. If the event has not been emitted and the runner is in
679
+ * a fresh execution, it polls the store until timeout.
680
+ *
681
+ * - Lease renewal happens on a wall-clock interval (every leaseMs/3). If
682
+ * the store reports a lost lease, the runner aborts the current step
683
+ * execution and throws — letting whichever worker holds the lease pick
684
+ * up. Committed steps survive.
685
+ */
686
+
687
+ interface DurableContext {
688
+ readonly runId: string;
689
+ readonly projectId: string;
690
+ readonly scenarioId?: string;
691
+ /**
692
+ * Execute a checkpointed step. The step is identified by its **position**
693
+ * (monotonic counter on this ctx); `intent` is a human-readable label that
694
+ * must stay stable across replays.
695
+ *
696
+ * On first execution: runs `fn`, records the result, returns it.
697
+ * On replay: returns the recorded result WITHOUT calling `fn`.
698
+ *
699
+ * The `inputFingerprint` (optional) lets the runner detect "same intent,
700
+ * different inputs" — it gets hashed and compared. If you don't supply
701
+ * one, drift is allowed (input not checked).
702
+ */
703
+ step<T>(intent: string, fn: () => Promise<T>, opts?: {
704
+ kind?: StepKind;
705
+ inputFingerprint?: unknown;
706
+ }): Promise<T>;
707
+ /** Race-free first-emit-wins event wait. */
708
+ awaitEvent<T = unknown>(key: string, opts?: {
709
+ timeoutMs?: number;
710
+ pollMs?: number;
711
+ }): Promise<T>;
712
+ /** Emit an event. First emit wins. Subsequent emits no-op. */
713
+ emitEvent(key: string, payload: unknown): Promise<{
714
+ accepted: boolean;
715
+ }>;
716
+ /** Deterministic clock — checkpointed once per call. */
717
+ now(): Promise<Date>;
718
+ /** Deterministic uuid — checkpointed once per call. */
719
+ uuid(): Promise<string>;
720
+ }
721
+ interface RunDurableInput<TResult> {
722
+ runId: string;
723
+ manifest: DurableRunManifest;
724
+ store: DurableRunStore;
725
+ workerId?: string;
726
+ leaseMs?: number;
727
+ /** Abort signal carrying the run's total time budget. Used for awaitEvent timeouts; runner
728
+ * itself doesn't kill long-running steps (the step fn must respect
729
+ * AbortSignal if it cares). */
730
+ signal?: AbortSignal;
731
+ taskFn: (ctx: DurableContext) => Promise<TResult>;
732
+ /** Default outcome on successful completion. */
733
+ defaultOutcome?: RunOutcome;
734
+ }
735
+ interface RunDurableResult<TResult> {
736
+ result: TResult;
737
+ record: RunRecord;
738
+ /** All steps captured this run (replayed + freshly executed). */
739
+ steps: ReadonlyArray<StepRecord>;
740
+ }
741
+ declare function runDurable<TResult>(input: RunDurableInput<TResult>): Promise<RunDurableResult<TResult>>;
742
+
743
+ /**
744
+ * The durable-runs SQL schema as a string constant. Inlined so consumers
745
+ * (Cloudflare Workers via D1) can apply it without bundling a `.sql` file:
746
+ *
747
+ * import { DURABLE_SCHEMA_SQL } from '@tangle-network/agent-runtime'
748
+ * await env.DB.exec(DURABLE_SCHEMA_SQL)
749
+ *
750
+ * The canonical source is `src/durable/schema.sql` — this string MUST stay
751
+ * byte-identical to it. The build does not copy `.sql` files into `dist/`,
752
+ * so the constant is the only path consumers have. A unit test asserts the
753
+ * two stay in sync (`durable-schema.test.ts`).
754
+ *
755
+ * `DURABLE_SCHEMA_VERSION` reflects the latest migration version applied by
756
+ * this string. Bump it on every backwards-incompatible change AND add a new
757
+ * migration entry to durable_schema_info instead of mutating prior rows.
758
+ */
759
+ declare const DURABLE_SCHEMA_VERSION = 1;
760
+ declare const DURABLE_SCHEMA_SQL = "-- Durable-run substrate \u2014 versioned schema for D1 / SQLite.\n--\n-- Apply once per database. Subsequent migrations append; never rewrite a\n-- prior version. See `durable_schema_info` for the migration trail.\n--\n-- Concurrency notes for D1:\n-- - SQLite supports UNIQUE constraints for first-emit-wins (`durable_events`\n-- PK is (run_id, key) \u2014 duplicate insert raises, caller treats as \"already\n-- emitted\").\n-- - Lease takeover happens via a conditional UPDATE: we only claim the lease\n-- if (lease_holder_id IS NULL OR lease_expires_at < :now) \u2014 atomic under\n-- SQLite's row-level locking.\n-- - All timestamps stored as ISO-8601 TEXT for cross-platform consistency.\n-- - `result_json` / `error_json` / `outcome_json` / `payload_json` are\n-- JSON-encoded TEXT; the application enforces canonical-JSON discipline at\n-- the boundary so the store stays type-agnostic.\n\nCREATE TABLE IF NOT EXISTS durable_schema_info (\n version INTEGER PRIMARY KEY,\n applied_at TEXT NOT NULL\n);\n\nCREATE TABLE IF NOT EXISTS durable_runs (\n run_id TEXT PRIMARY KEY,\n manifest_hash TEXT NOT NULL,\n project_id TEXT NOT NULL,\n scenario_id TEXT,\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed','suspended')),\n created_at TEXT NOT NULL,\n updated_at TEXT NOT NULL,\n completed_at TEXT,\n lease_holder_id TEXT,\n lease_expires_at TEXT,\n outcome_json TEXT,\n step_count INTEGER NOT NULL DEFAULT 0\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_runs_project_status ON durable_runs(project_id, status);\nCREATE INDEX IF NOT EXISTS idx_durable_runs_lease_expires ON durable_runs(lease_expires_at);\n\nCREATE TABLE IF NOT EXISTS durable_steps (\n run_id TEXT NOT NULL,\n step_index INTEGER NOT NULL,\n intent TEXT NOT NULL,\n kind TEXT NOT NULL,\n input_hash TEXT NOT NULL DEFAULT '',\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed')),\n attempts INTEGER NOT NULL DEFAULT 0,\n 
result_json TEXT,\n error_json TEXT,\n started_at TEXT,\n completed_at TEXT,\n PRIMARY KEY (run_id, step_index)\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_steps_status ON durable_steps(run_id, status);\n\nCREATE TABLE IF NOT EXISTS durable_events (\n run_id TEXT NOT NULL,\n key TEXT NOT NULL,\n payload_json TEXT,\n emitted_at TEXT NOT NULL,\n PRIMARY KEY (run_id, key)\n);\n\nINSERT OR IGNORE INTO durable_schema_info (version, applied_at)\nVALUES (1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));\n";
761
+
762
+ /**
763
+ * Cloudflare Workflows integration for the durable-run substrate.
764
+ *
765
+ * Two valid deployment patterns on Cloudflare:
766
+ *
767
+ * A. **Plain Worker + D1DurableRunStore.** Each request invokes
768
+ * `runDurable(...)` directly against a D1 binding. Survives worker
769
+ * isolate restarts; lease takeover happens via D1 row-level
770
+ * conditional UPDATE. The default path; no Workflows binding needed.
771
+ *
772
+ * B. **Cloudflare Workflows entrypoint.** Wrap an entire `runDurable(...)`
773
+ * call inside a single Workflow `step.do(...)`. Workflows gives you
774
+ * retry-on-throw with platform-managed exponential backoff and
775
+ * survives full Workers deploy rolls. Use it when the task can take
776
+ * minutes to hours, or when you want the Workflows dashboard for
777
+ * observability. Inside the step, `runDurable` still uses D1 for
778
+ * step-level checkpoints — so a half-completed run resumes from
779
+ * its last checkpoint on retry rather than restarting from scratch.
780
+ *
781
+ * This module provides the surface for pattern B: a thin helper that
782
+ * converts a Workflows `WorkflowStep` into a `DurableContext`. We do not
783
+ * take a runtime dep on `cloudflare:workers` — the integration is purely
784
+ * structural typing.
785
+ *
786
+ * Example (pattern B):
787
+ *
788
+ * import { WorkflowEntrypoint } from 'cloudflare:workers'
789
+ * import { runOnWorkflowStep } from '@tangle-network/agent-runtime'
790
+ *
791
+ * export class LegalChatWorkflow extends WorkflowEntrypoint<Env, ChatParams> {
792
+ * async run(event, step) {
793
+ * return runOnWorkflowStep(step, {
794
+ * workflowName: 'legal-chat',
795
+ * taskFn: async (ctx) => {
796
+ * const ready = await ctx.step('readiness', () => probeKnowledge(...))
797
+ * const answer = await ctx.step('llm:turn-1', () => callLlm(...))
798
+ * const shipped = await ctx.awaitEvent('shipped', { timeoutMs: 5 * 60_000 })
799
+ * return { answer, shipped }
800
+ * },
801
+ * })
802
+ * }
803
+ * }
804
+ *
805
+ * Step ordering, replay semantics, and divergence detection inside the
806
+ * `taskFn` are inherited from Cloudflare's Workflows engine — we
807
+ * intentionally do NOT layer a second durable store inside this path.
808
+ * Pick pattern A or pattern B per agent; do not mix.
809
+ */
810
+
811
+ /**
812
+ * Structural subset of Cloudflare's `WorkflowStep`. Mirrors the public surface
813
+ * documented at https://developers.cloudflare.com/workflows/build/. Defined
814
+ * here so this module imposes zero `cloudflare:workers` runtime dependency.
815
+ */
816
+ interface WorkflowStepLike {
817
+ do<T>(name: string, opts: WorkflowStepConfig, fn: () => Promise<T>): Promise<T>;
818
+ do<T>(name: string, fn: () => Promise<T>): Promise<T>;
819
+ sleep(name: string, duration: string | number): Promise<void>;
820
+ waitForEvent<T = unknown>(name: string, opts: {
821
+ type: string;
822
+ timeout?: string;
823
+ }): Promise<{
824
+ payload: T;
825
+ timestamp: number;
826
+ type: string;
827
+ }>;
828
+ }
829
+ interface WorkflowStepConfig {
830
+ retries?: {
831
+ limit: number;
832
+ delay: string | number;
833
+ backoff?: 'constant' | 'linear' | 'exponential';
834
+ };
835
+ timeout?: string | number;
836
+ }
837
+ interface RunOnWorkflowStepInput<TResult> {
838
+ /** Logical workflow name; used as a prefix on step ids for filtering. */
839
+ workflowName: string;
840
+ /** User task — same shape as runDurable's taskFn. */
841
+ taskFn: (ctx: DurableContext) => Promise<TResult>;
842
+ /** Optional per-step retry / timeout policy applied to ctx.step calls. */
843
+ stepConfig?: WorkflowStepConfig;
844
+ /** Optional clock — defaults to Date.now. */
845
+ now?: () => number;
846
+ }
847
+ /**
848
+ * Adapt a Cloudflare `WorkflowStep` into a `DurableContext` and run a task.
849
+ *
850
+ * Every `ctx.step(intent, fn)` becomes `step.do(<name>, fn)` with stable
851
+ * names — Workflows checkpoints + replays based on step name + position,
852
+ * matching our model.
853
+ *
854
+ * `ctx.awaitEvent(key)` becomes `step.waitForEvent(key, { type: key })`.
855
+ * Caller is responsible for emitting from the platform side (e.g. via the
856
+ * Workflows REST API or a sibling worker that publishes events).
857
+ *
858
+ * `ctx.now()` and `ctx.uuid()` go through `step.do` so the values are
859
+ * captured in the platform's checkpoint state and remain stable across
860
+ * replay — same invariant as our own stores.
861
+ */
862
+ declare function runOnWorkflowStep<TResult>(workflowStep: WorkflowStepLike, input: RunOnWorkflowStepInput<TResult>): Promise<TResult>;
863
+
171
864
  /**
172
865
  * @stable
173
866
  *
@@ -731,4 +1424,4 @@ declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
731
1424
  */
732
1425
  declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
733
1426
 
734
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, BackendTransportError, ChatTurnError, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, assertProfileConformance, classifyIntent, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, encodeServerSentEvent, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runChatTurn, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, summarizeAgentTaskRun, toAgentEvalTrace };
1427
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, ChatTurnError, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, type D1DatabaseLike, D1DurableRunStore, type D1PreparedStatementLike, DURABLE_SCHEMA_SQL, DURABLE_SCHEMA_VERSION, DurableAwaitEventTimeoutError, type DurableContext, DurableRunDivergenceError, DurableRunError, DurableRunInputMismatchError, DurableRunLeaseHeldError, type DurableRunManifest, type DurableRunStore, type EventRecord, FileSystemDurableRunStore, InMemoryDurableRunStore, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnOptions, type RunDurableInput, type RunDurableResult, type RunOnWorkflowStepInput, type RunOutcome, type RunStatus, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type StepError, type StepKind, type StepRecord, type StepStatus, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, type WorkflowStepConfig, type WorkflowStepLike, assertProfileConformance, canonicalHash, canonicalJson, classifyIntent, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, 
createTraceBridge, decideKnowledgeReadiness, deriveWorkerId, encodeServerSentEvent, manifestHash, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runChatTurn, runDurable, runOnWorkflowStep, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, stepId, summarizeAgentTaskRun, toAgentEvalTrace };