@tangle-network/agent-runtime 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -384,6 +384,225 @@ declare class DurableAwaitEventTimeoutError extends DurableRunError {
384
384
  constructor(message: string);
385
385
  }
386
386
 
387
+ /**
388
+ * `runDurableTurn` — a streaming, backend-agnostic, checkpoint+replay durable
389
+ * turn. The single reusable primitive every product's chat handler routes
390
+ * through, so per-product durability code drops to zero.
391
+ *
392
+ * A **turn** is one request→response unit: a producer yields a stream of
393
+ * events and, once drained, exposes the turn's final text. `runDurableTurn`
394
+ * wraps that with a `DurableRunStore`:
395
+ *
396
+ * - **Fresh run** — no completed step for this `(runId)`. The producer
397
+ * runs; its events forward live to the caller (streaming preserved)
398
+ * while final text accumulates; on drain the text is checkpointed.
399
+ *
400
+ * - **Replay** — a completed step already exists (the worker died after
401
+ * the turn finished but before the response reached the client, and the
402
+ * client retried the same turn). The cached text is emitted as a single
403
+ * synthetic event; the producer is never constructed — no LLM call, no
404
+ * double-billing.
405
+ *
406
+ * - **Mid-stream crash** — a turn that died *while streaming* leaves step 0
407
+ * in `running`/`failed`. There is no partial-stream checkpoint (the
408
+ * substrate checkpoints JSON values at step granularity), so the turn
409
+ * re-runs from the top. This is the honest durability ceiling: a
410
+ * *completed* turn is free to replay; an *interrupted* turn re-runs.
411
+ *
412
+ * Generic over the event type `TEvent` so a product can stream its own NDJSON
413
+ * shape or the runtime's `RuntimeStreamEvent` — `runDurableTurn` never
414
+ * inspects events, it only forwards them and reads `finalText()` after drain.
415
+ *
416
+ * Lease: a turn is a single step, fast enough that the heartbeat in
417
+ * `runDurable` is unnecessary — `runDurableTurn` claims the lease once via
418
+ * `startOrResume` and releases it on `endRun`. Concurrent workers on the same
419
+ * `runId` are rejected with `DurableRunLeaseHeldError` (the client retried
420
+ * before the first attempt finished); callers surface that as "turn already
421
+ * in flight."
422
+ */
423
+
424
+ /** The live side of a turn — what a fresh run produces. */
425
+ interface DurableTurnProducer<TEvent> {
426
+ /** The turn's event stream. Forwarded verbatim to the caller. */
427
+ stream: AsyncGenerator<TEvent, void, unknown>;
428
+ /** The turn's final assistant text. Read once, after `stream` drains. */
429
+ finalText(): string;
430
+ }
431
+ interface RunDurableTurnOptions<TEvent> {
432
+ store: DurableRunStore;
433
+ /** Stable per-turn run id. Convention: `chat:<threadId>:<turnIndex>`. The
434
+ * same id on a retry is what enables replay. */
435
+ runId: string;
436
+ manifest: DurableRunManifest;
437
+ /** Stable per-isolate worker id that claims the run lease. The declared
438
+ * type makes it required; pass `deriveWorkerId()` when no stable id exists. */
439
+ workerId: string;
440
+ /** Lease window in ms. Default 60_000 — a turn rarely runs longer. */
441
+ leaseMs?: number;
442
+ /** Human-readable step label. Default `turn`. */
443
+ intent?: string;
444
+ /** Builds the live producer. Called exactly once, on a fresh run; never
445
+ * called on the replay path. */
446
+ produce: () => DurableTurnProducer<TEvent>;
447
+ /** Synthesizes the single event emitted on the replay path from the
448
+ * cached final text (e.g. a product's `{ type: 'result', data: {...} }`). */
449
+ replayEvent: (finalText: string) => TEvent;
450
+ /** Optional live accumulator. When the producer's `finalText()` is only
451
+ * valid after drain, this lets `runDurableTurn` also observe each event
452
+ * to build the text — return the running text or `undefined` to ignore
453
+ * an event. When omitted, `producer.finalText()` is the sole source. */
454
+ accumulate?: (event: TEvent, current: string) => string | undefined;
455
+ }
456
+ interface DurableTurnHandle<TEvent> {
457
+ /** Drop-in stream. Fresh runs forward producer events live; replays emit
458
+ * exactly one `replayEvent(cachedText)`. */
459
+ stream: AsyncGenerator<TEvent, void, unknown>;
460
+ /** The turn's final text. Valid after `stream` drains. */
461
+ finalText(): string;
462
+ /** True iff this turn replayed a cached result (no producer ran). Valid
463
+ * after `stream` drains. */
464
+ replayed(): boolean;
465
+ /** The durable `RunRecord` for this turn. Valid after `stream` drains. */
466
+ record(): RunRecord | undefined;
467
+ }
468
+ declare function runDurableTurn<TEvent>(options: RunDurableTurnOptions<TEvent>): DurableTurnHandle<TEvent>;
469
+
470
+ /**
471
+ * `DurableChatTurnEngine` — the framework-neutral chat-turn orchestrator every
472
+ * product chat handler routes through. It owns the parts that were copy-pasted
473
+ * across legal / gtm / creative / tax: durable checkpointing, the NDJSON
474
+ * `StreamEvent` line protocol, the `session.run.*` lifecycle vocabulary, the
475
+ * runtime-run cost ledger, and trace flush. Everything genuinely
476
+ * product-specific is a hook the product supplies.
477
+ *
478
+ * What the engine owns:
479
+ * - durable turn (`runDurableTurn`): completed turns replay free, no re-bill
480
+ * - the `session.run.started` / `session.run.completed` / `session.run.failed`
481
+ * event envelope around the producer's events
482
+ * - NDJSON encoding into a `ReadableStream<Uint8Array>` (the body every
483
+ * product returns, React Router or Hono alike)
484
+ * - calling the product's persist / post-process hooks in the right order,
485
+ * after the stream drains, with the assembled final text
486
+ * - never throwing into the HTTP layer — a producer failure becomes an
487
+ * `error` + `session.run.failed` event pair, the stream still closes
488
+ *
489
+ * What the product supplies (`ChatTurnHooks`):
490
+ * - `produce` — build the backend stream for this turn (sandbox / router
491
+ * / tcloud / runtime — the engine does not care which)
492
+ * - `persistAssistantMessage` — write the assistant turn to the product DB
493
+ * - `onTurnComplete` (optional) — post-process (proposals, citations, …)
494
+ * - `onEvent` (optional) — per-event side-channel (e.g. DO broadcast)
495
+ * - `transformFinalText` (optional) — pre-persist transform (e.g. PII redact)
496
+ *
497
+ * Framework neutrality: the engine takes already-resolved values
498
+ * (`userId`, identity tuple, parsed message, a `DurableRunStore`, a
499
+ * `waitUntil`), never a `Request` or a `Context`. The product's thin route
500
+ * adapter does auth + parse + access-control, then calls `engine.runTurn(...)`
501
+ * and returns `result.body` as its platform `Response`.
502
+ */
503
+
504
+ /** The NDJSON line protocol every product chat client already speaks. */
505
+ interface ChatStreamEvent {
506
+ type: string;
507
+ data?: Record<string, unknown>;
508
+ }
509
+ /** Identity of a chat turn. `tenantId` is the workspace id for workspace-
510
+ * scoped products and the user id for session-scoped products. */
511
+ interface ChatTurnIdentity {
512
+ tenantId: string;
513
+ /** Thread / session id — the durable run is keyed on this + `turnIndex`. */
514
+ sessionId: string;
515
+ userId: string;
516
+ /** Monotonic 0-based turn index within the session. */
517
+ turnIndex: number;
518
+ }
519
+ interface ChatTurnHooks {
520
+ /**
521
+ * Build the backend stream for this turn. The engine never inspects which
522
+ * backend this is — sandbox container, tcloud router, direct runtime, a
523
+ * test double — it only forwards the events and reads `finalText()`.
524
+ */
525
+ produce(): DurableTurnProducer<ChatStreamEvent>;
526
+ /**
527
+ * Persist the completed assistant message to the product's own store.
528
+ * Called once, after the stream drains, on a fresh (non-replay) run.
529
+ * Receives the assembled (and `transformFinalText`-transformed) text. A rejection is logged and swallowed; it does not fail the turn.
530
+ */
531
+ persistAssistantMessage(input: {
532
+ identity: ChatTurnIdentity;
533
+ finalText: string;
534
+ record: RunRecord | undefined;
535
+ }): Promise<void>;
536
+ /**
537
+ * Optional post-processing after persistence — proposal extraction,
538
+ * citation validation, credit metering, etc. Product policy; the engine
539
+ * has no shared logic here. Errors are swallowed + logged (post-process
540
+ * must never fail the turn that already streamed successfully).
541
+ */
542
+ onTurnComplete?(input: {
543
+ identity: ChatTurnIdentity;
544
+ finalText: string;
545
+ }): Promise<void>;
546
+ /**
547
+ * Optional per-event side channel (e.g. Durable Object broadcast). Runs
548
+ * for every event the engine emits, lifecycle envelope included. Errors
549
+ * are swallowed — a broadcast failure must not break the chat stream.
550
+ */
551
+ onEvent?(event: ChatStreamEvent): void | Promise<void>;
552
+ /**
553
+ * Optional pre-persist transform of the final text (e.g. PII redaction).
554
+ * Affects only what is persisted; the live stream is never altered.
555
+ */
556
+ transformFinalText?(text: string): string | Promise<string>;
557
+ /**
558
+ * Optional trace flush — resolves when OTLP export completes. The engine
559
+ * hands it to `waitUntil` so the worker isolate stays alive for the POST.
560
+ */
561
+ traceFlush?(): Promise<void>;
562
+ }
563
+ interface RunChatTurnInput {
564
+ identity: ChatTurnIdentity;
565
+ /** The user's message for this turn. Hashed into the durable run identity. */
566
+ userMessage: string;
567
+ /** Product id for telemetry / the durable manifest (`legal-agent`, …). */
568
+ projectId: string;
569
+ /** Domain tag for the task spec (`legal`, `gtm`, …). */
570
+ domain: string;
571
+ /** Model id, when known — recorded on the manifest. */
572
+ model?: string;
573
+ store: DurableRunStore;
574
+ hooks: ChatTurnHooks;
575
+ /** Worker liveness hook (`ctx.waitUntil` / `executionCtx.waitUntil`). When
576
+ * omitted, trace flush is awaited inline before the stream closes. */
577
+ waitUntil?: (p: Promise<unknown>) => void;
578
+ /** Stable per-isolate worker id. Defaults to a fresh `deriveWorkerId()`. */
579
+ workerId?: string;
580
+ /** Lease window in ms. Default 60_000. */
581
+ leaseMs?: number;
582
+ /** Optional structured logger for swallowed hook errors. */
583
+ log?: (message: string, meta?: Record<string, unknown>) => void;
584
+ }
585
+ interface ChatTurnResult {
586
+ /** NDJSON body — return this as the platform `Response` body. */
587
+ body: ReadableStream<Uint8Array>;
588
+ /** Content type for the response. */
589
+ contentType: 'application/x-ndjson';
590
+ }
591
+ /**
592
+ * The engine. One instance is stateless and reusable across requests — all
593
+ * per-turn state lives in `runTurn`'s closure.
594
+ */
595
+ declare class DurableChatTurnEngine {
596
+ /**
597
+ * Run one durable chat turn. Returns immediately with a `ReadableStream`
598
+ * body; the turn starts running at once, driven by the stream's `start` callback rather than by reader pulls. Never rejects — backend
599
+ * failures surface as `error` + `session.run.failed` events.
600
+ */
601
+ runTurn(input: RunChatTurnInput): ChatTurnResult;
602
+ }
603
+ /** Convenience singleton — the engine is stateless, one instance is enough. */
604
+ declare const durableChatTurnEngine: DurableChatTurnEngine;
605
+
387
606
  /**
388
607
  * D1DurableRunStore — the production path for Cloudflare Workers. Backed by
389
608
  * a D1 (SQLite-compatible) database via the binding the worker already holds.
@@ -740,6 +959,25 @@ interface RunDurableResult<TResult> {
740
959
  }
741
960
  declare function runDurable<TResult>(input: RunDurableInput<TResult>): Promise<RunDurableResult<TResult>>;
742
961
 
962
+ /**
963
+ * The durable-runs SQL schema as a string constant. Inlined so consumers
964
+ * (Cloudflare Workers via D1) can apply it without bundling a `.sql` file:
965
+ *
966
+ * import { DURABLE_SCHEMA_SQL } from '@tangle-network/agent-runtime'
967
+ * await env.DB.exec(DURABLE_SCHEMA_SQL)
968
+ *
969
+ * The canonical source is `src/durable/schema.sql` — this string MUST stay
970
+ * byte-identical to it. The build does not copy `.sql` files into `dist/`,
971
+ * so the constant is the only path consumers have. A unit test asserts the
972
+ * two stay in sync (`durable-schema.test.ts`).
973
+ *
974
+ * `DURABLE_SCHEMA_VERSION` reflects the latest migration version applied by
975
+ * this string. Bump it on every backwards-incompatible change AND add a new
976
+ * migration entry to durable_schema_info instead of mutating prior rows.
977
+ */
978
+ declare const DURABLE_SCHEMA_VERSION = 1;
979
+ declare const DURABLE_SCHEMA_SQL = "-- Durable-run substrate \u2014 versioned schema for D1 / SQLite.\n--\n-- Apply once per database. Subsequent migrations append; never rewrite a\n-- prior version. See `durable_schema_info` for the migration trail.\n--\n-- Concurrency notes for D1:\n-- - SQLite supports UNIQUE constraints for first-emit-wins (`durable_events`\n-- PK is (run_id, key) \u2014 duplicate insert raises, caller treats as \"already\n-- emitted\").\n-- - Lease takeover happens via a conditional UPDATE: we only claim the lease\n-- if (lease_holder_id IS NULL OR lease_expires_at < :now) \u2014 atomic under\n-- SQLite's row-level locking.\n-- - All timestamps stored as ISO-8601 TEXT for cross-platform consistency.\n-- - `result_json` / `error_json` / `outcome_json` / `payload_json` are\n-- JSON-encoded TEXT; the application enforces canonical-JSON discipline at\n-- the boundary so the store stays type-agnostic.\n\nCREATE TABLE IF NOT EXISTS durable_schema_info (\n version INTEGER PRIMARY KEY,\n applied_at TEXT NOT NULL\n);\n\nCREATE TABLE IF NOT EXISTS durable_runs (\n run_id TEXT PRIMARY KEY,\n manifest_hash TEXT NOT NULL,\n project_id TEXT NOT NULL,\n scenario_id TEXT,\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed','suspended')),\n created_at TEXT NOT NULL,\n updated_at TEXT NOT NULL,\n completed_at TEXT,\n lease_holder_id TEXT,\n lease_expires_at TEXT,\n outcome_json TEXT,\n step_count INTEGER NOT NULL DEFAULT 0\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_runs_project_status ON durable_runs(project_id, status);\nCREATE INDEX IF NOT EXISTS idx_durable_runs_lease_expires ON durable_runs(lease_expires_at);\n\nCREATE TABLE IF NOT EXISTS durable_steps (\n run_id TEXT NOT NULL,\n step_index INTEGER NOT NULL,\n intent TEXT NOT NULL,\n kind TEXT NOT NULL,\n input_hash TEXT NOT NULL DEFAULT '',\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed')),\n attempts INTEGER NOT NULL DEFAULT 0,\n 
result_json TEXT,\n error_json TEXT,\n started_at TEXT,\n completed_at TEXT,\n PRIMARY KEY (run_id, step_index)\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_steps_status ON durable_steps(run_id, status);\n\nCREATE TABLE IF NOT EXISTS durable_events (\n run_id TEXT NOT NULL,\n key TEXT NOT NULL,\n payload_json TEXT,\n emitted_at TEXT NOT NULL,\n PRIMARY KEY (run_id, key)\n);\n\nINSERT OR IGNORE INTO durable_schema_info (version, applied_at)\nVALUES (1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));\n";
980
+
743
981
  /**
744
982
  * Cloudflare Workflows integration for the durable-run substrate.
745
983
  *
@@ -1405,4 +1643,4 @@ declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
1405
1643
  */
1406
1644
  declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
1407
1645
 
1408
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, ChatTurnError, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, type D1DatabaseLike, D1DurableRunStore, type D1PreparedStatementLike, DurableAwaitEventTimeoutError, type DurableContext, DurableRunDivergenceError, DurableRunError, DurableRunInputMismatchError, DurableRunLeaseHeldError, type DurableRunManifest, type DurableRunStore, type EventRecord, FileSystemDurableRunStore, InMemoryDurableRunStore, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnOptions, type RunDurableInput, type RunDurableResult, type RunOnWorkflowStepInput, type RunOutcome, type RunStatus, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type StepError, type StepKind, type StepRecord, type StepStatus, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, type WorkflowStepConfig, type WorkflowStepLike, assertProfileConformance, canonicalHash, canonicalJson, classifyIntent, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, 
deriveWorkerId, encodeServerSentEvent, manifestHash, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runChatTurn, runDurable, runOnWorkflowStep, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, stepId, summarizeAgentTaskRun, toAgentEvalTrace };
1646
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, type ChatStreamEvent, ChatTurnError, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnResult, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, type D1DatabaseLike, D1DurableRunStore, type D1PreparedStatementLike, DURABLE_SCHEMA_SQL, DURABLE_SCHEMA_VERSION, DurableAwaitEventTimeoutError, DurableChatTurnEngine, type DurableContext, DurableRunDivergenceError, DurableRunError, DurableRunInputMismatchError, DurableRunLeaseHeldError, type DurableRunManifest, type DurableRunStore, type DurableTurnHandle, type DurableTurnProducer, type EventRecord, FileSystemDurableRunStore, InMemoryDurableRunStore, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnInput, type RunChatTurnOptions, type RunDurableInput, type RunDurableResult, type RunDurableTurnOptions, type RunOnWorkflowStepInput, type RunOutcome, type RunStatus, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type StepError, type StepKind, type StepRecord, type StepStatus, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, type WorkflowStepConfig, type WorkflowStepLike, assertProfileConformance, 
canonicalHash, canonicalJson, classifyIntent, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, deriveWorkerId, durableChatTurnEngine, encodeServerSentEvent, manifestHash, readinessServerSentEvent, runAgentTask, runAgentTaskStream, runChatTurn, runDurable, runDurableTurn, runOnWorkflowStep, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, startRuntimeRun, stepId, summarizeAgentTaskRun, toAgentEvalTrace };
package/dist/index.js CHANGED
@@ -497,6 +497,228 @@ function deriveWorkerId() {
497
497
  return `${host}:${pid}:${rand}:${counter}`;
498
498
  }
499
499
 
500
+ // src/durable/turn.ts
501
// A turn is a single durable step, so every store call targets step 0.
var STEP_INDEX = 0;

/**
 * Runs one request→response turn as a single checkpointed step on a durable
 * run store. Nothing executes until the returned `stream` is iterated.
 *
 * - Replay: when step 0 is already `completed`, the cached `finalText` is
 *   emitted once through `options.replayEvent` and `options.produce` is never
 *   called.
 * - Fresh run: `options.produce()` is called once; its events are forwarded
 *   verbatim, optionally folded into the running text by `options.accumulate`,
 *   and on drain the final text is checkpointed with `completeStep` and the run
 *   is ended as `completed`.
 * - Producer failure: the step is failed, the run is ended as `failed`, and the
 *   original error is rethrown to the consumer.
 * - Early consumer exit (`break` / `stream.return()`): the step is failed and
 *   the run is ended as `failed` so the lease is released immediately instead
 *   of blocking a retry until the lease window expires.
 *
 * @param options.store     Durable run store (`startOrResume`, `beginStep`,
 *                          `completeStep`, `failStep`, `endRun`).
 * @param options.runId     Stable per-turn run id; reusing it enables replay.
 * @param options.manifest  Run manifest; `manifest.input` is hashed into the step.
 * @param options.workerId  Lease holder id. Defaults to `deriveWorkerId()`.
 * @param options.leaseMs   Lease window in ms. Defaults to 60 000.
 * @param options.intent    Step label. Defaults to `"turn"`.
 * @param options.produce   Builds the live producer (`stream` + `finalText()`).
 * @param options.replayEvent Builds the single replay event from the cached text.
 * @param options.accumulate  Optional `(event, current) => string | undefined`.
 * @returns `{ stream, finalText(), replayed(), record() }`; the three accessors
 *          are meaningful once `stream` has finished.
 */
function runDurableTurn(options) {
  const { store, runId, manifest } = options;
  // The option is documented as optional; without this fallback an omitted id
  // would reach the store as `undefined`.
  const workerId = options.workerId ?? deriveWorkerId();
  const leaseMs = options.leaseMs ?? 6e4;
  const intent = options.intent ?? "turn";
  const inputHash = canonicalHash(manifest.input);
  let accumulated = "";
  let didReplay = false;
  let finalRecord;

  async function* stream() {
    const { completedSteps } = await store.startOrResume({
      runId,
      manifest,
      workerId,
      leaseMs
    });
    const prior = completedSteps.find((s) => s.stepIndex === STEP_INDEX);
    if (prior && prior.status === "completed") {
      didReplay = true;
      accumulated = prior.result?.finalText ?? "";
      try {
        yield options.replayEvent(accumulated);
      } finally {
        // Runs on normal resumption AND when the consumer stops after the one
        // replay event, so the lease claimed above is always released.
        finalRecord = await store.endRun({ runId, workerId, status: "completed" });
      }
      return;
    }

    await store.beginStep({
      runId,
      stepIndex: STEP_INDEX,
      intent,
      kind: "llm",
      inputHash
    });

    // True once the step has been settled through the success or error path.
    let settled = false;
    try {
      const producer = options.produce();
      for await (const event of producer.stream) {
        if (options.accumulate) {
          const next = options.accumulate(event, accumulated);
          if (typeof next === "string") accumulated = next;
        }
        yield event;
      }
      // A non-empty producer text wins over whatever `accumulate` built.
      const producerText = producer.finalText();
      if (producerText) accumulated = producerText;
      await store.completeStep({
        runId,
        stepIndex: STEP_INDEX,
        result: { finalText: accumulated }
      });
      finalRecord = await store.endRun({
        runId,
        workerId,
        status: "completed",
        outcome: { notes: intent, metadata: { chars: accumulated.length } }
      });
      settled = true;
    } catch (err) {
      settled = true;
      await store.failStep({
        runId,
        stepIndex: STEP_INDEX,
        error: { message: err instanceof Error ? err.message : String(err) }
      });
      finalRecord = await store.endRun({ runId, workerId, status: "failed" });
      throw err;
    } finally {
      if (!settled) {
        // Reached only when the consumer closed the generator at a `yield`
        // (the `for await` above has already closed the producer stream).
        await store.failStep({
          runId,
          stepIndex: STEP_INDEX,
          error: { message: "turn stream closed by the consumer before the producer drained" }
        });
        finalRecord = await store.endRun({ runId, workerId, status: "failed" });
      }
    }
  }

  return {
    stream: stream(),
    finalText: () => accumulated,
    replayed: () => didReplay,
    record: () => finalRecord
  };
}
572
+
573
+ // src/durable/chat-engine.ts
574
+ var encoder = new TextEncoder();
575
/** Serializes one event as a single NDJSON line: its `JSON.stringify` text plus a trailing newline, turned into bytes by the module-level `encoder` (a `TextEncoder`). */
+ function encodeLine(event) {
576
+ return encoder.encode(`${JSON.stringify(event)}
577
+ `);
578
+ }
579
var DurableChatTurnEngine = class {
  /**
   * Run one durable chat turn and expose it as an NDJSON byte stream.
   *
   * Returns immediately. The turn itself is driven from the stream's `start`
   * callback, so it begins running as soon as the stream is constructed, not
   * when a reader pulls. The emitted sequence is `session.run.started`, the
   * turn's events, then `session.run.completed`; on failure it ends with
   * `error` + `session.run.failed`. The stream is closed in every case.
   *
   * After the turn drains, on a non-replayed run, `persistAssistantMessage` and
   * then `onTurnComplete` are called with the (optionally transformed) final
   * text; a rejection from either is logged and swallowed.
   *
   * If the consumer cancels the body, the turn is abandoned at the next event:
   * the inner stream is closed, no persist/post-process hooks run, and nothing
   * further is written or closed on the (already closed) stream.
   *
   * @param input See `RunChatTurnInput`: `identity`, `userMessage`, `projectId`,
   *              `domain`, optional `model`, `store`, `hooks`, optional
   *              `waitUntil`, `workerId` (defaults to `deriveWorkerId()`),
   *              `leaseMs`, and `log`.
   * @returns `{ body, contentType: "application/x-ndjson" }`.
   */
  runTurn(input) {
    const workerId = input.workerId ?? deriveWorkerId();
    const log = input.log ?? (() => void 0);
    const { identity, hooks } = input;
    const describe = (err) => err instanceof Error ? err.message : String(err);
    const runId = `chat:${identity.sessionId}:${identity.turnIndex}`;
    const manifest = {
      projectId: input.projectId,
      scenarioId: identity.sessionId,
      task: {
        id: `${input.projectId}:chat:${identity.sessionId}:${identity.turnIndex}`,
        intent: `Run a ${input.domain} chat turn with workspace context.`,
        domain: input.domain,
        requiredKnowledge: [],
        metadata: {
          tenantId: identity.tenantId,
          sessionId: identity.sessionId,
          turnIndex: identity.turnIndex
        }
      },
      input: {
        userMessage: input.userMessage,
        model: input.model ?? null
      },
      tags: {
        session_id: identity.sessionId,
        tenant_id: identity.tenantId
      }
    };
    // Flipped by the stream's `cancel` callback. Once set, the controller must
    // not be touched: `enqueue`/`close` throw on a cancelled stream.
    let cancelled = false;
    const body = new ReadableStream({
      start: async (controller) => {
        const emit2 = async (event) => {
          if (!cancelled) controller.enqueue(encodeLine(event));
          if (hooks.onEvent) {
            try {
              await hooks.onEvent(event);
            } catch (err) {
              log("[chat-engine] onEvent hook threw", { error: describe(err) });
            }
          }
        };
        try {
          await emit2({
            type: "session.run.started",
            data: {
              sessionId: identity.sessionId,
              tenantId: identity.tenantId,
              turnIndex: identity.turnIndex
            }
          });
          const turn = runDurableTurn({
            store: input.store,
            runId,
            manifest,
            workerId,
            leaseMs: input.leaseMs,
            intent: `chat:turn-${identity.turnIndex}`,
            // Call through `hooks` so a method-style `produce` keeps its `this`.
            produce: () => hooks.produce(),
            replayEvent: (finalText2) => ({ type: "result", data: { finalText: finalText2 } }),
            accumulate: (event, current) => {
              if (event.type === "message.part.updated") {
                const data = event.data ?? {};
                const delta = typeof data.delta === "string" ? data.delta : "";
                const part = data.part;
                // A delta appends; a full text part replaces the running text.
                if (delta) return current + delta;
                if (part?.type === "text" && typeof part.text === "string") return part.text;
                return void 0;
              }
              if (event.type === "result") {
                const data = event.data ?? {};
                if (typeof data.finalText === "string") return data.finalText;
              }
              return void 0;
            }
          });
          let drained = true;
          for await (const event of turn.stream) {
            if (cancelled) {
              // Leaving the loop closes `turn.stream`, which stops the producer.
              drained = false;
              break;
            }
            await emit2(event);
          }
          if (!drained) {
            log("[chat-engine] turn cancelled by consumer", { runId });
            return;
          }
          const rawFinal = turn.finalText();
          const finalText = hooks.transformFinalText ? await hooks.transformFinalText(rawFinal) : rawFinal;
          if (!turn.replayed()) {
            try {
              await hooks.persistAssistantMessage({
                identity,
                finalText,
                record: turn.record()
              });
            } catch (err) {
              log("[chat-engine] persistAssistantMessage threw", { error: describe(err) });
            }
            if (hooks.onTurnComplete) {
              try {
                await hooks.onTurnComplete({ identity, finalText });
              } catch (err) {
                log("[chat-engine] onTurnComplete threw", { error: describe(err) });
              }
            }
          }
          await emit2({
            type: "session.run.completed",
            data: { sessionId: identity.sessionId, replayed: turn.replayed() }
          });
        } catch (err) {
          const message = describe(err);
          log("[chat-engine] turn failed", { error: message });
          await emit2({ type: "error", data: { message } });
          await emit2({
            type: "session.run.failed",
            data: { sessionId: identity.sessionId, message }
          });
        } finally {
          try {
            if (hooks.traceFlush) {
              // Started inside a promise chain so a synchronous throw from the
              // hook is logged like a rejection instead of escaping `finally`.
              const flush = Promise.resolve().then(() => hooks.traceFlush()).catch(
                (err) => log("[chat-engine] traceFlush threw", { error: describe(err) })
              );
              if (input.waitUntil) input.waitUntil(flush);
              else await flush;
            }
          } finally {
            if (!cancelled) controller.close();
          }
        }
      },
      cancel: () => {
        cancelled = true;
      }
    });
    return { body, contentType: "application/x-ndjson" };
  }
};
720
+ var durableChatTurnEngine = new DurableChatTurnEngine();
721
+
500
722
  // src/durable/types.ts
501
723
  var DurableRunError = class extends Error {
502
724
  constructor(message, code) {
@@ -1489,6 +1711,77 @@ function cryptoRandomUuid() {
1489
1711
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
1490
1712
  }
1491
1713
 
1714
+ // src/durable/schema.ts
1715
+ var DURABLE_SCHEMA_VERSION = 1;
1716
+ var DURABLE_SCHEMA_SQL = `-- Durable-run substrate \u2014 versioned schema for D1 / SQLite.
1717
+ --
1718
+ -- Apply once per database. Subsequent migrations append; never rewrite a
1719
+ -- prior version. See \`durable_schema_info\` for the migration trail.
1720
+ --
1721
+ -- Concurrency notes for D1:
1722
+ -- - SQLite supports UNIQUE constraints for first-emit-wins (\`durable_events\`
1723
+ -- PK is (run_id, key) \u2014 duplicate insert raises, caller treats as "already
1724
+ -- emitted").
1725
+ -- - Lease takeover happens via a conditional UPDATE: we only claim the lease
1726
+ -- if (lease_holder_id IS NULL OR lease_expires_at < :now) \u2014 atomic under
1727
+ -- SQLite's row-level locking.
1728
+ -- - All timestamps stored as ISO-8601 TEXT for cross-platform consistency.
1729
+ -- - \`result_json\` / \`error_json\` / \`outcome_json\` / \`payload_json\` are
1730
+ -- JSON-encoded TEXT; the application enforces canonical-JSON discipline at
1731
+ -- the boundary so the store stays type-agnostic.
1732
+
1733
+ CREATE TABLE IF NOT EXISTS durable_schema_info (
1734
+ version INTEGER PRIMARY KEY,
1735
+ applied_at TEXT NOT NULL
1736
+ );
1737
+
1738
+ CREATE TABLE IF NOT EXISTS durable_runs (
1739
+ run_id TEXT PRIMARY KEY,
1740
+ manifest_hash TEXT NOT NULL,
1741
+ project_id TEXT NOT NULL,
1742
+ scenario_id TEXT,
1743
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed','suspended')),
1744
+ created_at TEXT NOT NULL,
1745
+ updated_at TEXT NOT NULL,
1746
+ completed_at TEXT,
1747
+ lease_holder_id TEXT,
1748
+ lease_expires_at TEXT,
1749
+ outcome_json TEXT,
1750
+ step_count INTEGER NOT NULL DEFAULT 0
1751
+ );
1752
+
1753
+ CREATE INDEX IF NOT EXISTS idx_durable_runs_project_status ON durable_runs(project_id, status);
1754
+ CREATE INDEX IF NOT EXISTS idx_durable_runs_lease_expires ON durable_runs(lease_expires_at);
1755
+
1756
+ CREATE TABLE IF NOT EXISTS durable_steps (
1757
+ run_id TEXT NOT NULL,
1758
+ step_index INTEGER NOT NULL,
1759
+ intent TEXT NOT NULL,
1760
+ kind TEXT NOT NULL,
1761
+ input_hash TEXT NOT NULL DEFAULT '',
1762
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed')),
1763
+ attempts INTEGER NOT NULL DEFAULT 0,
1764
+ result_json TEXT,
1765
+ error_json TEXT,
1766
+ started_at TEXT,
1767
+ completed_at TEXT,
1768
+ PRIMARY KEY (run_id, step_index)
1769
+ );
1770
+
1771
+ CREATE INDEX IF NOT EXISTS idx_durable_steps_status ON durable_steps(run_id, status);
1772
+
1773
+ CREATE TABLE IF NOT EXISTS durable_events (
1774
+ run_id TEXT NOT NULL,
1775
+ key TEXT NOT NULL,
1776
+ payload_json TEXT,
1777
+ emitted_at TEXT NOT NULL,
1778
+ PRIMARY KEY (run_id, key)
1779
+ );
1780
+
1781
+ INSERT OR IGNORE INTO durable_schema_info (version, applied_at)
1782
+ VALUES (1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));
1783
+ `;
1784
+
1492
1785
  // src/durable/workflows.ts
1493
1786
  async function runOnWorkflowStep(workflowStep, input) {
1494
1787
  const stepCfg = input.stepConfig;
@@ -2791,7 +3084,10 @@ export {
2791
3084
  ChatTurnError,
2792
3085
  ConfigError,
2793
3086
  D1DurableRunStore,
3087
+ DURABLE_SCHEMA_SQL,
3088
+ DURABLE_SCHEMA_VERSION,
2794
3089
  DurableAwaitEventTimeoutError,
3090
+ DurableChatTurnEngine,
2795
3091
  DurableRunDivergenceError,
2796
3092
  DurableRunError,
2797
3093
  DurableRunInputMismatchError,
@@ -2819,6 +3115,7 @@ export {
2819
3115
  createTraceBridge,
2820
3116
  decideKnowledgeReadiness,
2821
3117
  deriveWorkerId,
3118
+ durableChatTurnEngine,
2822
3119
  encodeServerSentEvent,
2823
3120
  manifestHash,
2824
3121
  readinessServerSentEvent,
@@ -2826,6 +3123,7 @@ export {
2826
3123
  runAgentTaskStream,
2827
3124
  runChatTurn,
2828
3125
  runDurable,
3126
+ runDurableTurn,
2829
3127
  runOnWorkflowStep,
2830
3128
  runtimeStreamServerSentEvent,
2831
3129
  sandboxAsChatTurnTarget,