workerflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runtime.ts ADDED
@@ -0,0 +1,1705 @@
1
+ import { DurableObject, RpcTarget } from "cloudflare:workers";
2
+ import type { WorkflowDefinition } from "./definition";
3
+ import type { Json } from "./json";
4
+ import mig000 from "./migrations/0000_initial";
5
+ import type { Brand } from "./brand";
6
+
7
+ export abstract class WorkflowRuntime<
8
+ TInput extends Json | undefined = Json | undefined,
9
+ TVersion extends string = string
10
+ > extends DurableObject {
11
+ private static readonly MIGRATIONS = [mig000];
12
+ private readonly sql: SqlStorage;
13
+ #status: WorkflowStatus;
14
+ #isRunLoopActive: boolean = false;
15
+ #definitionVersion: TVersion | undefined;
16
+ #definitionInput: TInput | undefined;
17
+
18
+ /**
19
+ * Optional callback that is invoked after the status of the workflow changes.
20
+ * @param status - The new status of the workflow; one of "running", "paused", "completed", "failed", or "cancelled".
21
+ * @internal
22
+ */
23
+ async onStatusChange_experimental?(
24
+ status: "running" | "paused" | "completed" | "failed" | "cancelled"
25
+ ): Promise<void>;
26
/**
 * Opens the SQL storage, brings the schema up to date, and hydrates the in-memory workflow state.
 *
 * Pending migrations are executed in order and recorded in the `migrations` table. When no `workflow_metadata` row
 * exists yet, one is inserted with status `pending` together with a `created` workflow event; otherwise the
 * persisted status, definition version, and definition input are loaded into memory.
 */
constructor(ctx: DurableObjectState, env: Cloudflare.Env) {
  super(ctx, env);
  this.sql = this.ctx.storage.sql;

  this.sql.exec(
    `CREATE TABLE IF NOT EXISTS migrations (
      version INTEGER NOT NULL PRIMARY KEY,
      migrated_at REAL NOT NULL DEFAULT (CAST(unixepoch('subsecond') * 1000 AS INTEGER))
    ) STRICT`
  );

  const migrations = WorkflowRuntime.MIGRATIONS;
  const appliedVersion =
    this.sql.exec<{ version: number | null }>("SELECT MAX(version) AS version FROM migrations").one().version ?? 0;

  if (appliedVersion > migrations.length) {
    console.error("Database migration version is ahead of the codebase. Please check your migrations.");
  } else {
    // Versions are 1-based: version N is stored at index N - 1. Nothing runs when the schema is already current.
    for (let version = appliedVersion + 1; version <= migrations.length; version++) {
      this.sql.exec(migrations[version - 1] as string);
      this.sql.exec("INSERT INTO migrations (version) VALUES (?)", version);
    }
  }

  const [persisted] = this.sql
    .exec<WorkflowMetadata_Row<TVersion>>("SELECT * FROM workflow_metadata WHERE id = 1")
    .toArray();

  if (persisted !== undefined) {
    // Existing instance: restore what was persisted, mapping SQL NULL to `undefined`.
    this.#status = persisted.status;
    this.#definitionVersion = persisted.definition_version === null ? undefined : persisted.definition_version;
    this.#definitionInput =
      persisted.definition_input === null ? undefined : (JSON.parse(persisted.definition_input) as TInput);
  } else {
    // First construction: seed the singleton metadata row and record the `created` event.
    this.sql.exec("INSERT INTO workflow_metadata (id, status) VALUES (1, ?)", "pending");
    this.sql.exec("INSERT INTO workflow_events (type) VALUES (?)", "created");
    this.#status = "pending";
  }
}
63
+
64
/**
 * Resolves the workflow definition for the definition version pinned to this workflow instance.
 *
 * @param version - The pinned definition version.
 * @returns A factory that receives `props` (`requestId`, `runtimeInstanceId`, `input`) and returns the definition
 * stub; the run loop calls `next()` on the returned value.
 */
+ protected abstract getDefinition(
65
+ version: TVersion
66
+ ): (options: {
67
+ props: { requestId: string; runtimeInstanceId: string; input: TInput };
68
+ }) => Fetcher<WorkflowDefinition<TInput>>;
69
+
70
/**
 * Returns the workflow status currently held in memory by this runtime instance.
 */
public getStatus(): WorkflowStatus {
  const current = this.#status;
  return current;
}
73
+
74
/**
 * Persists a status transition and appends the matching `workflow_events` row.
 *
 * Does nothing when the requested status equals the current one. Otherwise it updates `workflow_metadata`, inserts
 * the event, and finally updates the in-memory status.
 *
 * @param data - The target status; a `cancelled` transition may carry an optional `reason`.
 */
#setStatus(
  data:
    | { type: "running" }
    | { type: "paused" }
    | { type: "completed" }
    | { type: "failed" }
    | { type: "cancelled"; reason?: string }
): void {
  const previous = this.#status;
  const next = data.type;
  if (previous === next) return;

  // Entering `running` is recorded as `resumed` when leaving `paused` and as `started` otherwise; every other
  // status maps to an event of the same name.
  const eventType: "started" | "resumed" | "paused" | "completed" | "failed" | "cancelled" =
    next === "running" ? (previous === "paused" ? "resumed" : "started") : next;

  // Only cancellations carry a reason; everything else stores NULL.
  const cancellationReason = data.type === "cancelled" ? (data.reason ?? null) : null;

  this.sql.exec(
    `UPDATE workflow_metadata
     SET status = ?,
         updated_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER)
     WHERE id = 1`,
    next
  );

  this.sql.exec(
    `INSERT INTO workflow_events (type, cancellation_reason) VALUES (?, ?)`,
    eventType,
    cancellationReason
  );

  this.#status = next;
}
113
+
114
/**
 * Reads every row of the `steps` table, oldest first by `created_at`, and formats each one.
 *
 * @returns The formatted steps of this workflow instance.
 */
getSteps_experimental(): Step[] {
  const rows = this.sql.exec<Step_Row>("SELECT * FROM steps ORDER BY created_at ASC").toArray();
  const formatted: Step[] = [];
  for (const row of rows) {
    formatted.push(formatStep(row));
  }
  return formatted;
}
124
+
125
/**
 * Reads every row of the `step_events` table, ordered by `recorded_at`, and formats each one.
 *
 * @returns The formatted step events of this workflow instance.
 */
getStepEvents_experimental(): StepEvent[] {
  const rows = this.sql.exec<StepEventRow>("SELECT * FROM step_events ORDER BY recorded_at ASC").toArray();
  const formatted: StepEvent[] = [];
  for (const row of rows) {
    formatted.push(formatStepEvent(row));
  }
  return formatted;
}
134
+
135
/**
 * Reads every row of the `workflow_events` table, ordered by `id` (insertion order), and formats each one.
 *
 * @returns The formatted workflow status events of this workflow instance.
 */
getWorkflowEvents_experimental(): WorkflowEvent[] {
  const rows = this.sql.exec<WorkflowEventRow>("SELECT * FROM workflow_events ORDER BY id ASC").toArray();
  const formatted: WorkflowEvent[] = [];
  for (const row of rows) {
    formatted.push(formatWorkflowEvent(row));
  }
  return formatted;
}
144
+
145
/**
 * Delivers an inbound event to the workflow.
 *
 * - Terminal workflow: the event is logged and dropped.
 * - Paused workflow: the event is stored unclaimed in `inbound_events`; no wait step is satisfied and the run loop is
 *   not started.
 * - Otherwise: the oldest `waiting` wait step for `event` (by `created_at`, then `id`) is marked `satisfied`, a
 *   `wait_satisfied` step event and an already-claimed inbound event are recorded, and `run()` is invoked. When no
 *   such step exists, the event is stored unclaimed.
 *
 * @param event - The event name that wait steps are matched against.
 * @param payload - Optional payload; stored as JSON text, or NULL when omitted.
 */
async handleInboundEvent(event: string, payload?: Json): Promise<void> {
  const status = this.#status;

  // Terminal workflows never consume events.
  if (this.isTerminalStatus(status)) {
    console.info(`An inbound event was received for a workflow in a terminal state: ${status}`);
    return;
  }

  const serializedPayload = payload === undefined ? null : JSON.stringify(payload);

  // While paused, only queue the event; it stays unclaimed in `inbound_events`.
  if (status === "paused") {
    this.sql.exec(`INSERT INTO inbound_events (event_name, payload) VALUES (?, ?)`, event, serializedPayload);
    return;
  }

  // Oldest wait step that is still waiting for this event name.
  const [waitingStep] = this.sql
    .exec<Pick<WaitStep_Row, "id">>(
      `SELECT id
       FROM steps
       WHERE type = 'wait'
         AND state = 'waiting'
         AND event_name = ?
       ORDER BY created_at ASC, id ASC
       LIMIT 1`,
      event
    )
    .toArray();

  // Nobody is waiting yet: queue the event unclaimed.
  if (waitingStep === undefined) {
    this.sql.exec(`INSERT INTO inbound_events (event_name, payload) VALUES (?, ?)`, event, serializedPayload);
    return;
  }

  const stepId = waitingStep.id;

  this.sql.exec(
    `UPDATE steps
     SET state = 'satisfied',
         payload = ?,
         resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER),
         timeout_at = NULL
     WHERE id = ?
       AND type = 'wait'
       AND state = 'waiting'`,
    serializedPayload,
    stepId
  );

  this.sql.exec(
    `INSERT INTO step_events (step_id, type, payload)
     VALUES (?, ?, ?)`,
    stepId,
    "wait_satisfied",
    serializedPayload
  );

  // Record the event as already claimed by the step it satisfied.
  this.sql.exec(
    `INSERT INTO inbound_events (event_name, payload, claimed_by, claimed_at)
     VALUES (?, ?, ?, CAST(unixepoch('subsecond') * 1000 AS INTEGER))`,
    event,
    serializedPayload,
    stepId
  );

  await this.run();
}
221
+
222
/**
 * Cancels the workflow unless it is already in a terminal state (completed, failed, or cancelled).
 *
 * Records the `cancelled` status, deletes any scheduled alarm, and then notifies `onStatusChange_experimental` when
 * that hook is defined.
 *
 * @param reason - Optional reason stored with the cancellation event.
 */
async cancel(reason?: string): Promise<void> {
  const alreadyTerminal = this.isTerminalStatus(this.#status);
  if (alreadyTerminal) {
    return;
  }

  this.#setStatus({ type: "cancelled", reason });
  await this.ctx.storage.deleteAlarm();

  if (this.onStatusChange_experimental === undefined) {
    return;
  }
  await this.onStatusChange_experimental("cancelled");
}
238
+
239
/**
 * Pauses a running workflow; a no-op for every status other than `running`.
 *
 * The status change and the alarm deletion are performed inside one storage transaction, after which
 * `onStatusChange_experimental` is notified when that hook is defined.
 */
async pause(): Promise<void> {
  const isRunning = this.#status === "running";
  if (!isRunning) {
    return;
  }

  await this.ctx.storage.transaction(async (txn) => {
    this.#setStatus({ type: "paused" });
    await txn.deleteAlarm();
  });

  if (this.onStatusChange_experimental === undefined) {
    return;
  }
  await this.onStatusChange_experimental("paused");
}
255
+
256
/**
 * Resumes a paused workflow: moves it back to `running`, notifies `onStatusChange_experimental` when that hook is
 * defined, and then invokes `run()`.
 *
 * @throws Error when the workflow is not currently `paused`.
 */
async resume(): Promise<void> {
  const current = this.#status;
  if (current !== "paused") {
    throw new Error(`Cannot resume workflow: expected status 'paused' but got '${current}'.`);
  }

  this.#setStatus({ type: "running" });

  const hasListener = this.onStatusChange_experimental !== undefined;
  if (hasListener) {
    await this.onStatusChange_experimental("running");
  }

  await this.run();
}
273
+
274
/**
 * Alarm handler. Does nothing for terminal or paused workflows. When the run loop is still active it only re-arms
 * a safety alarm 30 minutes out; otherwise it invokes `run()`.
 *
 * @param _info - Alarm invocation details; unused.
 */
async alarm(_info?: AlarmInvocationInfo): Promise<void> {
  // Terminal and paused workflows must not be driven forward.
  if (this.isTerminalStatus(this.#status) || this.#status === "paused") {
    return;
  }

  if (!this.#isRunLoopActive) {
    await this.run();
    return;
  }

  // The loop is still executing: schedule another safety alarm instead of starting a second loop.
  const safetyAlarmDelayMs = 30 * 60 * 1_000; // 30 minutes
  await this.ctx.storage.setAlarm(Date.now() + safetyAlarmDelayMs);
}
288
+
289
/**
 * Creates the workflow instance by pinning its definition version and input, then invokes `run()`.
 *
 * Returns early without doing anything when the workflow is in a terminal state or is paused. When a version is
 * already pinned, the persisted input is kept and `options.input` is ignored.
 *
 * @param options.definitionVersion - The definition version to pin to this workflow instance. It is later passed to
 * the `getDefinition` hook to resolve the workflow definition.
 * @param options.input - The input of the workflow instance, passed to the definition as the `input` prop. Any
 * defined JSON value is persisted, including `0`, `false`, `""`, and `null`; only `undefined` is stored as "no input".
 * @throws Error when a different definition version is already pinned.
 */
public async create(options: { definitionVersion: TVersion; input?: TInput }): Promise<void> {
  if (this.isTerminalStatus(this.#status)) return;
  if (this.#status === "paused") return;

  const version = options.definitionVersion;
  let metadata = this.sql
    .exec<Pick<WorkflowMetadata_Row<TVersion>, "definition_version" | "definition_input">>(
      "SELECT definition_version, definition_input FROM workflow_metadata WHERE id = 1"
    )
    .one();

  if (metadata.definition_version !== null && metadata.definition_version !== version) {
    throw new Error(
      `Workflow definition version is already pinned to '${metadata.definition_version}' and cannot be changed to '${version}'.`
    );
  }

  // If the workflow is not yet pinned to a definition version, we pin it to the new version and set the input.
  if (metadata.definition_version === null) {
    // Compare against `undefined` rather than relying on truthiness so that falsy JSON inputs are not dropped.
    const serializedInput = options.input !== undefined ? JSON.stringify(options.input) : null;
    metadata = this.sql
      .exec<Pick<WorkflowMetadata_Row<TVersion>, "definition_version" | "definition_input">>(
        `UPDATE workflow_metadata
         SET definition_version = ?,
             definition_input = ?,
             updated_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER)
         WHERE id = 1 RETURNING definition_version, definition_input`,
        version,
        serializedInput
      )
      .one();
  }

  this.#definitionVersion = version;
  // SQL NULL means "no input"; any stored text is parsed back, mirroring how the constructor hydrates the input.
  this.#definitionInput =
    metadata.definition_input !== null ? (JSON.parse(metadata.definition_input) as TInput) : undefined;

  await this.run();
}
336
+
337
/**
 * Starts the run loop that drives the workflow forward, unless there is nothing to do.
 *
 * Returns immediately when the workflow is terminal, paused, or has no pinned definition version. Otherwise it
 * moves the workflow to `running` (notifying `onStatusChange_experimental` when defined) and, if no loop is already
 * active, launches the loop in the background. This method does not wait for the loop to finish.
 */
private async run(): Promise<void> {
  if (this.isTerminalStatus(this.#status)) return;
  if (this.#status === "paused") return;

  if (this.#definitionVersion === undefined) return;

  if (this.#status !== "running") {
    // `#setStatus` persists the transition and updates the in-memory status itself.
    this.#setStatus({ type: "running" });

    if (this.onStatusChange_experimental !== undefined) {
      await this.onStatusChange_experimental("running");
    }
  }

  // Only one loop may be active at a time.
  if (this.#isRunLoopActive) return;

  const requestId = crypto.randomUUID();
  const context = new WorkflowRuntimeContext(this.ctx.storage, { requestId });

  this.#isRunLoopActive = true;

  // Intentionally not awaited: the loop keeps running after `run()` has returned to its caller.
  void (async () => {
    try {
      /**
       * Drives the workflow forward by repeatedly calling next() on the executor.
       *
       * Each call to next() re-executes the workflow function from the top. Completed steps return their cached
       * results immediately. Per nesting level, a sibling `run()` that follows another successful sibling in the same
       * `next()` sees a full sibling budget and throws `ResumeImmediatelyError` until the next `next()`; the budget
       * increments only after a `run()` step records `succeeded`. Nested `run()` callbacks use a fresh frame.
       *
       * The loop exits when:
       * - the workflow completes or aborts (done: true);
       * - a step needs a delayed retry or sleep (an alarm is scheduled and the loop exits);
       * - a step is waiting for an inbound event (exits with no alarm; an event resumes the workflow);
       * - the workflow is paused or cancelled, or a fatal error marks it as failed.
       */
      while (true) {
        // If paused between iterations, exit the loop cleanly.
        if (this.#status === "paused") {
          await this.ctx.storage.deleteAlarm();
          break;
        }

        try {
          const version = this.#definitionVersion;
          if (version === undefined) {
            throw new Error(
              "Workflow definition version has not been initialized. Call 'create()' before running the workflow."
            );
          }

          const definition = this.getDefinition(version);
          const executor = definition({
            props: {
              runtimeInstanceId: this.ctx.id.toString(),
              requestId,
              input: this.#definitionInput as TInput
            }
          });

          // Schedule a watchdog alarm. A watchdog alarm is protection against loss of control around durable state
          // transitions, especially when a step has been durably marked as started but the engine has not durably
          // recorded how to proceed next.
          await this.ctx.storage.setAlarm(Date.now() + 30_000 * 60); // 30 minutes

          const result = await executor.next(context);

          // If the workflow was cancelled while waiting for the executor to return a response, we exit the loop
          // immediately.
          if (this.#status === "cancelled") {
            await this.ctx.storage.deleteAlarm();
            break;
          }

          // Pause can happen while `next()` is in flight. From `paused`, durable metadata may only move to `running`
          // or `cancelled`, so we must not apply terminal transitions here; `resume()` will run `next()` again.
          if (this.getStatus() === "paused") {
            await this.ctx.storage.deleteAlarm();
            break;
          }

          if (result.done) {
            await this.ctx.storage.transaction(async (transaction) => {
              this.#setStatus({ type: result.status });
              await transaction.deleteAlarm();
            });
            if (this.onStatusChange_experimental !== undefined) {
              await this.onStatusChange_experimental(result.status);
            }
            break;
          }

          // An 'immediate' resume hint indicates that the workflow should resume immediately.
          if (result.resume.type === "immediate") continue;

          // Any other hint (including 'suspended') means the workflow waits for the next alarm or inbound event.
          break;
        } catch (error) {
          // An exception can be thrown when calling 'next()' on the executor worker.
          // The resulting exception will have a 'remote' property set to 'True' in this case.
          // In this case, the error is considered to be transient and the workflow should continue.
          if (error instanceof Error && "remote" in error && error.remote) {
            console.info(error, { requestId });
            continue;
          }

          console.error(error instanceof Error ? error : new Error(String(error)), { requestId });

          // If the workflow is in a terminal state, we do not need to process the error.
          if (this.isTerminalStatus(this.#status)) break;

          // Same as after `next()` returns: `paused` cannot transition to `failed` in the database.
          if (this.getStatus() === "paused") {
            await this.ctx.storage.deleteAlarm();
            break;
          }

          // All other errors are considered to be fatal and the workflow should be aborted.
          await this.ctx.storage.transaction(async (transaction) => {
            this.#setStatus({ type: "failed" });
            await transaction.deleteAlarm();
          });

          if (this.onStatusChange_experimental !== undefined) {
            await this.onStatusChange_experimental("failed");
          }

          // The workflow is now terminal: stop here. Without this `break` the loop would re-arm the watchdog alarm
          // and call `next()` again on a failed workflow.
          break;
        }
      }
    } finally {
      this.#isRunLoopActive = false;
    }
  })();
}
470
+
471
/**
 * Tells whether a status is terminal, i.e. `completed`, `failed`, or `cancelled`.
 *
 * @param status - The status to classify.
 * @returns `true` for a terminal status, `false` otherwise.
 */
private isTerminalStatus(status: WorkflowStatus): boolean {
  switch (status) {
    case "completed":
    case "failed":
    case "cancelled":
      return true;
    default:
      return false;
  }
}
474
+ }
475
+
476
+ export class WorkflowRuntimeContext extends RpcTarget {
477
+ private readonly storage: DurableObjectStorage;
478
+ private readonly sql: SqlStorage;
479
+ private readonly requestId?: string;
480
+ private static readonly BACKOFF_DELAYS = [250, 500, 1_000, 2_000, 4_000, 8_000, 10_000] as const;
481
+
482
+ private static readonly DEFAULT_MAX_ATTEMPTS = 3;
483
+
484
/**
 * Binds the context to a Durable Object storage handle.
 *
 * @param storage - The storage whose transactions, alarms, and SQL interface the context uses.
 * @param options - Optional settings; `requestId` is attached to the errors this context logs.
 */
constructor(storage: DurableObjectStorage, options?: { requestId?: string }) {
  super();
  this.storage = storage;
  this.sql = storage.sql;
  this.requestId = options === undefined ? undefined : options.requestId;
}
490
+
491
public async getOrCreateStep(
  id: RunStepId,
  options: { type: "run"; maxAttempts?: number | null; parentStepId: RunStepId | null }
): Promise<RunStep>;
public async getOrCreateStep(
  id: SleepStepId,
  options: { type: "sleep"; wakeAt: Date; parentStepId: RunStepId | null }
): Promise<SleepStep>;
public async getOrCreateStep(
  id: WaitStepId,
  options: { type: "wait"; eventName: string; timeoutAt?: Date; parentStepId: RunStepId | null }
): Promise<WaitStep>;
/**
 * Returns the step with the given id, creating it when it does not exist yet. Dispatches on `options.type` to the
 * run, sleep, or wait implementation.
 *
 * Every error is logged together with the request id. Errors whose message is recognised by
 * `isSqliteInvariantViolation` are rethrown as `WorkflowInvariantError`; all other errors are rethrown unchanged.
 *
 * @param id - The id of the step.
 * @param options - The step type and its type-specific settings.
 * @returns The existing or newly created step.
 */
public async getOrCreateStep(
  id: RunStepId | SleepStepId | WaitStepId,
  options:
    | { type: "run"; maxAttempts?: number | null; parentStepId: RunStepId | null }
    | { type: "sleep"; wakeAt: Date; parentStepId: RunStepId | null }
    | { type: "wait"; eventName: string; timeoutAt?: Date; parentStepId: RunStepId | null }
): Promise<Step> {
  try {
    switch (options.type) {
      case "run": {
        const { maxAttempts, parentStepId } = options;
        return await this.getOrCreateRunStep(id as RunStepId, { maxAttempts, parentStepId });
      }
      case "sleep": {
        const { wakeAt, parentStepId } = options;
        return await this.getOrCreateSleepStep(id as SleepStepId, { wakeAt, parentStepId });
      }
      default: {
        // The only remaining variant is `wait`.
        const { eventName, timeoutAt, parentStepId } = options;
        return await this.getOrCreateWaitStep(id as WaitStepId, { eventName, timeoutAt, parentStepId });
      }
    }
  } catch (error) {
    console.error(error instanceof Error ? error : new Error(String(error)), { requestId: this.requestId });

    const isInvariantViolation = error instanceof Error && isSqliteInvariantViolation(error.message);
    if (isInvariantViolation) {
      throw new WorkflowInvariantError(error.message);
    }

    // Everything else is treated as an infrastructure/critical error and may cause the DO to be reset.
    // Examples of such errors:
    //   'SQLITE_FULL',      // Database or disk is full
    //   'SQLITE_IOERR',     // I/O error
    //   'SQLITE_BUSY',      // Database is locked
    //   'SQLITE_NOMEM',     // Out of memory
    //   'SQLITE_INTERRUPT', // Operation interrupted
    //   'SQLITE_CORRUPT',   // Database file is corrupted
    //   'SQLITE_CANTOPEN',  // Cannot open database file
    throw error;
  }
}
546
+
547
/**
 * Reports whether the given run step has at least one **direct** child step whose state is anything other than
 * `failed` or `timed_out`.
 *
 * That covers children still in flight (`pending`/`running` runs, `waiting` sleeps and waits) as well as children
 * that finished without failing (`succeeded`, `satisfied`, `elapsed`).
 *
 * @param stepId - The id of the parent run step.
 * @returns `true` when such a child row exists.
 */
public async hasRunningOrWaitingChildSteps(stepId: RunStepId): Promise<boolean> {
  // One matching row is enough, hence LIMIT 1.
  const matches = this.sql.exec<{ x: number }>(
    `SELECT 1 AS x FROM steps
     WHERE parent_step_id = ?
       AND state NOT IN ('failed', 'timed_out')
     LIMIT 1`,
    stepId
  );
  const found = matches.toArray();
  return found.length > 0;
}
566
+
567
/**
 * Returns the run step with the given id, inserting it in `pending` state with zero attempts when it is missing.
 *
 * For an existing `pending` step whose `next_attempt_at` is still in the future, the alarm is set to that time.
 *
 * @param id - The id of the run step.
 * @param options.maxAttempts - Attempt limit; `undefined` selects `DEFAULT_MAX_ATTEMPTS`, while `null` is stored
 * as-is.
 * @param options.parentStepId - The id of the enclosing run step, or `null`.
 * @returns The formatted existing or newly inserted step.
 */
private async getOrCreateRunStep(
  id: RunStepId,
  options: { maxAttempts?: number | null; parentStepId: RunStepId | null }
): Promise<RunStep> {
  // Only `undefined` falls back to the default; an explicit `null` must be preserved.
  const maxAttempts =
    options.maxAttempts !== undefined ? options.maxAttempts : WorkflowRuntimeContext.DEFAULT_MAX_ATTEMPTS;

  return await this.storage.transaction(async (txn) => {
    const [current] = this.sql.exec<RunStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'run'", id).toArray();

    if (current !== undefined) {
      // A pending step that is not yet due: make sure the alarm fires when its next attempt is scheduled.
      if (current.state === "pending" && Date.now() < current.next_attempt_at) {
        await txn.setAlarm(current.next_attempt_at);
      }
      return formatStep(current);
    }

    // First time this step is seen: create it as 'pending', due immediately.
    const created = this.sql
      .exec<RunStep_Row>(
        `INSERT INTO steps (id, type, state, attempt_count, max_attempts, next_attempt_at, parent_step_id) VALUES (?, 'run', 'pending', 0, ?, CAST(unixepoch('subsecond') * 1000 AS INTEGER), ?) RETURNING *`,
        id,
        maxAttempts,
        options.parentStepId
      )
      .one();
    return formatStep(created);
  });
}
596
+
597
/**
 * Returns the sleep step with the given id, inserting it in `waiting` state when it is missing.
 *
 * A new step also gets a `sleep_waiting` step event and an alarm at its wake time. For an existing `waiting` step
 * whose `wake_at` is still in the future, the alarm is set to that time.
 *
 * @param id - The id of the sleep step.
 * @param options.wakeAt - When the sleep should end; only used when the step is created.
 * @param options.parentStepId - The id of the enclosing run step, or `null`.
 * @returns The formatted existing or newly inserted step.
 */
private async getOrCreateSleepStep(
  id: SleepStepId,
  options: { wakeAt: Date; parentStepId: RunStepId | null }
): Promise<SleepStep> {
  return await this.storage.transaction(async (txn) => {
    const [current] = this.sql
      .exec<SleepStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'sleep'", id)
      .toArray();

    if (current === undefined) {
      // New sleep: persist it as 'waiting', record the event, and arm the alarm for the wake time.
      const wakeAtMs = options.wakeAt.getTime();
      const created = this.sql
        .exec<SleepStep_Row>(
          `INSERT INTO steps (id, type, state, wake_at, parent_step_id) VALUES (?, 'sleep', 'waiting', ?, ?) RETURNING *`,
          id,
          wakeAtMs,
          options.parentStepId
        )
        .one();
      this.sql.exec("INSERT INTO step_events (step_id, type, wake_at) VALUES (?, ?, ?)", id, "sleep_waiting", wakeAtMs);
      await txn.setAlarm(wakeAtMs);
      return formatStep(created);
    }

    // Existing sleep that has not elapsed yet: re-arm the alarm for its stored wake time.
    if (current.state === "waiting" && Date.now() < current.wake_at) {
      await txn.setAlarm(current.wake_at);
    }
    return formatStep(current);
  });
}
628
+
629
/**
 * Returns the wait step with the given id, inserting it in `waiting` state (plus a `wait_waiting` step event) when
 * it is missing, and then tries to satisfy it from a queued inbound event.
 *
 * A step that exists and is no longer `waiting` is returned unchanged. For a waiting step, the oldest unclaimed
 * inbound event with the same event name is claimed; if one is found the step becomes `satisfied` and a
 * `wait_satisfied` step event is recorded. If none is found, the alarm is set to the step's timeout when that
 * timeout lies in the future and is deleted otherwise.
 *
 * @param id - The id of the wait step.
 * @param options.eventName - The event name the step waits for.
 * @param options.timeoutAt - Optional timeout; only used when the step is created.
 * @param options.parentStepId - The id of the enclosing run step, or `null`.
 * @returns The formatted step in its resulting state.
 */
private async getOrCreateWaitStep(
  id: WaitStepId,
  options: { eventName: string; timeoutAt?: Date; parentStepId: RunStepId | null }
): Promise<WaitStep> {
  return await this.storage.transaction(async (txn) => {
    const [found] = this.sql
      .exec<WaitStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'wait'", id)
      .toArray();

    // Already resolved ('satisfied' or 'timed_out'): nothing left to do.
    if (found !== undefined && found.state !== "waiting") {
      return formatStep(found);
    }

    let pendingWait: Extract<WaitStep_Row, { state: "waiting" }>;
    if (found === undefined) {
      const timeoutAtMs = options.timeoutAt !== undefined ? options.timeoutAt.getTime() : null;
      pendingWait = this.sql
        .exec<Extract<WaitStep_Row, { state: "waiting" }>>(
          `
          INSERT INTO steps (id, type, state, event_name, timeout_at, parent_step_id)
          VALUES (?, 'wait', 'waiting', ?, ?, ?)
          RETURNING *
          `,
          id,
          options.eventName,
          timeoutAtMs,
          options.parentStepId
        )
        .one();
      this.sql.exec(
        `INSERT INTO step_events (step_id, type, event_name, timeout_at) VALUES (?, ?, ?, ?)`,
        id,
        "wait_waiting",
        options.eventName,
        timeoutAtMs
      );
    } else {
      pendingWait = found;
    }

    const storedTimeoutAt = pendingWait.timeout_at;

    // Claim the oldest inbound event for this event name that nobody has claimed yet.
    const [claimed] = this.sql
      .exec<{ id: string; payload: string }>(
        `
        UPDATE inbound_events
        SET claimed_by = ?,
            claimed_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER)
        WHERE id = (
          SELECT id
          FROM inbound_events
          WHERE event_name = ?
            AND claimed_by IS NULL
          ORDER BY created_at ASC, id ASC
          LIMIT 1
        )
          AND claimed_by IS NULL
        RETURNING id, payload
        `,
        id,
        options.eventName
      )
      .toArray();

    if (claimed === undefined) {
      // Nothing queued: keep waiting. Arm the alarm for a future timeout, otherwise clear it.
      if (storedTimeoutAt !== null && Date.now() < storedTimeoutAt) {
        await txn.setAlarm(storedTimeoutAt);
      } else {
        await txn.deleteAlarm();
      }
      return formatStep(pendingWait);
    }

    // A queued event was claimed: resolve the step with its payload and record the event.
    const resolved = this.sql
      .exec<WaitStep_Row>(
        `
        UPDATE steps
        SET state = 'satisfied',
            payload = ?,
            resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER),
            timeout_at = NULL
        WHERE id = ?
          AND type = 'wait'
          AND state = 'waiting'
        RETURNING *
        `,
        claimed.payload,
        id
      )
      .one();

    this.sql.exec(
      `
      INSERT INTO step_events (step_id, type, payload)
      VALUES (?, ?, ?)
      `,
      id,
      "wait_satisfied",
      claimed.payload
    );

    return formatStep(resolved);
  });
}
736
/**
 * Applies an attempt lifecycle event to a run step inside one storage transaction.
 *
 * - `running`: requires a `pending` step that is due and whose attempt count is one less than the event's; moves it
 *   to `running` and records `attempt_started`.
 * - `succeeded`: requires a `running` step with a matching attempt count; stores the result and records
 *   `attempt_succeeded`.
 * - `failed`: requires a `running` step with a matching attempt count. When the attempt limit is reached or the
 *   error is flagged non-retryable, the step becomes `failed`; otherwise it returns to `pending` with a backoff
 *   delay and the alarm is set to the next attempt time. Both paths record `attempt_failed`.
 *
 * Every error is logged with the request id. Errors whose message is recognised by `isSqliteInvariantViolation` are
 * rethrown as `WorkflowInvariantError`; all other errors are rethrown unchanged.
 *
 * @param id - The id of the run step.
 * @param event - The attempt event to apply.
 * @throws WorkflowInvariantError when the step is missing or its state, schedule, or attempt count does not match
 * what the event requires.
 */
async handleRunAttemptEvent(
  id: RunStepId,
  event:
    | { type: "running"; attemptCount: number }
    | { type: "succeeded"; attemptCount: number; result: string }
    | {
        type: "failed";
        attemptCount: number;
        errorMessage: string;
        errorName?: string;
        isNonRetryableStepError?: boolean;
      }
): Promise<void> {
  try {
    await this.storage.transaction(async (txn) => {
      const [step] = this.sql.exec<RunStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'run'", id).toArray();
      if (step === undefined) {
        throw new WorkflowInvariantError(`Step '${id}' of type 'run' not found.`);
      }

      const attemptCount = event.attemptCount;

      switch (event.type) {
        case "running": {
          if (step.state !== "pending") {
            throw new WorkflowInvariantError(
              `Unexpected state for run step '${id}'. Expected 'pending' but got ${step.state}.`
            );
          }
          // The attempt may only start once its scheduled time has passed.
          if (step.next_attempt_at !== null && step.next_attempt_at > Date.now()) {
            throw new WorkflowInvariantError(
              `Unexpected next attempt at for run step '${id}'. Expected a NULL value or a value that is in the past but got ${new Date(step.next_attempt_at).toISOString()}.`
            );
          }
          if (step.attempt_count !== attemptCount - 1) {
            throw new WorkflowInvariantError(
              `Unexpected attempt count for run step '${id}'. Expected ${attemptCount - 1} but got ${step.attempt_count}.`
            );
          }

          this.sql.exec(
            `UPDATE steps SET state = 'running', attempt_count = ?, next_attempt_at = NULL WHERE id = ?`,
            attemptCount,
            id
          );
          this.sql.exec(
            "INSERT INTO step_events (step_id, type, attempt_number) VALUES (?, ?, ?)",
            id,
            "attempt_started",
            attemptCount
          );
          break;
        }

        case "succeeded": {
          if (step.state !== "running") {
            throw new WorkflowInvariantError(
              `Unexpected state for run step '${id}'. Expected 'running' but got ${step.state}.`
            );
          }
          if (step.attempt_count !== attemptCount) {
            throw new WorkflowInvariantError(
              `Unexpected attempt count for run step '${id}'. Expected ${attemptCount} but got ${step.attempt_count}.`
            );
          }

          this.sql.exec(
            `UPDATE steps SET state = 'succeeded', result = ?, resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER) WHERE id = ?`,
            event.result,
            id
          );
          this.sql.exec(
            "INSERT INTO step_events (step_id, type, attempt_number, result) VALUES (?, ?, ?, ?)",
            id,
            "attempt_succeeded",
            attemptCount,
            event.result
          );
          break;
        }

        case "failed": {
          if (step.state !== "running") {
            throw new WorkflowInvariantError(
              `Unexpected state for run step '${id}'. Expected 'running' but got ${step.state}.`
            );
          }
          if (step.attempt_count !== attemptCount) {
            throw new WorkflowInvariantError(
              `Unexpected attempt count for run step '${id}'. Expected ${attemptCount} but got ${step.attempt_count}.`
            );
          }

          const errorName = event.errorName ?? null;
          const attemptsExhausted = step.max_attempts != null && step.attempt_count >= step.max_attempts;

          if (attemptsExhausted || event.isNonRetryableStepError) {
            // No retry left (or none allowed): the step fails for good.
            this.sql.exec(
              `UPDATE steps SET state = 'failed', error_message = ?, error_name = ?, resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER) WHERE id = ?`,
              event.errorMessage,
              errorName,
              id
            );
            this.sql.exec(
              "INSERT INTO step_events (step_id, type, attempt_number, error_message, error_name) VALUES (?, ?, ?, ?, ?)",
              id,
              "attempt_failed",
              attemptCount,
              event.errorMessage,
              errorName
            );
          } else {
            // Retry later: the delay is looked up by attempt number, falling back to the last (largest) entry.
            const delays = WorkflowRuntimeContext.BACKOFF_DELAYS;
            const backoff = delays[attemptCount - 1] ?? (delays[delays.length - 1] as number);
            const nextAttemptAt = Date.now() + backoff;

            this.sql.exec(`UPDATE steps SET state = 'pending', next_attempt_at = ? WHERE id = ?`, nextAttemptAt, id);
            this.sql.exec(
              "INSERT INTO step_events (step_id, type, attempt_number, error_message, error_name, next_attempt_at) VALUES (?, ?, ?, ?, ?, ?)",
              id,
              "attempt_failed",
              attemptCount,
              event.errorMessage,
              errorName,
              nextAttemptAt
            );
            await txn.setAlarm(nextAttemptAt);
          }
          break;
        }
      }
    });
  } catch (error) {
    console.error(error instanceof Error ? error : new Error(String(error)), { requestId: this.requestId });

    const isInvariantViolation = error instanceof Error && isSqliteInvariantViolation(error.message);
    if (isInvariantViolation) {
      throw new WorkflowInvariantError(error.message);
    }
    throw error;
  }
}
876
+
877
/**
 * Records a lifecycle event for a sleep step.
 *
 * The only event in the signature is `elapsed`: the step row moves from `waiting` to `elapsed` (stamping
 * `resolved_at` and clearing `wake_at`) and a `sleep_elapsed` row is appended to `step_events`.
 *
 * The lookup, the state check and both writes run inside one synchronous storage transaction, matching
 * `handleWaitStepEvent`. If appending the event row fails, the step update is rolled back rather than leaving the
 * `steps` and `step_events` tables disagreeing with each other.
 *
 * @param id - Identifier of the sleep step the event applies to.
 * @param event - The event to record.
 * @throws WorkflowInvariantError if the step does not exist, is not in the `waiting` state, or SQLite reports an
 *   error that `isSqliteInvariantViolation` classifies as an invariant violation.
 */
handleSleepStepEvent(id: SleepStepId, event: { type: "elapsed" }): void {
  try {
    this.storage.transactionSync(() => {
      const [existing] = this.sql
        .exec<SleepStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'sleep'", id)
        .toArray();
      if (existing === undefined) {
        throw new WorkflowInvariantError(`Step '${id}' of type 'sleep' not found.`);
      }

      if (event.type === "elapsed") {
        // Only a step that is still waiting can elapse.
        if (existing.state !== "waiting") {
          throw new WorkflowInvariantError(
            `Unexpected state for sleep step '${id}'. Expected 'waiting' but got ${existing.state}.`
          );
        }
        // Update the step to the 'elapsed' state and insert a `sleep_elapsed` step_events row.
        this.sql.exec(
          `UPDATE steps SET state = 'elapsed', resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER), wake_at = NULL WHERE id = ?`,
          id
        );
        this.sql.exec("INSERT INTO step_events (step_id, type) VALUES (?, ?)", id, "sleep_elapsed");
      }
    });
  } catch (error) {
    console.error(error instanceof Error ? error : new Error(String(error)), { requestId: this.requestId });
    // Surface SQLite constraint/type failures as workflow invariant errors; everything else is rethrown untouched.
    if (error instanceof Error && isSqliteInvariantViolation(error.message)) {
      throw new WorkflowInvariantError(error.message);
    }
    throw error;
  }
}
907
+
908
/**
 * Records a lifecycle event for a wait step.
 *
 * The only event in the signature is `timed_out`: the step row moves from `waiting` to `timed_out` (stamping
 * `resolved_at` and clearing `timeout_at`) and a `wait_timed_out` row is appended to `step_events`. Everything runs
 * inside a single synchronous storage transaction.
 *
 * @param id - Identifier of the wait step the event applies to.
 * @param event - The event to record.
 * @throws WorkflowInvariantError if the step does not exist, is not `waiting`, still has a deadline in the future,
 *   or SQLite reports an error that `isSqliteInvariantViolation` classifies as an invariant violation.
 */
handleWaitStepEvent(id: WaitStepId, event: { type: "timed_out" }): void {
  try {
    this.storage.transactionSync(() => {
      const rows = this.sql.exec<WaitStep_Row>("SELECT * FROM steps WHERE id = ? AND type = 'wait'", id).toArray();
      const step = rows[0];
      if (step === undefined) {
        throw new WorkflowInvariantError(`Step '${id}' of type 'wait' not found.`);
      }
      if (step.state !== "waiting") {
        throw new WorkflowInvariantError(
          `Unexpected state for wait step '${id}'. Expected 'waiting' but got ${step.state}.`
        );
      }

      // A deadline that is still in the future means the caller is reporting a timeout too early.
      const deadline = step.timeout_at;
      if (deadline !== null && deadline > Date.now()) {
        throw new WorkflowInvariantError(
          `Unexpected timeout at for wait step '${id}'. Expected a NULL value or a value that is in the past but got ${new Date(deadline).toISOString()}.`
        );
      }

      if (event.type !== "timed_out") {
        return;
      }

      // Mark the step as 'timed_out' and append the matching `wait_timed_out` step_events row.
      this.sql.exec(
        "UPDATE steps SET state = 'timed_out', resolved_at = CAST(unixepoch('subsecond') * 1000 AS INTEGER), timeout_at = NULL WHERE id = ?",
        id
      );
      this.sql.exec("INSERT INTO step_events (step_id, type) VALUES (?, ?)", id, "wait_timed_out");
    });
  } catch (error) {
    const loggable = error instanceof Error ? error : new Error(String(error));
    console.error(loggable, { requestId: this.requestId });
    // SQLite constraint/type failures are reported as workflow invariant errors; anything else is rethrown as-is.
    if (error instanceof Error && isSqliteInvariantViolation(error.message)) {
      throw new WorkflowInvariantError(error.message);
    }
    throw error;
  }
}
945
+ }
946
+
947
/**
 * Tells whether an error message carries one of the SQLite result-code names that this module treats as a broken
 * workflow invariant rather than a transient storage problem.
 *
 * The check is a case-sensitive substring match against the message text.
 *
 * @param message - The `message` of the error raised by the storage layer.
 * @returns `true` when the message mentions any of the listed codes, otherwise `false`.
 */
function isSqliteInvariantViolation(message: string): boolean {
  const invariantCodes = [
    "SQLITE_CONSTRAINT", // Constraint violation (FK, UNIQUE, CHECK, NOT NULL)
    "SQLITE_MISMATCH", // Data type mismatch
    "SQLITE_ERROR", // Generic SQL error (syntax, etc.)
    "SQLITE_RANGE", // Parameter index out of range
    "SQLITE_AUTH", // Authorization denied (e.g., accessing _cf_ tables)
    "SQLITE_TOOBIG" // String or BLOB too large
  ];
  return invariantCodes.some((code) => message.includes(code));
}
957
+
958
/**
 * Error thrown when durable workflow state is not what the runtime expects, for example a missing step row, an
 * unexpected step state, or a SQLite failure classified by `isSqliteInvariantViolation`.
 */
class WorkflowInvariantError extends Error {
  /** Overrides the default `"Error"` so logs and `String(err)` identify this error class. */
  override name = "WorkflowInvariantError";

  /**
   * @param message - Human-readable description of the violated invariant.
   */
  constructor(message: string) {
    super(message);
  }
}
964
+
965
/** Identifier of a `run` step. Branded so it cannot be confused with a plain string or another step kind's id. */
export type RunStepId = Brand<string, "RunStepId">;
/** Identifier of a `sleep` step. Branded so it cannot be confused with a plain string or another step kind's id. */
export type SleepStepId = Brand<string, "SleepStepId">;
/** Identifier of a `wait` step. Branded so it cannot be confused with a plain string or another step kind's id. */
export type WaitStepId = Brand<string, "WaitStepId">;
968
+
969
/**
 * Application-facing shape of a `run` step, as produced by `formatStep` from a `RunStep_Row`: camelCase field names
 * and `Date` objects instead of numeric timestamps.
 *
 * The fields common to every state are intersected with a union discriminated on `state`, so state-specific fields
 * (`nextAttemptAt`, `result`, `errorMessage`, ...) are only reachable after narrowing.
 */
type RunStep = {
  type: "run";
  id: RunStepId;
  createdAt: Date;
  attemptCount: number;
  maxAttempts: number | null;
  parentStepId: RunStepId | null;
} & (
  | { state: "pending"; nextAttemptAt: Date }
  | { state: "running" }
  | { state: "succeeded"; result: string; resolvedAt: Date }
  | { state: "failed"; errorMessage: string; errorName?: string; resolvedAt: Date }
);
996
+
997
/**
 * Application-facing shape of a `sleep` step, as produced by `formatStep` from a `SleepStep_Row`: camelCase field
 * names and `Date` objects instead of numeric timestamps.
 *
 * Discriminated on `state`: a `waiting` step exposes `wakeAt`, an `elapsed` step exposes `resolvedAt`.
 */
type SleepStep = {
  type: "sleep";
  id: SleepStepId;
  createdAt: Date;
  parentStepId: RunStepId | null;
} & (
  | { state: "waiting"; wakeAt: Date }
  | { state: "elapsed"; resolvedAt: Date }
);
1012
+
1013
/**
 * Application-facing shape of a `wait` step, as produced by `formatStep` from a `WaitStep_Row`: camelCase field
 * names and `Date` objects instead of numeric timestamps.
 *
 * Discriminated on `state`: a `waiting` step may expose `timeoutAt`, a `satisfied` step exposes the serialized
 * `payload`, and both terminal states expose `resolvedAt`.
 */
type WaitStep = {
  type: "wait";
  id: WaitStepId;
  createdAt: Date;
  eventName: string;
  parentStepId: RunStepId | null;
} & (
  | { state: "waiting"; timeoutAt?: Date }
  | { state: "satisfied"; payload: string; resolvedAt: Date }
  | { state: "timed_out"; resolvedAt: Date }
);
1034
+
1035
/** Any formatted step; narrow on `type` first, then on `state`. */
type Step = RunStep | SleepStep | WaitStep;
1036
+
1037
/**
 * Converts a raw `steps` row into its application-facing shape: snake_case columns become camelCase fields and
 * numeric timestamps become `Date` objects.
 *
 * The overloads preserve the step kind, so a `RunStep_Row` yields a `RunStep`, and so on.
 *
 * @param step - The row as read from the `steps` table.
 * @returns The formatted step for the row's `type` and `state`.
 * @throws Error if the row carries a `type` or `state` that this function does not know about.
 */
function formatStep(step: RunStep_Row): RunStep;
function formatStep(step: SleepStep_Row): SleepStep;
function formatStep(step: WaitStep_Row): WaitStep;
function formatStep(step: Step_Row): Step;
function formatStep(step: Step_Row): Step {
  switch (step.type) {
    case "run": {
      switch (step.state) {
        case "pending":
          return {
            type: "run",
            id: step.id,
            state: "pending",
            nextAttemptAt: new Date(step.next_attempt_at),
            createdAt: new Date(step.created_at),
            attemptCount: step.attempt_count,
            maxAttempts: step.max_attempts,
            parentStepId: step.parent_step_id
          } satisfies RunStep;
        case "running":
          return {
            type: "run",
            id: step.id,
            state: "running",
            createdAt: new Date(step.created_at),
            attemptCount: step.attempt_count,
            maxAttempts: step.max_attempts,
            parentStepId: step.parent_step_id
          } satisfies RunStep;
        case "succeeded":
          return {
            type: "run",
            id: step.id,
            state: "succeeded",
            result: step.result,
            resolvedAt: new Date(step.resolved_at),
            createdAt: new Date(step.created_at),
            attemptCount: step.attempt_count,
            maxAttempts: step.max_attempts,
            parentStepId: step.parent_step_id
          } satisfies RunStep;
        case "failed":
          return {
            type: "run",
            id: step.id,
            state: "failed",
            errorMessage: step.error_message,
            // A NULL column is exposed as an absent (undefined) name.
            errorName: step.error_name ?? undefined,
            resolvedAt: new Date(step.resolved_at),
            createdAt: new Date(step.created_at),
            attemptCount: step.attempt_count,
            maxAttempts: step.max_attempts,
            parentStepId: step.parent_step_id
          } satisfies RunStep;
        default:
          throw new Error("Unexpected step state");
      }
    }
    case "sleep":
      switch (step.state) {
        case "waiting":
          return {
            type: "sleep",
            id: step.id,
            state: "waiting",
            wakeAt: new Date(step.wake_at),
            createdAt: new Date(step.created_at),
            parentStepId: step.parent_step_id
          } satisfies SleepStep;
        case "elapsed":
          return {
            type: "sleep",
            id: step.id,
            state: "elapsed",
            resolvedAt: new Date(step.resolved_at),
            createdAt: new Date(step.created_at),
            parentStepId: step.parent_step_id
          } satisfies SleepStep;
        default:
          throw new Error("Unexpected step state");
      }
    case "wait":
      switch (step.state) {
        case "waiting":
          return {
            type: "wait",
            id: step.id,
            state: "waiting",
            eventName: step.event_name,
            // Explicit null check: a truthiness test would silently drop a stored timestamp of 0.
            timeoutAt: step.timeout_at != null ? new Date(step.timeout_at) : undefined,
            createdAt: new Date(step.created_at),
            parentStepId: step.parent_step_id
          } satisfies WaitStep;
        case "satisfied":
          return {
            type: "wait",
            id: step.id,
            state: "satisfied",
            payload: step.payload,
            createdAt: new Date(step.created_at),
            eventName: step.event_name,
            resolvedAt: new Date(step.resolved_at),
            parentStepId: step.parent_step_id
          } satisfies WaitStep;
        case "timed_out":
          return {
            type: "wait",
            id: step.id,
            state: "timed_out",
            eventName: step.event_name,
            resolvedAt: new Date(step.resolved_at),
            createdAt: new Date(step.created_at),
            parentStepId: step.parent_step_id
          } satisfies WaitStep;
        default:
          throw new Error("Unexpected step state");
      }
    default:
      throw new Error("Unexpected step type");
  }
}
1158
+
1159
/**
 * Application-facing shape of a `step_events` row, as produced by `formatStepEvent`: camelCase field names and
 * `Date` objects instead of numeric timestamps.
 *
 * Discriminated on `type`; each event kind carries only the fields that are meaningful for it.
 */
type StepEvent = {
  id: string;
  stepId: string;
  recordedAt: Date;
} & (
  | { type: "attempt_started"; attemptNumber: number }
  | { type: "attempt_succeeded"; attemptNumber: number; result?: string }
  | { type: "attempt_failed"; attemptNumber: number; errorMessage: string; errorName?: string; nextAttemptAt?: Date }
  | { type: "sleep_waiting"; wakeAt: Date }
  | { type: "sleep_elapsed" }
  | { type: "wait_waiting"; eventName: string; timeoutAt?: Date }
  | { type: "wait_satisfied"; payload?: string }
  | { type: "wait_timed_out" }
);
1203
+
1204
/**
 * Converts a raw `step_events` row into its application-facing shape: snake_case columns become camelCase fields
 * and numeric timestamps become `Date` objects.
 *
 * @param row - The row as read from the `step_events` table.
 * @returns The formatted event for the row's `type`.
 * @throws Error if the row carries a `type` that this function does not know about.
 */
function formatStepEvent(row: StepEventRow): StepEvent {
  switch (row.type) {
    case "attempt_started":
      return {
        id: row.id,
        type: "attempt_started",
        attemptNumber: row.attempt_number,
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "attempt_succeeded":
      return {
        id: row.id,
        type: "attempt_succeeded",
        attemptNumber: row.attempt_number,
        result: row.result,
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "attempt_failed":
      return {
        id: row.id,
        type: "attempt_failed",
        attemptNumber: row.attempt_number,
        errorMessage: row.error_message,
        errorName: row.error_name ?? undefined,
        // Explicit null/undefined check: a truthiness test would silently drop a stored timestamp of 0.
        nextAttemptAt: row.next_attempt_at != null ? new Date(row.next_attempt_at) : undefined,
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "sleep_waiting":
      return {
        id: row.id,
        type: "sleep_waiting",
        wakeAt: new Date(row.wake_at),
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "sleep_elapsed":
      return {
        id: row.id,
        type: "sleep_elapsed",
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "wait_waiting":
      return {
        id: row.id,
        type: "wait_waiting",
        eventName: row.event_name,
        // Same explicit check as for `next_attempt_at` above.
        timeoutAt: row.timeout_at != null ? new Date(row.timeout_at) : undefined,
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "wait_satisfied":
      return {
        id: row.id,
        type: "wait_satisfied",
        payload: row.payload,
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    case "wait_timed_out":
      return {
        id: row.id,
        type: "wait_timed_out",
        stepId: row.step_id,
        recordedAt: new Date(row.recorded_at)
      };
    default:
      // Fail loudly on an unknown type instead of returning undefined, as the other formatters in this file do.
      throw new Error("Unexpected step event type");
  }
}
1275
+
1276
/**
 * SQLite row shape for `workflow_events` (append-only workflow status transitions).
 *
 * `formatWorkflowEvent` turns a row of this shape into a `WorkflowEvent`.
 */
type WorkflowEventRow = {
  id: number;
  // Numeric timestamp; `formatWorkflowEvent` passes it straight to `new Date(...)`.
  recorded_at: number;
  type: "created" | "started" | "paused" | "resumed" | "completed" | "failed" | "cancelled";
  // Only surfaced by `formatWorkflowEvent` for `cancelled` events; NULL when no reason is stored.
  cancellation_reason: string | null;
};
1285
+
1286
/**
 * Formatted `workflow_events` row for application use.
 *
 * Discriminated on `type`; only `cancelled` events carry an extra field (`cancellationReason`), which is absent
 * when the underlying column is NULL.
 */
type WorkflowEvent = {
  id: number;
  recordedAt: Date;
} & (
  | { type: "created" }
  | { type: "started" }
  | { type: "paused" }
  | { type: "resumed" }
  | { type: "completed" }
  | { type: "failed" }
  | { type: "cancelled"; cancellationReason?: string }
);
1301
+
1302
/**
 * Converts a raw `workflow_events` row into its application-facing shape.
 *
 * `cancelled` events additionally expose the cancellation reason (a NULL column becomes `undefined`); every other
 * known event type maps to just `id`, `recordedAt` and `type`.
 *
 * @param row - The row as read from the `workflow_events` table.
 * @returns The formatted workflow event.
 * @throws Error if the row carries a `type` that this function does not know about.
 */
function formatWorkflowEvent(row: WorkflowEventRow): WorkflowEvent {
  const common = { id: row.id, recordedAt: new Date(row.recorded_at) };

  if (row.type === "cancelled") {
    return {
      ...common,
      type: "cancelled",
      cancellationReason: row.cancellation_reason ?? undefined
    };
  }

  switch (row.type) {
    case "created":
    case "started":
    case "paused":
    case "resumed":
    case "completed":
    case "failed":
      return { ...common, type: row.type };
    default:
      throw new Error("Unexpected workflow event type");
  }
}
1326
+
1327
+ type RunStep_Row = {
1328
+ id: RunStepId;
1329
+ type: "run";
1330
+ created_at: number;
1331
+ /**
1332
+ * Enclosing run step id when this run step was created inside that run's callback; otherwise null.
1333
+ */
1334
+ parent_step_id: RunStepId | null;
1335
+
1336
+ /**
1337
+ * Number of attempts that have been durably started for this step.
1338
+ *
1339
+ * Invariants:
1340
+ *
1341
+ * - `0` before the first attempt starts
1342
+ * - Incremented exactly when an attempt transitions into `running`
1343
+ * - Never decremented
1344
+ * - Unchanged while the step is pending between retries/backoff
1345
+ *
1346
+ * When greater than `0`, this is also the 1-based number of the most recently started attempt. The next attempt, if
1347
+ * one is started, will have number `attempt_count + 1`.
1348
+ */
1349
+ attempt_count: number;
1350
+ /**
1351
+ * Maximum number of attempts that can be made for this step. If not present, the step can be retried indefinitely. If
1352
+ * present, the step is attempted at most this many times in total. If the step has reached the maximum number of
1353
+ * attempts, it will transition to the `failed` state.
1354
+ */
1355
+ max_attempts: number | null;
1356
+ } & (
1357
+ | {
1358
+ /**
1359
+ * The step has been reached but is not yet resolved, and no attempt is currently in progress.
1360
+ *
1361
+ * This includes steps that: 1. have been reached but have not yet started their first attempt 2. are waiting
1362
+ * until a retry/backoff time 3. are eligible to run immediately.
1363
+ */
1364
+ state: "pending";
1365
+
1366
+ /**
1367
+ * Earliest time at which the next attempt may start.
1368
+ *
1369
+ * Semantics: - On first creation, this is typically set to "now", meaning the step is immediately runnable. -
1370
+ * After a failed attempt with backoff, this is set to the retry time. - While the current time is before this
1371
+ * value, the step remains `pending` and no new attempt may start. - Once the current time reaches or passes this
1372
+ * value, the step becomes eligible to transition from `pending` to `running`.
1373
+ */
1374
+ next_attempt_at: number;
1375
+ }
1376
+ | {
1377
+ /**
1378
+ * An attempt was durably started, but its outcome has not yet been durably recorded.
1379
+ *
1380
+ * This does not guarantee that code is actively executing at this instant. After a restart or interruption,
1381
+ * `running` means only that a start was recorded and no durable success/failure was recorded afterward.
1382
+ */
1383
+ state: "running";
1384
+ }
1385
+ | {
1386
+ /**
1387
+ * The step completed successfully and its result was durably recorded.
1388
+ */
1389
+ state: "succeeded";
1390
+
1391
+ /**
1392
+ * Serialized successful result for the step.
1393
+ */
1394
+ result: string;
1395
+
1396
+ /**
1397
+ * Time at which the step became terminal by succeeding.
1398
+ */
1399
+ resolved_at: number;
1400
+ }
1401
+ | {
1402
+ /**
1403
+ * The step failed terminally and no further attempts will be made.
1404
+ */
1405
+ state: "failed";
1406
+
1407
+ /**
1408
+ * Serialized failure message for the terminal failure.
1409
+ */
1410
+ error_message: string;
1411
+
1412
+ /**
1413
+ * Optional serialized error class/name for the terminal failure.
1414
+ */
1415
+ error_name: string | null;
1416
+
1417
+ /**
1418
+ * Time at which the step became terminal by failing.
1419
+ */
1420
+ resolved_at: number;
1421
+ }
1422
+ );
1423
+
1424
/**
 * SQLite row shape for a `steps` row of type `sleep`.
 *
 * Discriminated on `state`: `wake_at` is only present while `waiting`, `resolved_at` only once `elapsed`.
 * `formatStep` converts this shape into a `SleepStep`.
 */
type SleepStep_Row = {
  id: SleepStepId;
  type: "sleep";
  // Numeric timestamp; `formatStep` passes it straight to `new Date(...)`.
  created_at: number;
  /**
   * Enclosing run step id when this sleep step was created inside that run's callback; otherwise null.
   */
  parent_step_id: RunStepId | null;
} & (
  | {
      /**
       * The sleep step has been reached and is currently in effect.
       *
       * `waiting` here means "started but not yet resolved", not "not yet started". The step remains waiting until its
       * wake time is reached.
       */
      state: "waiting";

      /**
       * Earliest time at which the sleep condition becomes satisfied.
       *
       * Before this moment, the step remains waiting. At or after this moment, the step may be marked elapsed.
       */
      wake_at: number;
    }
  | {
      /**
       * The sleep interval elapsed and that fact was durably recorded.
       */
      state: "elapsed";

      /**
       * Time at which the sleep step became terminal by completing.
       */
      resolved_at: number;
    }
);
1461
+
1462
+ type WaitStep_Row = {
1463
+ id: WaitStepId;
1464
+ type: "wait";
1465
+ created_at: number;
1466
+ /**
1467
+ * Enclosing run step id when this wait step was created inside that run's callback; otherwise null.
1468
+ */
1469
+ parent_step_id: RunStepId | null;
1470
+
1471
+ /**
1472
+ * Name of the inbound event that can satisfy this step.
1473
+ */
1474
+ event_name: string;
1475
+ } & (
1476
+ | {
1477
+ /**
1478
+ * The step has been reached, but the expected event has not yet been durably received, and no timeout failure has
1479
+ * been durably recorded.
1480
+ */
1481
+ state: "waiting";
1482
+
1483
+ /**
1484
+ * Optional deadline after which the wait step may fail.
1485
+ *
1486
+ * - If omitted, the step may wait indefinitely.
1487
+ * - If present, reaching or passing this time allows the step to transition to `timed_out`.
1488
+ */
1489
+ timeout_at: number | null;
1490
+ }
1491
+ | {
1492
+ /**
1493
+ * The expected event was received and its payload was durably recorded.
1494
+ */
1495
+ state: "satisfied";
1496
+
1497
+ /**
1498
+ * Serialized payload of the event that satisfied the wait.
1499
+ */
1500
+ payload: string;
1501
+
1502
+ /**
1503
+ * Time at which the wait step became terminal by receiving the expected event.
1504
+ */
1505
+ resolved_at: number;
1506
+ }
1507
+ | {
1508
+ /**
1509
+ * The step failed because its timeout was reached before the expected event was durably recorded.
1510
+ */
1511
+ state: "timed_out";
1512
+
1513
+ /**
1514
+ * Time at which the wait step became terminal by timing out.
1515
+ */
1516
+ resolved_at: number;
1517
+ }
1518
+ );
1519
+
1520
+ /**
1521
+ * A step row is created once the workflow reaches that step. From that point on, `state` describes the step's durable
1522
+ * lifecycle state.
1523
+ */
1524
+ type Step_Row = RunStep_Row | SleepStep_Row | WaitStep_Row;
1525
+
1526
+ /**
1527
+ * SQLite row shape for `step_events`: append-only durable transitions for steps.
1528
+ *
1529
+ * The `steps` table holds current state; `step_events` records how that state evolved.
1530
+ */
1531
+ type StepEventRow = {
1532
+ id: string;
1533
+
1534
+ /**
1535
+ * `steps.id` this event applies to.
1536
+ */
1537
+ step_id: RunStepId | SleepStepId | WaitStepId;
1538
+
1539
+ /**
1540
+ * When this event row was persisted (`unixepoch` ms).
1541
+ */
1542
+ recorded_at: number;
1543
+ } & (
1544
+ | {
1545
+ /**
1546
+ * A run step attempt was durably started.
1547
+ *
1548
+ * This corresponds to the step transitioning from `pending` → `running`.
1549
+ */
1550
+ type: "attempt_started";
1551
+
1552
+ /**
1553
+ * 1-based attempt number.
1554
+ *
1555
+ * Equals the value of `attempt_count` after the transition.
1556
+ */
1557
+ attempt_number: number;
1558
+ }
1559
+ | {
1560
+ /**
1561
+ * A run step attempt completed successfully.
1562
+ *
1563
+ * The step transitioned from `running` to `succeeded`.
1564
+ */
1565
+ type: "attempt_succeeded";
1566
+
1567
+ attempt_number: number;
1568
+
1569
+ /**
1570
+ * Serialized result produced by the step.
1571
+ */
1572
+ result: string;
1573
+ }
1574
+ | {
1575
+ /**
1576
+ * A run step attempt failed.
1577
+ *
1578
+ * The step either: - scheduled a next attempt, or - transitioned to terminal failure.
1579
+ */
1580
+ type: "attempt_failed";
1581
+
1582
+ attempt_number: number;
1583
+
1584
+ error_message: string;
1585
+ error_name: string | null;
1586
+
1587
+ /**
1588
+ * If present, the next attempt time.
1589
+ *
1590
+ * Absence indicates this failure was terminal.
1591
+ */
1592
+ next_attempt_at?: number;
1593
+ }
1594
+ | {
1595
+ /**
1596
+ * A sleep step began waiting.
1597
+ *
1598
+ * Corresponds to the step entering `waiting`.
1599
+ */
1600
+ type: "sleep_waiting";
1601
+
1602
+ /**
1603
+ * Wake time for the sleep step.
1604
+ */
1605
+ wake_at: number;
1606
+ }
1607
+ | {
1608
+ /**
1609
+ * A sleep step completed because the wake condition became satisfied.
1610
+ */
1611
+ type: "sleep_elapsed";
1612
+ }
1613
+ | {
1614
+ /**
1615
+ * A wait step began waiting for the expected event.
1616
+ */
1617
+ type: "wait_waiting";
1618
+
1619
+ event_name: string;
1620
+ timeout_at: number | null;
1621
+ }
1622
+ | {
1623
+ /**
1624
+ * A wait step was satisfied by receiving the expected event.
1625
+ */
1626
+ type: "wait_satisfied";
1627
+
1628
+ payload: string;
1629
+ }
1630
+ | {
1631
+ /**
1632
+ * A wait step failed because its timeout deadline was reached.
1633
+ */
1634
+ type: "wait_timed_out";
1635
+ }
1636
+ );
1637
+
1638
/**
 * Lifecycle status of a workflow as a whole (as opposed to the per-step `state` values).
 */
export type WorkflowStatus =
  | "pending" // The workflow has been created but 'run' hasn't been called yet
  | "running" // The workflow is currently executing; steps are being created/processed
  | "paused" // The workflow is paused and will not make progress until resumed
  | "completed" // The workflow completed successfully; ('Workflow.next' returned { done: true, status: "succeeded" })
  | "failed" // A step exhausted retries and the workflow aborted; ('Workflow.next' returned { done: true, status: "failed" })
  | "cancelled"; // The workflow was terminated explicitly by the user.
1645
+
1646
/**
 * Raw (snake_case) workflow metadata record; `formatWorkflowMetadata` converts it into `WorkflowMetadata`.
 *
 * NOTE(review): presumably the shape of a single SQLite metadata row — the backing table is not visible from here.
 */
type WorkflowMetadata_Row<TVersion extends string> = {
  // Numeric timestamps; `formatWorkflowMetadata` passes them straight to `new Date(...)`.
  created_at: number;
  updated_at: number;
  status: WorkflowStatus;
  // NULL is exposed as `undefined` by `formatWorkflowMetadata`.
  definition_version: TVersion | null;
  // JSON text; parsed with `JSON.parse` by `formatWorkflowMetadata` when non-empty.
  definition_input: string | null;
};
1653
+
1654
/**
 * Application-facing workflow metadata, as returned by `formatWorkflowMetadata`: camelCase field names, `Date`
 * objects instead of numeric timestamps, and the definition input already parsed from JSON.
 */
type WorkflowMetadata<TVersion extends string> = {
  createdAt: Date;
  updatedAt: Date;
  status: WorkflowStatus;
  // Absent when the underlying `definition_version` value is NULL.
  definitionVersion?: TVersion;
  // Absent when the underlying `definition_input` value is NULL or an empty string.
  definitionInput?: Json;
};
1661
+
1662
/**
 * Converts a raw workflow metadata record into its application-facing shape.
 *
 * Timestamps become `Date` objects, a NULL definition version becomes `undefined`, and the definition input is
 * parsed from JSON. A NULL or empty-string input is left as `undefined` without being parsed.
 *
 * @param metadata - The raw metadata record.
 * @returns The formatted metadata.
 * @throws SyntaxError if `definition_input` is a non-empty string that is not valid JSON.
 */
export function formatWorkflowMetadata<TVersion extends string>(
  metadata: WorkflowMetadata_Row<TVersion>
): WorkflowMetadata<TVersion> {
  const { created_at, updated_at, status, definition_version, definition_input } = metadata;

  let definitionInput: WorkflowMetadata<TVersion>["definitionInput"];
  if (definition_input) {
    definitionInput = JSON.parse(definition_input);
  }

  return {
    createdAt: new Date(created_at),
    updatedAt: new Date(updated_at),
    status,
    definitionVersion: definition_version ?? undefined,
    definitionInput
  };
}
1673
/**
 * Durable record of inbound events (`inbound_events`) that may satisfy wait steps.
 *
 * Persists delivered signals across restarts so step resolution is based on durable state rather than in-memory state.
 */
export type InboundEventRow = {
  /**
   * Unique identifier for the event.
   */
  id: string;
  /**
   * Name of the event.
   *
   * Used by wait steps to determine whether this event can satisfy them.
   */
  event_name: string;
  /**
   * Serialized payload delivered with the event.
   */
  payload: string;
  /**
   * Time the event was durably recorded.
   */
  created_at: number;
  /**
   * Step that claimed the event.
   *
   * NOTE(review): declared optional, but a column read back from SQLite is normally `null` rather than absent when
   * unset — confirm how unclaimed rows are represented before relying on `=== undefined`.
   */
  claimed_by?: RunStepId | SleepStepId | WaitStepId;
  /**
   * Time the event was claimed.
   *
   * NOTE(review): same null-vs-undefined caveat as `claimed_by`.
   */
  claimed_at?: number;
};