@strav/durable 1.0.0-alpha.8 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@strav/durable",
3
- "version": "1.0.0-alpha.8",
3
+ "version": "1.0.1",
4
4
  "description": "Strav durable execution — crash-resumable sequential workflows on top of @strav/queue + Postgres. V1: sequential .step() with retries + saga compensation. V2 adds parallel/route/loop/sleep/waitForSignal.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -19,9 +19,9 @@
19
19
  "access": "public"
20
20
  },
21
21
  "dependencies": {
22
- "@strav/kernel": "1.0.0-alpha.8",
23
- "@strav/database": "1.0.0-alpha.8",
24
- "@strav/queue": "1.0.0-alpha.8"
22
+ "@strav/kernel": "1.0.1",
23
+ "@strav/database": "1.0.1",
24
+ "@strav/queue": "1.0.1"
25
25
  },
26
26
  "peerDependencies": {
27
27
  "@types/bun": ">=1.3.14"
@@ -2,7 +2,7 @@
2
2
  * `DurableRunner` — the engine that owns the durable execution state
3
3
  * machine.
4
4
  *
5
- * Three load-bearing methods:
5
+ * Four load-bearing methods:
6
6
  *
7
7
  * 1. `start(name, input)` — INSERTs a new run row, dispatches the
8
8
  * first `DurableAdvanceJob` for it inside the same transaction
@@ -11,12 +11,11 @@
11
11
  * queue.
12
12
  *
13
13
  * 2. `advance(runId)` — the job handler. Acquires a row lock,
14
- * decides what step is next, looks for a completed journal
15
- * entry to short-circuit (idempotent replay), runs the
16
- * handler, journals the result, and either re-enqueues itself
17
- * for the next step or — on failure — schedules a retry or
18
- * kicks off compensation. The whole step body runs inside a
19
- * DB transaction so partial writes can't escape.
14
+ * resolves the node at `current_step`, dispatches by node-type
15
+ * (`step` / `sleep` / `waitForSignal` / `parallel` / `route` /
16
+ * `loop` / `childWorkflow`), journals the result, and either
17
+ * re-enqueues itself, parks the run as `waiting`, or — on
18
+ * terminal failure — kicks off compensation.
20
19
  *
21
20
  * 3. `compensate(runId)` — walks the journal in reverse order
22
21
  * running each step's `compensate` callback. On clean
@@ -24,27 +23,44 @@
24
23
  * compensation are logged but don't block the rest of the
25
24
  * rollback (compensators must be idempotent).
26
25
  *
26
+ * 4. `signal(runId, signalName, payload?)` — wakes a run parked on
27
+ * a `waitForSignal` node. Writes the journal entry, clears the
28
+ * awaiting marker, dispatches an advance.
29
+ *
27
30
  * Apps don't usually call `advance` / `compensate` directly — the
28
31
  * `DurableAdvanceJob` and `DurableCompensateJob` classes wrap them.
29
32
  */
30
33
 
31
- import {
32
- type Database,
33
- PostgresDatabase,
34
- type SchemaRegistry,
35
- } from '@strav/database'
34
+ import { type Database, PostgresDatabase, type SchemaRegistry } from '@strav/database'
36
35
  import { type Logger, ulid } from '@strav/kernel'
37
36
  import type { JobClass, Queue } from '@strav/queue'
38
- import { RunNotFoundError } from './durable_error.ts'
39
- import type { DurableStep, DurableContext, RunSnapshot, RunStatus } from './types.ts'
37
+ import { DurableError, RunNotFoundError } from './durable_error.ts'
38
+ import type {
39
+ DurableContext,
40
+ DurableNode,
41
+ DurableStep,
42
+ RunSnapshot,
43
+ RunStatus,
44
+ } from './types.ts'
40
45
  import type { WorkflowRegistry } from './workflow_registry.ts'
41
46
 
47
+ interface RunState {
48
+ results: Record<string, unknown>
49
+ stepAttempts: Record<string, number>
50
+ /** `waitForSignal` markers — `{ [nodeName]: signalName }`. */
51
+ awaitingSignals?: Record<string, string>
52
+ /** Per-loop iteration state — `{ [nodeName]: { iteration, results[] } }`. */
53
+ loopState?: Record<string, { iteration: number; results: unknown[] }>
54
+ /** Per-child-workflow link — `{ [nodeName]: childRunId }`. */
55
+ childRunIds?: Record<string, string>
56
+ }
57
+
42
58
  interface RunRow {
43
59
  id: string
44
60
  workflow_name: string
45
61
  input: Record<string, unknown> | string
46
62
  status: RunStatus
47
- state: { results?: Record<string, unknown>; stepAttempts?: Record<string, number> } | string
63
+ state: RunState | string
48
64
  current_step: number
49
65
  result: Record<string, unknown> | string | null
50
66
  error: string | null
@@ -63,6 +79,22 @@ interface JournalRow {
63
79
  completed_at: Date
64
80
  }
65
81
 
82
+ type Tx = { query: Database['query']; queryOne: Database['queryOne']; execute: Database['execute'] }
83
+
84
+ type Outcome =
85
+ /** Node completed; advance cursor + re-dispatch. */
86
+ | { kind: 'completed'; value: unknown; attempt: number }
87
+ /** Node has retries left; re-dispatch with delay. */
88
+ | { kind: 'retry'; attempt: number; delaySec: number }
89
+ /** Node exhausted retries; journal + compensate. */
90
+ | { kind: 'failed'; attempt: number; error: string }
91
+ /**
92
+ * Node parked itself. `delaySec`, when set, schedules a wake-up
93
+ * advance — for sleep and child-workflow polling. Undefined for
94
+ * waitForSignal (an external `signal()` call resumes it).
95
+ */
96
+ | { kind: 'waiting'; delaySec?: number }
97
+
66
98
  export interface DurableRunnerOptions {
67
99
  db: PostgresDatabase
68
100
  queue: Queue
@@ -120,8 +152,6 @@ export class DurableRunner {
120
152
  * orphan either.
121
153
  */
122
154
  async start(workflowName: string, input: Record<string, unknown> = {}): Promise<string> {
123
- // Validate workflow registration up-front so the caller sees a
124
- // synchronous error rather than a never-advancing run row.
125
155
  this.registry.get(workflowName)
126
156
  const runId = ulid()
127
157
  await this.db.transaction(async (tx) => {
@@ -129,7 +159,7 @@ export class DurableRunner {
129
159
  `INSERT INTO "strav_workflow_runs"
130
160
  (id, workflow_name, input, status, state, current_step, result, error, created_at, updated_at)
131
161
  VALUES ($1, $2, $3::jsonb, 'pending', $4::jsonb, 0, NULL, NULL, now(), now())`,
132
- [runId, workflowName, JSON.stringify(input), JSON.stringify({ results: {}, stepAttempts: {} })],
162
+ [runId, workflowName, JSON.stringify(input), JSON.stringify(emptyState())],
133
163
  )
134
164
  await this.queue.dispatch(this.advanceJob, { runId })
135
165
  })
@@ -148,133 +178,108 @@ export class DurableRunner {
148
178
  }
149
179
 
150
180
  /**
151
- * Advance handler. Runs inside one transaction:
152
- *
153
- * 1. SELECT FOR UPDATE the run row (serializes concurrent advances).
154
- * 2. Resolve the workflow + the step at `current_step`.
155
- * 3. If a completed journal row already exists for this step,
156
- * treat the run as if the step just succeeded — bump
157
- * `current_step` and either enqueue the next or mark
158
- * `completed`.
159
- * 4. Otherwise call the handler. On success: journal +
160
- * bump cursor + enqueue next (or mark `completed`). On
161
- * throw: track the attempt; if there are retries left,
162
- * enqueue a delayed advance; otherwise journal the failure
163
- * and kick off compensation.
181
+ * Advance handler. Loads the run, dispatches the current node by
182
+ * type, and either re-enqueues (`continue`), parks (`waiting`),
183
+ * retries with backoff, or kicks off compensation.
164
184
  */
165
185
  async advance(runId: string): Promise<void> {
166
- const workflow = await this.db.transaction(async (tx) => {
186
+ const shouldContinue = await this.db.transaction(async (tx) => {
167
187
  const row = await tx.queryOne<RunRow>(
168
188
  `SELECT id, workflow_name, input, status, state, current_step, result, error, created_at, updated_at
169
189
  FROM "strav_workflow_runs" WHERE id = $1 FOR UPDATE`,
170
190
  [runId],
171
191
  )
172
192
  if (!row) throw new RunNotFoundError(runId)
173
- if (row.status === 'completed' || row.status === 'failed') return null
193
+ if (row.status === 'completed' || row.status === 'failed') return false
174
194
 
175
195
  const wf = this.registry.get(row.workflow_name)
176
- const state = parseJson(row.state) as {
177
- results: Record<string, unknown>
178
- stepAttempts: Record<string, number>
179
- }
196
+ const state = parseJson(row.state) as RunState
197
+ ensureStateShape(state)
180
198
  const input = parseJson(row.input) as Record<string, unknown>
181
199
 
182
200
  if (row.current_step >= wf.steps.length) {
183
201
  await this.markCompleted(tx, runId, state.results)
184
- return null
202
+ return false
185
203
  }
186
204
 
187
- const step = wf.steps[row.current_step]!
205
+ const node = wf.steps[row.current_step] as DurableNode
188
206
 
189
- // Idempotent replay — if we already journaled this step, skip
190
- // the handler and just advance the cursor.
207
+ // Idempotent replay — if the node was already journaled
208
+ // completed, skip the handler.
191
209
  const journaled = await tx.queryOne<JournalRow>(
192
210
  `SELECT id, run_id, step_name, status, result, error, attempts, completed_at
193
211
  FROM "strav_workflow_journal" WHERE run_id = $1 AND step_name = $2`,
194
- [runId, step.name],
212
+ [runId, node.name],
195
213
  )
196
214
  if (journaled?.status === 'completed') {
197
- state.results[step.name] = parseJson(journaled.result)
215
+ state.results[node.name] = parseJson(journaled.result)
198
216
  await this.advanceCursor(tx, runId, row.current_step + 1, state)
199
- // Continue outside the transaction so we don't hold the row
200
- // lock across the next handler invocation.
201
- return { wf, runId, status: 'continue' as const }
217
+ return true
202
218
  }
203
219
 
204
- const attempt = (state.stepAttempts[step.name] ?? 0) + 1
220
+ const attempt = (state.stepAttempts[node.name] ?? 0) + 1
205
221
  const ctx: DurableContext = {
206
222
  input,
207
223
  results: state.results,
208
224
  runId,
209
225
  attempt,
210
226
  }
211
- try {
212
- const result = await step.handler(ctx)
213
- await tx.execute(
214
- `INSERT INTO "strav_workflow_journal"
215
- (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
216
- VALUES ($1, $2, $3, 'completed', $4::jsonb, NULL, $5, now(), now(), now())`,
217
- [ulid(), runId, step.name, JSON.stringify(result ?? null), attempt],
218
- )
219
- state.results[step.name] = result
220
- delete state.stepAttempts[step.name]
221
- await this.advanceCursor(tx, runId, row.current_step + 1, state)
222
- return { wf, runId, status: 'continue' as const }
223
- } catch (err) {
224
- const message = err instanceof Error ? err.message : String(err)
225
- this.logger?.warn('Durable step failed', {
226
- runId,
227
- step: step.name,
228
- attempt,
229
- error: message,
230
- })
231
- if (attempt < step.maxAttempts) {
232
- state.stepAttempts[step.name] = attempt
233
- await tx.execute(
234
- `UPDATE "strav_workflow_runs" SET state = $1::jsonb, updated_at = now() WHERE id = $2`,
235
- [JSON.stringify(state), runId],
236
- )
237
- const delaySec = Math.max(0, step.backoff(attempt))
238
- await this.queue.dispatchLater(delaySec, this.advanceJob, { runId })
239
- return null
240
- }
241
- // Terminal — journal the failure, mark compensating, kick off
242
- // compensation. The compensate handler walks back from the
243
- // step BEFORE this one (no compensator for the step that
244
- // just failed; there's nothing to roll back since the work
245
- // didn't commit).
246
- await tx.execute(
247
- `INSERT INTO "strav_workflow_journal"
248
- (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
249
- VALUES ($1, $2, $3, 'failed', NULL, $4, $5, now(), now(), now())`,
250
- [ulid(), runId, step.name, message, attempt],
251
- )
252
- await tx.execute(
253
- `UPDATE "strav_workflow_runs"
254
- SET status = 'compensating', state = $1::jsonb, error = $2, updated_at = now()
255
- WHERE id = $3`,
256
- [JSON.stringify(state), message, runId],
257
- )
258
- await this.queue.dispatch(this.compensateJob, { runId })
259
- return null
260
- }
227
+ const outcome = await this.runNode(tx, node, ctx, state, runId, attempt)
228
+ return this.applyOutcome(tx, runId, row.current_step, node, state, outcome)
261
229
  })
262
230
 
263
- // If the step succeeded (or was already journaled), re-enter to
264
- // advance the next one. We do this OUTSIDE the original
265
- // transaction so each step holds the row lock for the minimum
266
- // necessary window — important when steps make external API
267
- // calls that can be slow.
268
- if (workflow?.status === 'continue') {
231
+ if (shouldContinue) {
269
232
  await this.queue.dispatch(this.advanceJob, { runId })
270
233
  }
271
234
  }
272
235
 
236
+ /**
237
+ * Wake a run parked on a `waitForSignal` node. Writes the journal
238
+ * entry with `payload` as the node's result, clears the awaiting
239
+ * marker, and dispatches a fresh advance job to resume the next
240
+ * node. Returns `false` without changes when the run is not awaiting `signalName`.
241
+ */
242
+ async signal(runId: string, signalName: string, payload?: unknown): Promise<boolean> {
243
+ return this.db.transaction(async (tx) => {
244
+ const row = await tx.queryOne<RunRow>(
245
+ `SELECT id, workflow_name, input, status, state, current_step, result, error, created_at, updated_at
246
+ FROM "strav_workflow_runs" WHERE id = $1 FOR UPDATE`,
247
+ [runId],
248
+ )
249
+ if (!row) throw new RunNotFoundError(runId)
250
+ if (row.status !== 'waiting') return false
251
+ const state = parseJson(row.state) as RunState
252
+ ensureStateShape(state)
253
+ const awaiting = state.awaitingSignals ?? {}
254
+ const matchEntry = Object.entries(awaiting).find(([, name]) => name === signalName)
255
+ if (matchEntry === undefined) return false
256
+ const [nodeName] = matchEntry
257
+ // Journal the wake-up so replay sees the signal as already received.
258
+ await tx.execute(
259
+ `INSERT INTO "strav_workflow_journal"
260
+ (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
261
+ VALUES ($1, $2, $3, 'completed', $4::jsonb, NULL, 1, now(), now(), now())`,
262
+ [ulid(), runId, nodeName, JSON.stringify(payload ?? null)],
263
+ )
264
+ delete awaiting[nodeName]
265
+ state.awaitingSignals = awaiting
266
+ state.results[nodeName] = payload ?? null
267
+ await tx.execute(
268
+ `UPDATE "strav_workflow_runs"
269
+ SET status = 'running', state = $1::jsonb, current_step = current_step + 1, updated_at = now()
270
+ WHERE id = $2`,
271
+ [JSON.stringify(state), runId],
272
+ )
273
+ await this.queue.dispatch(this.advanceJob, { runId })
274
+ return true
275
+ })
276
+ }
277
+
273
278
  /**
274
279
  * Compensate handler. Walks the journal in reverse, calling each
275
280
  * registered compensator. Compensators that throw are logged but
276
- * don't halt the rollback — the rest still run. When the walk
277
- * finishes the run lands in `failed`.
281
+ * don't halt the rollback. Only `step` nodes carry compensators in
282
+ * V2 — other node types are skipped.
278
283
  */
279
284
  async compensate(runId: string): Promise<void> {
280
285
  await this.db.transaction(async (tx) => {
@@ -287,7 +292,8 @@ export class DurableRunner {
287
292
  if (row.status !== 'compensating') return
288
293
 
289
294
  const wf = this.registry.get(row.workflow_name)
290
- const state = parseJson(row.state) as { results: Record<string, unknown> }
295
+ const state = parseJson(row.state) as RunState
296
+ ensureStateShape(state)
291
297
  const input = parseJson(row.input) as Record<string, unknown>
292
298
 
293
299
  const journal = await tx.query<JournalRow>(
@@ -295,20 +301,16 @@ export class DurableRunner {
295
301
  FROM "strav_workflow_journal" WHERE run_id = $1 ORDER BY completed_at ASC`,
296
302
  [runId],
297
303
  )
298
- // Build an ordered list of successfully-completed step names so we
299
- // can walk back through `wf.steps` in declaration order and find
300
- // each compensator. Failed-step rows are skipped — they hold no
301
- // committed work to roll back.
302
- const completedNames = new Set(
303
- journal.filter((j) => j.status === 'completed').map((j) => j.step_name),
304
- )
305
- const stepsByName = new Map<string, DurableStep>(wf.steps.map((s) => [s.name, s]))
304
+ const completedNames = journal
305
+ .filter((j) => j.status === 'completed')
306
+ .map((j) => j.step_name)
307
+ const stepsByName = new Map<string, DurableNode>(wf.steps.map((s) => [s.name, s]))
306
308
 
307
309
  for (const name of [...completedNames].reverse()) {
308
- const step = stepsByName.get(name)
309
- if (!step?.compensate) continue
310
+ const node = stepsByName.get(name)
311
+ if (node?.type !== 'step' || !node.compensate) continue
310
312
  try {
311
- await step.compensate({
313
+ await (node as DurableStep).compensate?.({
312
314
  input,
313
315
  results: state.results,
314
316
  runId,
@@ -332,10 +334,316 @@ export class DurableRunner {
332
334
  })
333
335
  }
334
336
 
337
+ // ─── Node-type dispatch ──────────────────────────────────────────────────
338
+
339
+ private async runNode(
340
+ tx: Tx,
341
+ node: DurableNode,
342
+ ctx: DurableContext,
343
+ state: RunState,
344
+ runId: string,
345
+ attempt: number,
346
+ ): Promise<Outcome> {
347
+ switch (node.type) {
348
+ case 'step':
349
+ return this.runStepLike(node, ctx, attempt, () => node.handler(ctx))
350
+ case 'sleep':
351
+ return this.runSleep(node, ctx, state, attempt)
352
+ case 'waitForSignal':
353
+ return this.runWaitForSignal(node, ctx, state)
354
+ case 'parallel':
355
+ return this.runStepLike(node, ctx, attempt, async () => {
356
+ const entries = Object.entries(node.branches)
357
+ const results = await Promise.all(
358
+ entries.map(async ([key, handler]) => [key, await handler(ctx)] as const),
359
+ )
360
+ return Object.fromEntries(results)
361
+ })
362
+ case 'route':
363
+ return this.runStepLike(node, ctx, attempt, async () => {
364
+ const key = await node.select(ctx)
365
+ const handler = node.branches[key]
366
+ if (handler === undefined) {
367
+ throw new DurableError(
368
+ `DurableRunner: route "${node.name}" returned unknown branch "${key}". Branches: ${Object.keys(node.branches).join(', ')}`,
369
+ )
370
+ }
371
+ const result = await handler(ctx)
372
+ return { branch: key, result }
373
+ })
374
+ case 'loop':
375
+ return this.runLoop(tx, node, ctx, state, runId, attempt)
376
+ case 'childWorkflow':
377
+ return this.runChildWorkflow(tx, node, ctx, state, runId, attempt)
378
+ }
379
+ }
380
+
381
+ /** Common retry/failure envelope for nodes that look like one handler. */
382
+ private async runStepLike(
383
+ node: { name: string; maxAttempts: number; backoff: (n: number) => number },
384
+ ctx: DurableContext,
385
+ attempt: number,
386
+ fn: () => Promise<unknown>,
387
+ ): Promise<Outcome> {
388
+ try {
389
+ const value = await fn()
390
+ return { kind: 'completed', value, attempt }
391
+ } catch (err) {
392
+ const error = err instanceof Error ? err.message : String(err)
393
+ this.logger?.warn('Durable node failed', {
394
+ runId: ctx.runId,
395
+ node: node.name,
396
+ attempt,
397
+ error,
398
+ })
399
+ if (attempt < node.maxAttempts) {
400
+ return { kind: 'retry', attempt, delaySec: Math.max(0, node.backoff(attempt)) }
401
+ }
402
+ return { kind: 'failed', attempt, error }
403
+ }
404
+ }
405
+
406
+ private async runSleep(
407
+ node: import('./types.ts').DurableSleep,
408
+ ctx: DurableContext,
409
+ state: RunState,
410
+ attempt: number,
411
+ ): Promise<Outcome> {
412
+ const requested =
413
+ typeof node.delay === 'number' ? node.delay : await node.delay(ctx)
414
+ const delaySec = Math.max(0, Math.floor(requested))
415
+ const sleepKey = `__sleep__${node.name}`
416
+ const previouslyDispatched = (state as unknown as Record<string, unknown>)[sleepKey] as
417
+ | { dispatchedAt: number }
418
+ | undefined
419
+ if (previouslyDispatched !== undefined) {
420
+ const elapsedSec = (Date.now() - previouslyDispatched.dispatchedAt) / 1000
421
+ if (elapsedSec >= delaySec) {
422
+ return { kind: 'completed', value: { sleptSec: delaySec }, attempt }
423
+ }
424
+ // Spurious early wake-up — re-park.
425
+ return { kind: 'waiting', delaySec: Math.max(1, delaySec - elapsedSec) }
426
+ }
427
+ ;(state as unknown as Record<string, unknown>)[sleepKey] = { dispatchedAt: Date.now() }
428
+ return { kind: 'waiting', delaySec }
429
+ }
430
+
431
+ private async runWaitForSignal(
432
+ node: import('./types.ts').DurableWaitForSignal,
433
+ ctx: DurableContext,
434
+ state: RunState,
435
+ ): Promise<Outcome> {
436
+ const name = typeof node.signalName === 'string' ? node.signalName : node.signalName(ctx)
437
+ const awaiting = state.awaitingSignals ?? {}
438
+ awaiting[node.name] = name
439
+ state.awaitingSignals = awaiting
440
+ return { kind: 'waiting' }
441
+ }
442
+
443
+ private async runLoop(
444
+ tx: Tx,
445
+ node: import('./types.ts').DurableLoop,
446
+ ctx: DurableContext,
447
+ state: RunState,
448
+ runId: string,
449
+ attempt: number,
450
+ ): Promise<Outcome> {
451
+ const loops = state.loopState ?? {}
452
+ const slot = loops[node.name] ?? { iteration: 0, results: [] }
453
+ loops[node.name] = slot
454
+ state.loopState = loops
455
+
456
+ // Idempotent replay for this iteration — if the per-iteration
457
+ // journal row already exists, treat the iteration as done.
458
+ const iterName = `${node.name}#${slot.iteration}`
459
+ const iterJournal = await tx.queryOne<JournalRow>(
460
+ `SELECT id, run_id, step_name, status, result, error, attempts, completed_at
461
+ FROM "strav_workflow_journal" WHERE run_id = $1 AND step_name = $2`,
462
+ [runId, iterName],
463
+ )
464
+ if (iterJournal?.status === 'completed') {
465
+ slot.results.push(parseJson(iterJournal.result))
466
+ slot.iteration += 1
467
+ }
468
+
469
+ if (slot.iteration >= node.maxIterations) {
470
+ return { kind: 'failed', attempt, error: `loop exceeded maxIterations (${node.maxIterations})` }
471
+ }
472
+
473
+ let keepGoing: boolean
474
+ try {
475
+ keepGoing = await node.condition(ctx, slot.iteration)
476
+ } catch (err) {
477
+ return {
478
+ kind: 'failed',
479
+ attempt,
480
+ error: err instanceof Error ? err.message : String(err),
481
+ }
482
+ }
483
+ if (!keepGoing) {
484
+ return { kind: 'completed', value: [...slot.results], attempt }
485
+ }
486
+
487
+ try {
488
+ const value = await node.body({ ...ctx, iteration: slot.iteration })
489
+ // Journal this iteration before bumping; failure mid-write
490
+ // will replay this same iteration on resume (journal lookup
491
+ // above short-circuits).
492
+ await tx.execute(
493
+ `INSERT INTO "strav_workflow_journal"
494
+ (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
495
+ VALUES ($1, $2, $3, 'completed', $4::jsonb, NULL, $5, now(), now(), now())`,
496
+ [ulid(), runId, iterName, JSON.stringify(value ?? null), attempt],
497
+ )
498
+ slot.results.push(value)
499
+ slot.iteration += 1
500
+ // Keep current_step pinned; re-dispatch advance to evaluate
501
+ // the next iteration in its own transaction.
502
+ await tx.execute(
503
+ `UPDATE "strav_workflow_runs" SET state = $1::jsonb, updated_at = now() WHERE id = $2`,
504
+ [JSON.stringify(state), runId],
505
+ )
506
+ // 'continue' via a sentinel — applyOutcome's `completed` path
507
+ // is reserved for cursor-advancing nodes; here we want to
508
+ // re-enter advance without moving the cursor.
509
+ return { kind: 'waiting', delaySec: 0 }
510
+ } catch (err) {
511
+ const error = err instanceof Error ? err.message : String(err)
512
+ if (attempt < node.maxAttempts) {
513
+ return { kind: 'retry', attempt, delaySec: Math.max(0, node.backoff(attempt)) }
514
+ }
515
+ return { kind: 'failed', attempt, error }
516
+ }
517
+ }
518
+
519
+ private async runChildWorkflow(
520
+ tx: Tx,
521
+ node: import('./types.ts').DurableChildWorkflow,
522
+ ctx: DurableContext,
523
+ state: RunState,
524
+ runId: string,
525
+ attempt: number,
526
+ ): Promise<Outcome> {
527
+ const children = state.childRunIds ?? {}
528
+ state.childRunIds = children
529
+ let childId = children[node.name]
530
+
531
+ if (childId === undefined) {
532
+ let spec: { name: string; input?: Record<string, unknown> }
533
+ try {
534
+ spec = await node.start(ctx)
535
+ } catch (err) {
536
+ const error = err instanceof Error ? err.message : String(err)
537
+ if (attempt < 1) {
538
+ return { kind: 'retry', attempt, delaySec: 0 }
539
+ }
540
+ return { kind: 'failed', attempt, error }
541
+ }
542
+ childId = await this.start(spec.name, spec.input ?? {})
543
+ children[node.name] = childId
544
+ await tx.execute(
545
+ `UPDATE "strav_workflow_runs" SET state = $1::jsonb, updated_at = now() WHERE id = $2`,
546
+ [JSON.stringify(state), runId],
547
+ )
548
+ return { kind: 'waiting', delaySec: node.pollIntervalSec }
549
+ }
550
+
551
+ const child = await tx.queryOne<RunRow>(
552
+ `SELECT id, workflow_name, input, status, state, current_step, result, error, created_at, updated_at
553
+ FROM "strav_workflow_runs" WHERE id = $1`,
554
+ [childId],
555
+ )
556
+ if (!child) {
557
+ return {
558
+ kind: 'failed',
559
+ attempt,
560
+ error: `child workflow run "${childId}" disappeared`,
561
+ }
562
+ }
563
+ if (child.status === 'completed') {
564
+ return { kind: 'completed', value: parseJson(child.result), attempt }
565
+ }
566
+ if (child.status === 'failed') {
567
+ return {
568
+ kind: 'failed',
569
+ attempt,
570
+ error: child.error ?? 'child workflow failed without error message',
571
+ }
572
+ }
573
+ // pending / running / waiting / compensating → keep polling.
574
+ return { kind: 'waiting', delaySec: node.pollIntervalSec }
575
+ }
576
+
577
+ // ─── Outcome → state mutation ───────────────────────────────────────────
578
+
579
+ private async applyOutcome(
580
+ tx: Tx,
581
+ runId: string,
582
+ currentStep: number,
583
+ node: DurableNode,
584
+ state: RunState,
585
+ outcome: Outcome,
586
+ ): Promise<boolean> {
587
+ switch (outcome.kind) {
588
+ case 'completed':
589
+ await tx.execute(
590
+ `INSERT INTO "strav_workflow_journal"
591
+ (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
592
+ VALUES ($1, $2, $3, 'completed', $4::jsonb, NULL, $5, now(), now(), now())`,
593
+ [ulid(), runId, node.name, JSON.stringify(outcome.value ?? null), outcome.attempt],
594
+ )
595
+ state.results[node.name] = outcome.value
596
+ delete state.stepAttempts[node.name]
597
+ if (node.type === 'loop' && state.loopState !== undefined) {
598
+ delete state.loopState[node.name]
599
+ }
600
+ if (node.type === 'childWorkflow' && state.childRunIds !== undefined) {
601
+ delete state.childRunIds[node.name]
602
+ }
603
+ clearSleepKey(state, node)
604
+ await this.advanceCursor(tx, runId, currentStep + 1, state)
605
+ return true
606
+ case 'retry':
607
+ state.stepAttempts[node.name] = outcome.attempt
608
+ await tx.execute(
609
+ `UPDATE "strav_workflow_runs" SET state = $1::jsonb, updated_at = now() WHERE id = $2`,
610
+ [JSON.stringify(state), runId],
611
+ )
612
+ await this.queue.dispatchLater(outcome.delaySec, this.advanceJob, { runId })
613
+ return false
614
+ case 'failed':
615
+ await tx.execute(
616
+ `INSERT INTO "strav_workflow_journal"
617
+ (id, run_id, step_name, status, result, error, attempts, completed_at, created_at, updated_at)
618
+ VALUES ($1, $2, $3, 'failed', NULL, $4, $5, now(), now(), now())`,
619
+ [ulid(), runId, node.name, outcome.error, outcome.attempt],
620
+ )
621
+ await tx.execute(
622
+ `UPDATE "strav_workflow_runs"
623
+ SET status = 'compensating', state = $1::jsonb, error = $2, updated_at = now()
624
+ WHERE id = $3`,
625
+ [JSON.stringify(state), outcome.error, runId],
626
+ )
627
+ await this.queue.dispatch(this.compensateJob, { runId })
628
+ return false
629
+ case 'waiting':
630
+ await tx.execute(
631
+ `UPDATE "strav_workflow_runs"
632
+ SET status = 'waiting', state = $1::jsonb, updated_at = now()
633
+ WHERE id = $2`,
634
+ [JSON.stringify(state), runId],
635
+ )
636
+ if (outcome.delaySec !== undefined) {
637
+ await this.queue.dispatchLater(outcome.delaySec, this.advanceJob, { runId })
638
+ }
639
+ return false
640
+ }
641
+ }
642
+
335
643
  // ─── Internal helpers ────────────────────────────────────────────────────
336
644
 
337
645
  private async markCompleted(
338
- tx: Database | { execute: (s: string, p: unknown[]) => Promise<number> },
646
+ tx: Tx,
339
647
  runId: string,
340
648
  results: Record<string, unknown>,
341
649
  ): Promise<void> {
@@ -344,7 +652,7 @@ export class DurableRunner {
344
652
  SET status = 'completed', state = $1::jsonb, result = $2::jsonb, updated_at = now()
345
653
  WHERE id = $3`,
346
654
  [
347
- JSON.stringify({ results, stepAttempts: {} }),
655
+ JSON.stringify({ ...emptyState(), results }),
348
656
  JSON.stringify(results),
349
657
  runId,
350
658
  ],
@@ -352,10 +660,10 @@ export class DurableRunner {
352
660
  }
353
661
 
354
662
  private async advanceCursor(
355
- tx: { execute: (s: string, p: unknown[]) => Promise<number> },
663
+ tx: Tx,
356
664
  runId: string,
357
665
  nextStep: number,
358
- state: { results: Record<string, unknown>; stepAttempts: Record<string, number> },
666
+ state: RunState,
359
667
  ): Promise<void> {
360
668
  await tx.execute(
361
669
  `UPDATE "strav_workflow_runs"
@@ -368,6 +676,22 @@ export class DurableRunner {
368
676
 
369
677
  // ─── Pure helpers ────────────────────────────────────────────────────────
370
678
 
679
+ function emptyState(): RunState {
680
+ return { results: {}, stepAttempts: {} }
681
+ }
682
+
683
+ function ensureStateShape(state: RunState): void {
684
+ if (state.results === undefined) state.results = {}
685
+ if (state.stepAttempts === undefined) state.stepAttempts = {}
686
+ }
687
+
688
+ function clearSleepKey(state: RunState, node: DurableNode): void {
689
+ const key = `__sleep__${node.name}`
690
+ if (key in state) {
691
+ delete (state as unknown as Record<string, unknown>)[key]
692
+ }
693
+ }
694
+
371
695
  function parseJson(value: unknown): unknown {
372
696
  if (value === null || value === undefined) return value
373
697
  if (typeof value === 'string') return JSON.parse(value)
@@ -375,7 +699,7 @@ function parseJson(value: unknown): unknown {
375
699
  }
376
700
 
377
701
  function toSnapshot(row: RunRow): RunSnapshot {
378
- const state = parseJson(row.state) as { results?: Record<string, unknown> } | null
702
+ const state = parseJson(row.state) as RunState | null
379
703
  return {
380
704
  id: row.id,
381
705
  workflowName: row.workflow_name,
@@ -2,46 +2,58 @@
2
2
  * `DurableWorkflow` — the builder apps use to declare a named,
3
3
  * registered, crash-resumable workflow.
4
4
  *
5
- * Mirrors the `.step(name, handler, { compensate?, maxAttempts? })`
6
- * surface from `@strav/workflow` so simple migrations are mostly
7
- * copy-paste, but the semantics differ in three important ways:
5
+ * V1 surface: `.step(name, handler, options?)` — sequential, named,
6
+ * journaled, retried, optionally saga-compensated.
8
7
  *
9
- * 1. Workflows are *named* and live in a registry. Steps are looked
10
- * up by name when an `advance` job picks them off the queue —
11
- * apps don't pass closures to `runner.start()`.
8
+ * V2 surface adds six composite primitives that still occupy one
9
+ * cursor slot each:
12
10
  *
13
- * 2. Each step is its own crash boundary. A step that's already
14
- * journaled completed is skipped on replay; a step that throws
15
- * is retried up to `maxAttempts` with `backoff` (default
16
- * exponential, capped at 60s); a step that exhausts its
17
- * attempts triggers reverse-order saga compensation.
11
+ * - `.sleep(name, delay)` — park for N seconds or a context-aware
12
+ * deadline.
13
+ * - `.waitForSignal(name, signalName)` — pause until
14
+ * `runner.signal(runId, signalName, payload?)` fires.
15
+ * - `.parallel(name, { branchA: fn, branchB: fn, ... })` — run
16
+ * every branch in `Promise.all`; whole-or-nothing failure.
17
+ * - `.route(name, select, branches)` — pick one branch by
18
+ * predicate.
19
+ * - `.loop(name, condition, body)` — iterate while `condition()`
20
+ * holds; each iteration is its own journal row.
21
+ * - `.childWorkflow(name, start)` — spawn another registered
22
+ * workflow and wait on it.
18
23
  *
19
- * 3. Step handlers must be *resolvable across processes*. The
20
- * registry holds the handler function; the queue payload carries
21
- * only the run id + step name. Handlers can close over module-
22
- * level state but NOT request-scoped variables the
23
- * `advance` job may run in a worker that never saw the request
24
- * that started the workflow.
25
- *
26
- * V1 ships sequential `.step()` only. V2 adds `.parallel` / `.route`
27
- * / `.loop` / `.sleep` / `.waitForSignal` / `.childWorkflow`.
24
+ * Cursor model stays a flat integer (`current_step`) — every node,
25
+ * primitive or composite, occupies one slot. Internal sub-state
26
+ * (loop iteration counters, awaiting-signal names, child run ids)
27
+ * lives in the run row's `state` JSONB.
28
28
  */
29
29
 
30
30
  import { DurableError } from './durable_error.ts'
31
31
  import type {
32
+ DurableChildWorkflow,
33
+ DurableCompensator,
34
+ DurableContext,
35
+ DurableLoop,
36
+ DurableLoopContext,
37
+ DurableNode,
38
+ DurableParallel,
39
+ DurableRoute,
40
+ DurableSleep,
32
41
  DurableStep,
33
42
  DurableStepHandler,
34
43
  DurableStepOptions,
44
+ DurableWaitForSignal,
35
45
  } from './types.ts'
36
46
 
37
47
  const DEFAULT_MAX_ATTEMPTS = 3
48
+ const DEFAULT_MAX_ITERATIONS = 1000
49
+ const DEFAULT_CHILD_POLL_SEC = 2
38
50
  const MAX_BACKOFF_SECONDS = 60
39
51
  const defaultBackoff = (failedAttempt: number): number =>
40
52
  Math.min(2 ** failedAttempt, MAX_BACKOFF_SECONDS)
41
53
 
42
54
  export class DurableWorkflow {
43
55
  readonly name: string
44
- private readonly _steps: DurableStep[] = []
56
+ private readonly _nodes: DurableNode[] = []
45
57
  private readonly _names = new Set<string>()
46
58
 
47
59
  constructor(name: string) {
@@ -51,9 +63,15 @@ export class DurableWorkflow {
51
63
  this.name = name
52
64
  }
53
65
 
54
- /** Read-only snapshot of the queued steps. */
55
- get steps(): readonly DurableStep[] {
56
- return this._steps
66
+ /**
67
+ * Read-only snapshot of the declared nodes.
68
+ *
69
+ * Field is named `steps` for back-compat with V1 — every node
70
+ * (`step`, `sleep`, `parallel`, …) carries a `type` discriminator
71
+ * that callers branch on.
72
+ */
73
+ get steps(): readonly DurableNode[] {
74
+ return this._nodes
57
75
  }
58
76
 
59
77
  /**
@@ -70,28 +88,170 @@ export class DurableWorkflow {
70
88
  */
71
89
  step(name: string, handler: DurableStepHandler, options?: DurableStepOptions): this {
72
90
  this.claim(name)
73
- const step: DurableStep = {
91
+ const node: DurableStep = {
74
92
  type: 'step',
75
93
  name,
76
94
  handler,
77
95
  maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
78
96
  backoff: options?.backoff ?? defaultBackoff,
79
97
  }
80
- if (options?.compensate) step.compensate = options.compensate
81
- this._steps.push(step)
98
+ if (options?.compensate) node.compensate = options.compensate
99
+ this._nodes.push(node)
100
+ return this
101
+ }
102
+
103
+ /**
104
+ * Park the run for `delay` seconds (or a context-aware function
105
+ * returning seconds). Marks the run `waiting`; the cursor advances
106
+ * once the delayed advance fires.
107
+ */
108
+ sleep(
109
+ name: string,
110
+ delay: number | ((ctx: DurableContext) => number | Promise<number>),
111
+ ): this {
112
+ this.claim(name)
113
+ const node: DurableSleep = { type: 'sleep', name, delay }
114
+ this._nodes.push(node)
115
+ return this
116
+ }
117
+
118
+ /**
119
+ * Pause until `runner.signal(runId, signalName, payload?)` fires.
120
+ * The payload becomes this node's result.
121
+ */
122
+ waitForSignal(
123
+ name: string,
124
+ signalName: string | ((ctx: DurableContext) => string),
125
+ ): this {
126
+ this.claim(name)
127
+ const node: DurableWaitForSignal = { type: 'waitForSignal', name, signalName }
128
+ this._nodes.push(node)
129
+ return this
130
+ }
131
+
132
+ /**
133
+ * Run every branch concurrently within a single advance. Returns
134
+ * a `{ [branchName]: result }` object. Any branch throw fails the
135
+ * whole node (retried + compensated together).
136
+ */
137
+ parallel(
138
+ name: string,
139
+ branches: Record<string, DurableStepHandler>,
140
+ options?: { maxAttempts?: number; backoff?: (failedAttempt: number) => number },
141
+ ): this {
142
+ this.claim(name)
143
+ if (Object.keys(branches).length === 0) {
144
+ throw new DurableError(
145
+ `DurableWorkflow("${this.name}").parallel("${name}"): at least one branch required.`,
146
+ )
147
+ }
148
+ const node: DurableParallel = {
149
+ type: 'parallel',
150
+ name,
151
+ branches,
152
+ maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
153
+ backoff: options?.backoff ?? defaultBackoff,
154
+ }
155
+ this._nodes.push(node)
156
+ return this
157
+ }
158
+
159
+ /**
160
+ * Pick one branch by the key `select(ctx)` returns. The chosen handler's
161
+ * return is the node's result; the chosen branch key is recorded
162
+ * in `results[name].branch`.
163
+ */
164
+ route(
165
+ name: string,
166
+ select: (ctx: DurableContext) => string | Promise<string>,
167
+ branches: Record<string, DurableStepHandler>,
168
+ options?: { maxAttempts?: number; backoff?: (failedAttempt: number) => number },
169
+ ): this {
170
+ this.claim(name)
171
+ if (Object.keys(branches).length === 0) {
172
+ throw new DurableError(
173
+ `DurableWorkflow("${this.name}").route("${name}"): at least one branch required.`,
174
+ )
175
+ }
176
+ const node: DurableRoute = {
177
+ type: 'route',
178
+ name,
179
+ select,
180
+ branches,
181
+ maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
182
+ backoff: options?.backoff ?? defaultBackoff,
183
+ }
184
+ this._nodes.push(node)
185
+ return this
186
+ }
187
+
188
+ /**
189
+ * Repeat `body(ctx)` while `condition(ctx, iter)` returns true,
190
+ * up to `maxIterations` (default 1000). Each iteration is its own
191
+ * journal row keyed `<name>#<iter>` so a crash mid-loop resumes
192
+ * from the next un-journaled iteration. The node's result is the
193
+ * array of per-iteration returns.
194
+ */
195
+ loop(
196
+ name: string,
197
+ condition: (ctx: DurableContext, iter: number) => boolean | Promise<boolean>,
198
+ body: (ctx: DurableLoopContext) => Promise<unknown>,
199
+ options?: {
200
+ maxIterations?: number
201
+ maxAttempts?: number
202
+ backoff?: (failedAttempt: number) => number
203
+ },
204
+ ): this {
205
+ this.claim(name)
206
+ const node: DurableLoop = {
207
+ type: 'loop',
208
+ name,
209
+ condition,
210
+ body,
211
+ maxIterations: options?.maxIterations ?? DEFAULT_MAX_ITERATIONS,
212
+ maxAttempts: options?.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
213
+ backoff: options?.backoff ?? defaultBackoff,
214
+ }
215
+ this._nodes.push(node)
216
+ return this
217
+ }
218
+
219
+ /**
220
+ * Spawn a registered child workflow and wait for it to complete.
221
+ * The parent re-polls the child's status via a delayed advance —
222
+ * no parent_run_id column needed. Child `failed` propagates as a
223
+ * failure on this node.
224
+ */
225
+ childWorkflow(
226
+ name: string,
227
+ start: DurableChildWorkflow['start'],
228
+ options?: { pollIntervalSec?: number },
229
+ ): this {
230
+ this.claim(name)
231
+ const node: DurableChildWorkflow = {
232
+ type: 'childWorkflow',
233
+ name,
234
+ start,
235
+ pollIntervalSec: options?.pollIntervalSec ?? DEFAULT_CHILD_POLL_SEC,
236
+ }
237
+ this._nodes.push(node)
82
238
  return this
83
239
  }
84
240
 
85
- /** Throw if the step name has already been used in this workflow. */
86
241
  private claim(name: string): void {
87
242
  if (!name) {
88
- throw new DurableError(`DurableWorkflow("${this.name}"): step name must be non-empty.`)
243
+ throw new DurableError(`DurableWorkflow("${this.name}"): node name must be non-empty.`)
89
244
  }
90
245
  if (this._names.has(name)) {
91
246
  throw new DurableError(
92
- `DurableWorkflow("${this.name}"): duplicate step name "${name}". Steps are journaled by name; collisions would break replay.`,
247
+ `DurableWorkflow("${this.name}"): duplicate node name "${name}". Nodes are journaled by name; collisions would break replay.`,
93
248
  )
94
249
  }
95
250
  this._names.add(name)
96
251
  }
97
252
  }
253
+
254
+ // Re-export `DurableCompensator` so the index barrel doesn't need to
255
+ // list it twice — it's part of `DurableStepOptions` and the
256
+ // step-builder signature.
257
+ export type { DurableCompensator }
package/src/index.ts CHANGED
@@ -1,9 +1,16 @@
1
1
  // Public API of @strav/durable.
2
2
  //
3
- // Crash-resumable workflows on top of @strav/queue + Postgres. V1
4
- // ships sequential `.step()` with per-step retries and saga
5
- // compensation. V2 layers in parallel / route / loop / sleep /
6
- // waitForSignal / childWorkflow.
3
+ // Crash-resumable workflows on top of @strav/queue + Postgres.
4
+ // Builder surface on `DurableWorkflow`:
5
+ // - `.step(name, handler, opts?)` — sequential, retried, saga-compensated.
6
+ // - `.sleep(name, delay)` — park for a duration.
7
+ // - `.waitForSignal(name, signalName)` — pause until
8
+ // `runner.signal(runId, signalName, payload?)`.
9
+ // - `.parallel(name, branches)` — Promise.all-style fan-out.
10
+ // - `.route(name, select, branches)` — single-branch routing.
11
+ // - `.loop(name, condition, body)` — per-iteration journaled loop.
12
+ // - `.childWorkflow(name, start)` — spawn a registered workflow
13
+ // and wait for completion.
7
14
 
8
15
  export { defineDurable } from './define_durable.ts'
9
16
  export {
@@ -25,11 +32,19 @@ export { DurableWorkflow } from './durable_workflow.ts'
25
32
  export { JOURNAL_UNIQUE_INDEX, workflowJournalSchema } from './journal_schema.ts'
26
33
  export { workflowRunsSchema } from './runs_schema.ts'
27
34
  export type {
35
+ DurableChildWorkflow,
28
36
  DurableCompensator,
29
37
  DurableContext,
38
+ DurableLoop,
39
+ DurableLoopContext,
40
+ DurableNode,
41
+ DurableParallel,
42
+ DurableRoute,
43
+ DurableSleep,
30
44
  DurableStep,
31
45
  DurableStepHandler,
32
46
  DurableStepOptions,
47
+ DurableWaitForSignal,
33
48
  RunSnapshot,
34
49
  RunStatus,
35
50
  } from './types.ts'
package/src/types.ts CHANGED
@@ -2,7 +2,7 @@
2
2
  * Public types for durable execution.
3
3
  *
4
4
  * A durable workflow is a *named*, registered definition: handlers are
5
- * keyed by step name so the runner can re-enter them across processes
5
+ * keyed by node name so the runner can re-enter them across processes
6
6
  * after a crash. Apps don't pass closures into `start()` — they pass
7
7
  * a workflow name + input.
8
8
  *
@@ -16,12 +16,31 @@
16
16
  * `durable:status` CLI command in a later slice).
17
17
  */
18
18
 
19
- export type RunStatus = 'pending' | 'running' | 'compensating' | 'completed' | 'failed'
19
+ /**
20
+ * Run lifecycle states:
21
+ *
22
+ * - `pending` — row INSERTed; no advance has run yet.
23
+ * - `running` — a worker is mid-step or in-flight.
24
+ * - `waiting` — node parked itself (sleep, waitForSignal,
25
+ * childWorkflow). The cursor doesn't move until
26
+ * the wakeup condition fires.
27
+ * - `compensating`— terminal failure; the saga is rolling back.
28
+ * - `completed` — every node finished; `result` populated.
29
+ * - `failed` — compensation done (or no compensation needed);
30
+ * `error` populated.
31
+ */
32
+ export type RunStatus =
33
+ | 'pending'
34
+ | 'running'
35
+ | 'waiting'
36
+ | 'compensating'
37
+ | 'completed'
38
+ | 'failed'
20
39
 
21
40
  export interface DurableContext {
22
41
  /** Workflow input — the object passed to `DurableRunner.start(name, input)`. */
23
42
  readonly input: Record<string, unknown>
24
- /** Results from every prior step, keyed by step name. */
43
+ /** Results from every prior node, keyed by node name. */
25
44
  readonly results: Record<string, unknown>
26
45
  /** Durable run id (the row PK). Useful for logging / correlation. */
27
46
  readonly runId: string
@@ -29,6 +48,12 @@ export interface DurableContext {
29
48
  readonly attempt: number
30
49
  }
31
50
 
51
+ /** Context handed to a `.loop(...)` body — same as `DurableContext` plus the iteration counter. */
52
+ export interface DurableLoopContext extends DurableContext {
53
+ /** 0-based iteration number. */
54
+ readonly iteration: number
55
+ }
56
+
32
57
  export interface RunSnapshot {
33
58
  id: string
34
59
  workflowName: string
@@ -60,11 +85,12 @@ export interface DurableStepOptions {
60
85
  backoff?: (failedAttempt: number) => number
61
86
  }
62
87
 
88
+ // ─── Node variants (V2) ──────────────────────────────────────────────────
89
+
63
90
  /**
64
- * Internal step record. Apps don't construct this directly the
65
- * `DurableWorkflow` builder pushes one per `.step()` call. Exported
66
- * so tests and introspection tools can read the plan from
67
- * `workflow.steps`.
91
+ * One sequential step. The cursor advances by 1 once the handler
92
+ * succeeds. Failures retry up to `maxAttempts`; exhaustion triggers
93
+ * reverse-order saga compensation.
68
94
  */
69
95
  export interface DurableStep {
70
96
  type: 'step'
@@ -74,3 +100,116 @@ export interface DurableStep {
74
100
  maxAttempts: number
75
101
  backoff: (failedAttempt: number) => number
76
102
  }
103
+
104
+ /**
105
+ * Park the run for a fixed duration. The runner schedules a delayed
106
+ * advance via `queue.dispatchLater(delaySec)` and marks the run as
107
+ * `waiting`. On wake-up the node is journaled and the cursor moves
108
+ * on.
109
+ *
110
+ * `delay` is either a number of seconds or a context-aware function
111
+ * that returns one (so apps can sleep until a wall-clock target
112
+ * encoded in `ctx.input` / `ctx.results`).
113
+ */
114
+ export interface DurableSleep {
115
+ type: 'sleep'
116
+ name: string
117
+ delay: number | ((ctx: DurableContext) => number | Promise<number>)
118
+ }
119
+
120
+ /**
121
+ * Pause the run until an external `runner.signal(runId, signalName,
122
+ * payload?)` call fires. The signal's `payload` lands as the node's
123
+ * result. Useful for human-in-the-loop approvals, third-party
124
+ * webhooks, async-out / async-in handshakes.
125
+ *
126
+ * `signalName` is either a literal or a context-aware function (so
127
+ * the listener name can depend on `ctx.input`).
128
+ */
129
+ export interface DurableWaitForSignal {
130
+ type: 'waitForSignal'
131
+ name: string
132
+ signalName: string | ((ctx: DurableContext) => string)
133
+ }
134
+
135
+ /**
136
+ * Run a set of named branches concurrently. Each branch is a single
137
+ * handler; the parallel node completes when every branch has — its
138
+ * result is `{ [branch]: result }`. If any branch throws, the WHOLE
139
+ * node fails and the failure path follows the same retry +
140
+ * compensation rules as `step`.
141
+ *
142
+ * V2 scope — no per-branch retries, no per-branch journaling. The
143
+ * whole `Promise.all(...)` runs inside one advance.
144
+ */
145
+ export interface DurableParallel {
146
+ type: 'parallel'
147
+ name: string
148
+ branches: Record<string, DurableStepHandler>
149
+ maxAttempts: number
150
+ backoff: (failedAttempt: number) => number
151
+ }
152
+
153
+ /**
154
+ * Pick one of N named branches based on the key `select(ctx)` returns.
155
+ * The chosen branch's handler runs; its return lands as the node's
156
+ * result alongside the chosen key. Unknown selection keys throw.
157
+ */
158
+ export interface DurableRoute {
159
+ type: 'route'
160
+ name: string
161
+ select: (ctx: DurableContext) => string | Promise<string>
162
+ branches: Record<string, DurableStepHandler>
163
+ maxAttempts: number
164
+ backoff: (failedAttempt: number) => number
165
+ }
166
+
167
+ /**
168
+ * Repeat `body(ctx)` while `condition(ctx, iter)` returns true,
169
+ * up to `maxIterations`. Each iteration is journaled separately
170
+ * (`<name>#<iter>`) so a crash mid-loop resumes from the next
171
+ * un-journaled iteration. The node's final result is the array of
172
+ * per-iteration returns.
173
+ */
174
+ export interface DurableLoop {
175
+ type: 'loop'
176
+ name: string
177
+ condition: (ctx: DurableContext, iter: number) => boolean | Promise<boolean>
178
+ body: (ctx: DurableLoopContext) => Promise<unknown>
179
+ /** Safety ceiling on iterations. Default `1000`. */
180
+ maxIterations: number
181
+ maxAttempts: number
182
+ backoff: (failedAttempt: number) => number
183
+ }
184
+
185
+ /**
186
+ * Spawn a child workflow (by registered name) and wait for it to
187
+ * complete. The parent re-polls the child's status via a delayed
188
+ * advance — no parent_run_id column needed, no cross-row push. Child
189
+ * `failed` propagates as a failure on this node (which retries +
190
+ * compensates like any other).
191
+ *
192
+ * `start(ctx)` returns `{ name, input }` — the child workflow name
193
+ * (must be registered) and its input object.
194
+ */
195
+ export interface DurableChildWorkflow {
196
+ type: 'childWorkflow'
197
+ name: string
198
+ start: (
199
+ ctx: DurableContext,
200
+ ) => Promise<{ name: string; input?: Record<string, unknown> }> | {
201
+ name: string
202
+ input?: Record<string, unknown>
203
+ }
204
+ /** How often the runner re-polls the child's status (seconds). Default `2`. */
205
+ pollIntervalSec: number
206
+ }
207
+
208
+ export type DurableNode =
209
+ | DurableStep
210
+ | DurableSleep
211
+ | DurableWaitForSignal
212
+ | DurableParallel
213
+ | DurableRoute
214
+ | DurableLoop
215
+ | DurableChildWorkflow