@strav/durable 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/package.json +49 -0
- package/src/builder.ts +158 -0
- package/src/config.ts +36 -0
- package/src/durable.ts +268 -0
- package/src/engine/advance_handler.ts +154 -0
- package/src/engine/compensate_handler.ts +70 -0
- package/src/engine/compensation_driver.ts +61 -0
- package/src/engine/context.ts +36 -0
- package/src/engine/enqueue.ts +62 -0
- package/src/engine/finalize.ts +111 -0
- package/src/engine/index.ts +20 -0
- package/src/engine/run_store.ts +42 -0
- package/src/engine/step_driver.ts +291 -0
- package/src/engine/suspended_run.ts +24 -0
- package/src/errors.ts +21 -0
- package/src/helpers.ts +16 -0
- package/src/index.ts +37 -0
- package/src/models/index.ts +3 -0
- package/src/models/journal.ts +54 -0
- package/src/models/run_machine.ts +39 -0
- package/src/models/workflow_run.ts +36 -0
- package/src/providers/durable_provider.ts +31 -0
- package/src/providers/index.ts +2 -0
- package/src/registry.ts +35 -0
- package/src/schema.ts +70 -0
- package/src/types.ts +216 -0
- package/src/util.ts +25 -0
- package/tsconfig.json +5 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { transaction } from '@strav/database'
|
|
2
|
+
import { registry } from '../registry.ts'
|
|
3
|
+
import type { JournalWrite } from '../types.ts'
|
|
4
|
+
import { loadJournal, writeJournal } from '../models/journal.ts'
|
|
5
|
+
import { buildContext } from './context.ts'
|
|
6
|
+
import { enqueueCompensate } from './enqueue.ts'
|
|
7
|
+
import { failRun } from './finalize.ts'
|
|
8
|
+
import { loadRun, lockRun, type Tx } from './run_store.ts'
|
|
9
|
+
import { runCompensator } from './compensation_driver.ts'
|
|
10
|
+
|
|
11
|
+
/**
 * Shape of the payload attached to a `durable:compensate` queue job.
 */
export interface CompensatePayload {
  /** Id of the workflow run whose rollback this job belongs to. */
  runId: number
  /**
   * Index of the step to compensate. The handler only acts when this equals
   * the run's current `compensationCursor`.
   */
  compensateIndex: number
}
|
|
16
|
+
|
|
17
|
+
/**
 * Queue handler for `durable:compensate` jobs: rolls back a single step of a
 * run that is in the `compensating` state.
 *
 * The job is a no-op unless the run exists, is `compensating`, and its
 * `compensationCursor` equals the payload's `compensateIndex`. The same guard
 * is repeated under a row lock before anything is written, so stale or
 * redelivered jobs fall through without side effects on the run row.
 *
 * Work happens in two phases:
 *  - Phase A runs the step's compensator outside any transaction, skipping it
 *    when the journal already holds a completed `<step>#compensate` record.
 *  - Phase B, inside one transaction, journals the compensation and then
 *    either moves the cursor down by one and enqueues the next compensate job
 *    or, once the cursor would drop below zero, marks the run `failed`.
 *
 * @param payload - Run id plus the cursor position this job was enqueued for.
 */
export async function compensateHandler(payload: CompensatePayload): Promise<void> {
  const runId = payload.runId
  const compensateIndex = payload.compensateIndex

  // Unlocked pre-check: drop jobs that no longer match the run's state.
  const snapshot = await loadRun(runId)
  if (!snapshot) return
  if (snapshot.status !== 'compensating') return
  if (snapshot.compensationCursor !== compensateIndex) return

  // Phase A — the compensator may be slow, so it runs before the transaction opens.
  let pending: JournalWrite[] = []
  const step =
    compensateIndex >= 0
      ? registry.get(snapshot.workflowName).steps[compensateIndex]
      : undefined
  if (step) {
    const journal = await loadJournal(runId)
    const marker = journal.get(`${step.name}#compensate`)
    // A completed marker means an earlier delivery already compensated this step.
    if (marker?.status !== 'completed') {
      pending = await runCompensator(step, buildContext(snapshot, 1, step.name), journal)
    }
  }

  // Phase B — journal write and cursor move commit together.
  await transaction(async (trx: Tx) => {
    const current = await lockRun(trx, runId)
    // Re-validate under the row lock; another worker may have advanced the run.
    if (!current) return
    if (current.status !== 'compensating') return
    if (current.compensationCursor !== compensateIndex) return

    if (pending.length > 0) {
      await writeJournal(trx, runId, pending)
    }

    const nextCursor = compensateIndex - 1
    if (nextCursor < 0) {
      // Nothing left to roll back: finish the run as failed.
      await failRun(trx, current, current.error ?? 'workflow failed')
      return
    }

    await trx`
      UPDATE "_strav_workflow_runs"
      SET "compensation_cursor" = ${nextCursor}, "updated_at" = NOW()
      WHERE "id" = ${runId}
    `
    await enqueueCompensate(trx, runId, nextCursor)
  })
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
DurableContext,
|
|
3
|
+
DurableStep,
|
|
4
|
+
JournalRecord,
|
|
5
|
+
JournalWrite,
|
|
6
|
+
} from '../types.ts'
|
|
7
|
+
|
|
8
|
+
/**
 * Reduce a thrown value to text: an `Error` yields its `message`, anything
 * else is passed through `String()`.
 */
function message(err: unknown): string {
  if (err instanceof Error) {
    return err.message
  }
  return String(err)
}
|
|
11
|
+
|
|
12
|
+
/**
 * Execute the compensation logic attached to one step while a run is being
 * rolled back.
 *
 * Only `step` and `parallel` steps can carry compensators, and a compensator
 * is invoked only when the unit it belongs to has a `completed` journal
 * record (`<step>` for sequential steps, `<step>#<entry>` for parallel
 * entries). Parallel entries are compensated one after another in declaration
 * order.
 *
 * A compensator that throws does not abort the rollback: its message is
 * collected and reported on the returned journal write instead.
 *
 * @param step - The step definition being rolled back.
 * @param ctx - Context passed to each compensator.
 * @param journal - Journal records of the run, keyed by step id.
 * @returns Exactly one `<step>#compensate` write: `completed` when no
 *   compensator threw, otherwise `failed` with the messages joined by `'; '`.
 */
export async function runCompensator(
  step: DurableStep,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<JournalWrite[]> {
  const failures: string[] = []
  const wasCompleted = (id: string): boolean => journal.get(id)?.status === 'completed'

  switch (step.type) {
    case 'step': {
      if (wasCompleted(step.name) && step.compensate) {
        try {
          await step.compensate(ctx)
        } catch (err) {
          failures.push(message(err))
        }
      }
      break
    }
    case 'parallel': {
      for (const entry of step.entries) {
        if (!wasCompleted(`${step.name}#${entry.name}`) || !entry.compensate) continue
        try {
          await entry.compensate(ctx)
        } catch (err) {
          // Prefix with the entry name so the failing branch is identifiable.
          failures.push(`${entry.name}: ${message(err)}`)
        }
      }
      break
    }
    default:
      // route / loop / sleep / signal / child have no compensators.
      break
  }

  const failed = failures.length > 0
  return [
    {
      stepId: `${step.name}#compensate`,
      status: failed ? 'failed' : 'completed',
      error: failed ? failures.join('; ') : undefined,
      attempt: 1,
    },
  ]
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { DurableContext, RunRow } from '../types.ts'
|
|
2
|
+
|
|
3
|
+
/** Prefix reserved for engine-internal keys stored in a run's `state`. */
const INTERNAL_PREFIX = '__strav_'

/**
 * Build the `DurableContext` handed to step handlers from a run row.
 *
 * `results` is a copy of the run's accumulated `state` with every key that
 * starts with `__strav_` removed. The copy is assembled with
 * `Object.fromEntries`, so each surviving key becomes an own property. A
 * `__proto__` key (possible after `JSON.parse`) is kept as data instead of
 * replacing the prototype of `results`.
 *
 * @param run - The run row; a missing `state` or `input` is treated as `{}`.
 * @param attempt - Attempt number exposed to the handler.
 * @param stepName - Name of the step the context is built for.
 * @returns A context whose `signal(name)` returns the own entry of `results`
 *   stored under `name` (or `undefined`, never an inherited
 *   `Object.prototype` member), and whose `resumeData()` returns the raw
 *   `__strav_resume__` value from the unfiltered state.
 */
export function buildContext(
  run: RunRow,
  attempt: number,
  stepName: string
): DurableContext {
  const rawState = run.state ?? {}
  const results: Record<string, unknown> = Object.fromEntries(
    Object.entries(rawState).filter(([key]) => !key.startsWith(INTERNAL_PREFIX))
  )

  return {
    input: run.input ?? {},
    results,
    runId: run.id,
    attempt,
    stepName,
    signal<T = unknown>(name: string): T | undefined {
      // Own-property check: names such as "toString" must not resolve to prototype members.
      return Object.prototype.hasOwnProperty.call(results, name)
        ? (results[name] as T | undefined)
        : undefined
    },
    resumeData<T = unknown>(): T | undefined {
      return rawState['__strav_resume__'] as T | undefined
    },
  }
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { getConfig } from '../config.ts'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Enqueue durable jobs by INSERTing directly into `_strav_jobs` on the caller's
|
|
5
|
+
* transaction handle.
|
|
6
|
+
*
|
|
7
|
+
* This is a deliberate, documented coupling to `@strav/queue`'s table. It is
|
|
8
|
+
* NOT done via `Queue.push` because `Queue.push` runs its INSERT on a separate
|
|
9
|
+
* connection — outside the engine's transaction — which would break the atomic
|
|
10
|
+
* `{ journal write + run-row update + next-job enqueue }` commit that the whole
|
|
11
|
+
* crash-safety story depends on. The clean long-term fix is an upstream
|
|
12
|
+
* `Queue.pushTx(trx, ...)`; until then, the INSERT shape is replicated here.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Transaction handle accepted by the enqueue helpers: a Bun SQL style
 * tagged-template callable that resolves with the statement's result.
 */
type Tx = (
  strings: TemplateStringsArray,
  ...values: unknown[]
) => Promise<unknown>
|
|
17
|
+
|
|
18
|
+
/**
 * Insert one row into `_strav_jobs` through the caller's transaction handle.
 *
 * Queue name, max attempts and timeout are read from the durable config. The
 * payload is serialized with `JSON.stringify`, and `available_at` is the
 * current time plus `delay` milliseconds (just the current time when `delay`
 * is not greater than zero).
 *
 * @param trx - Transaction handle the INSERT is issued on.
 * @param job - Job name stored in the `job` column.
 * @param payload - Value serialized into the `payload` column.
 * @param delay - Milliseconds to postpone availability.
 */
async function enqueueJob(
  trx: Tx,
  job: string,
  payload: unknown,
  delay: number
): Promise<void> {
  const settings = getConfig()
  const offsetMs = delay > 0 ? delay : 0
  const availableAt = new Date(Date.now() + offsetMs)
  const body = JSON.stringify(payload)
  await trx`
    INSERT INTO "_strav_jobs"
    ("queue", "job", "payload", "max_attempts", "timeout", "available_at")
    VALUES (
    ${settings.queue},
    ${job},
    ${body},
    ${settings.maxAttempts},
    ${settings.jobTimeout},
    ${availableAt}
    )
  `
}
|
|
39
|
+
|
|
40
|
+
/**
 * Enqueue a `durable:advance` job that continues a run at `stepIndex`.
 *
 * @param trx - Transaction handle the job row is inserted on.
 * @param runId - Run to continue.
 * @param stepIndex - Index of the step the job should execute.
 * @param opts - Optional `attempt` (defaults to 1) and `delay` in
 *   milliseconds (defaults to 0).
 */
export async function enqueueAdvance(
  trx: Tx,
  runId: number,
  stepIndex: number,
  opts?: { attempt?: number; delay?: number }
): Promise<void> {
  const attempt = opts?.attempt ?? 1
  const delay = opts?.delay ?? 0
  await enqueueJob(trx, 'durable:advance', { runId, stepIndex, attempt }, delay)
}
|
|
54
|
+
|
|
55
|
+
/**
 * Enqueue an immediately available `durable:compensate` job for a run.
 *
 * @param trx - Transaction handle the job row is inserted on.
 * @param runId - Run being rolled back.
 * @param compensateIndex - Compensation cursor the job is issued for.
 */
export async function enqueueCompensate(
  trx: Tx,
  runId: number,
  compensateIndex: number
): Promise<void> {
  const payload = { runId, compensateIndex }
  await enqueueJob(trx, 'durable:compensate', payload, 0)
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { transaction } from '@strav/database'
|
|
2
|
+
import type { RunRow } from '../types.ts'
|
|
3
|
+
import { writeJournal } from '../models/journal.ts'
|
|
4
|
+
import { enqueueAdvance, enqueueCompensate } from './enqueue.ts'
|
|
5
|
+
import { lockRun, type Tx } from './run_store.ts'
|
|
6
|
+
|
|
7
|
+
/**
 * Produce a new state object containing `state` overlaid with `patch`.
 * Neither argument is mutated; keys present in both take the value from
 * `patch`.
 */
export function applyPatch(
  state: Record<string, unknown>,
  patch: Record<string, unknown>
): Record<string, unknown> {
  const merged: Record<string, unknown> = { ...state, ...patch }
  return merged
}
|
|
14
|
+
|
|
15
|
+
/**
 * Move a run into the `compensating` state after a step has failed.
 *
 * Rollback starts at the step preceding the failed one. When there is no such
 * step (the failed index is 0), the run is failed directly via `failRun`.
 * Otherwise the run row gets status `compensating`, the starting cursor and
 * the failure text, and a compensate job for that cursor is enqueued on the
 * same transaction.
 *
 * @param trx - Transaction handle all writes go through.
 * @param run - The run being rolled back.
 * @param failedStepIndex - Index of the step that failed.
 * @param failure - Failure message stored in the run's `error` column.
 */
export async function beginCompensation(
  trx: Tx,
  run: RunRow,
  failedStepIndex: number,
  failure: string
): Promise<void> {
  const startCursor = failedStepIndex - 1

  if (startCursor < 0) {
    // No earlier step exists, so there is nothing to compensate.
    await failRun(trx, run, failure)
    return
  }

  const runId = run.id
  await trx`
    UPDATE "_strav_workflow_runs"
    SET "status" = 'compensating', "compensation_cursor" = ${startCursor},
    "error" = ${failure}, "updated_at" = NOW()
    WHERE "id" = ${runId}
  `
  await enqueueCompensate(trx, runId, startCursor)
}
|
|
38
|
+
|
|
39
|
+
/**
 * Set a run's status to `failed` and record the failure text. When the run
 * has a parent (`parentRunId` is set), the failure is then fanned into the
 * parent on the same transaction.
 *
 * @param trx - Transaction handle all writes go through.
 * @param run - The run to fail.
 * @param failure - Message stored in the run's `error` column.
 */
export async function failRun(trx: Tx, run: RunRow, failure: string): Promise<void> {
  const runId = run.id
  await trx`
    UPDATE "_strav_workflow_runs"
    SET "status" = 'failed', "error" = ${failure}, "updated_at" = NOW()
    WHERE "id" = ${runId}
  `

  const isChild = run.parentRunId != null
  if (!isChild) return
  await finalizeChildIntoParent(trx, run, 'failed')
}
|
|
50
|
+
|
|
51
|
+
/**
 * Mark a run `completed`, in a transaction this function opens itself.
 *
 * The run row is locked first; nothing happens unless the run exists and is
 * still `running`. The run's accumulated state is stored as its `result`.
 * For a child run, the result is fanned into the parent inside the same
 * transaction, so the parent's continuation job and the child's result
 * commit together.
 *
 * @param runId - Id of the run to complete.
 */
export async function completeRun(runId: number): Promise<void> {
  await transaction(async (trx: Tx) => {
    const run = await lockRun(trx, runId)
    if (run?.status !== 'running') return

    const finalState = JSON.stringify(run.state)
    await trx`
      UPDATE "_strav_workflow_runs"
      SET "status" = 'completed', "result" = ${finalState},
      "updated_at" = NOW()
      WHERE "id" = ${runId}
    `

    if (run.parentRunId == null) return
    await finalizeChildIntoParent(trx, run, 'completed')
  })
}
|
|
72
|
+
|
|
73
|
+
/**
 * Deliver the outcome of a finished child run to its parent.
 *
 * Does nothing when the child has no parent linkage, or when the parent row
 * (locked here) is missing or not `suspended`.
 *
 *  - On `completed`, the child's state is journaled on the parent under the
 *    child's step id and merged into the parent's state under that same key.
 *    The parent is set back to `running` at the next step, and an advance
 *    job for that step is enqueued.
 *  - On any other outcome, a `failed` journal record is written on the
 *    parent and the parent starts compensation from its current step.
 *
 * @param trx - Transaction handle all writes go through.
 * @param childRun - The child run that finished.
 * @param outcome - How the child ended.
 */
export async function finalizeChildIntoParent(
  trx: Tx,
  childRun: RunRow,
  outcome: 'completed' | 'failed'
): Promise<void> {
  const { parentRunId, parentStepId } = childRun
  if (parentRunId == null || parentStepId == null) return

  const parent = await lockRun(trx, parentRunId)
  if (parent?.status !== 'suspended') return

  const stepIndex = parent.currentStep

  if (outcome !== 'completed') {
    const failure = `child workflow "${childRun.workflowName}" failed`
    await writeJournal(trx, parentRunId, [
      { stepId: parentStepId, status: 'failed', error: failure, attempt: 1 },
    ])
    await beginCompensation(trx, parent, stepIndex, failure)
    return
  }

  const childResult = childRun.state
  await writeJournal(trx, parentRunId, [
    { stepId: parentStepId, status: 'completed', result: childResult, attempt: 1 },
  ])

  const mergedState = applyPatch(parent.state, { [parentStepId]: childResult })
  const resumeAt = stepIndex + 1
  await trx`
    UPDATE "_strav_workflow_runs"
    SET "status" = 'running', "state" = ${JSON.stringify(mergedState)},
    "current_step" = ${resumeAt}, "updated_at" = NOW()
    WHERE "id" = ${parentRunId}
  `
  await enqueueAdvance(trx, parentRunId, resumeAt)
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
export { advanceHandler } from './advance_handler.ts'
|
|
2
|
+
export type { AdvancePayload } from './advance_handler.ts'
|
|
3
|
+
export { compensateHandler } from './compensate_handler.ts'
|
|
4
|
+
export type { CompensatePayload } from './compensate_handler.ts'
|
|
5
|
+
export { runDurableStep } from './step_driver.ts'
|
|
6
|
+
export type { StepOutcome } from './step_driver.ts'
|
|
7
|
+
export { runCompensator } from './compensation_driver.ts'
|
|
8
|
+
export { buildContext } from './context.ts'
|
|
9
|
+
export { isSuspendedRun } from './suspended_run.ts'
|
|
10
|
+
export type { SuspendedRunLike } from './suspended_run.ts'
|
|
11
|
+
export { enqueueAdvance, enqueueCompensate } from './enqueue.ts'
|
|
12
|
+
export { loadRun, lockRun, hydrateRun } from './run_store.ts'
|
|
13
|
+
export type { Tx } from './run_store.ts'
|
|
14
|
+
export {
|
|
15
|
+
applyPatch,
|
|
16
|
+
beginCompensation,
|
|
17
|
+
completeRun,
|
|
18
|
+
failRun,
|
|
19
|
+
finalizeChildIntoParent,
|
|
20
|
+
} from './finalize.ts'
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { sql } from '@strav/database'
|
|
2
|
+
import type { RunRow } from '../types.ts'
|
|
3
|
+
import { parseJson } from '../util.ts'
|
|
4
|
+
|
|
5
|
+
/**
 * A Bun SQL transaction handle: a tagged-template callable that resolves
 * with the statement's result.
 */
export type Tx = (
  strings: TemplateStringsArray,
  ...values: unknown[]
) => Promise<unknown>
|
|
7
|
+
|
|
8
|
+
/**
 * Convert a raw `_strav_workflow_runs` row (snake_case columns) into a typed
 * `RunRow`.
 *
 * `id` and `current_step` go through `Number()`. The nullable numeric columns
 * do the same unless they are null or undefined, in which case they become
 * `null`. `input` and `state` are parsed with `parseJson` and default to
 * `{}`; `result` is parsed the same way and defaults to `null`. The remaining
 * nullable columns are passed through, with null or undefined mapped to
 * `null`.
 */
export function hydrateRun(row: Record<string, unknown>): RunRow {
  const numberOrNull = (value: unknown): number | null =>
    value == null ? null : Number(value)
  const orNull = <T>(value: unknown): T | null => (value as T | null) ?? null
  const jsonColumn = (value: unknown) => parseJson<Record<string, unknown>>(value)

  return {
    id: Number(row.id),
    workflowName: row.workflow_name as string,
    input: jsonColumn(row.input) ?? {},
    status: row.status as RunRow['status'],
    state: jsonColumn(row.state) ?? {},
    currentStep: Number(row.current_step),
    compensationCursor: numberOrNull(row.compensation_cursor),
    parentRunId: numberOrNull(row.parent_run_id),
    parentStepId: orNull<string>(row.parent_step_id),
    awaitingSignal: orNull<string>(row.awaiting_signal),
    wakeAt: orNull<Date>(row.wake_at),
    error: orNull<string>(row.error),
    result: jsonColumn(row.result) ?? null,
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Fetch a run by id without taking a lock.
 *
 * @returns The hydrated run, or `null` when no row has that id.
 */
export async function loadRun(runId: number): Promise<RunRow | null> {
  const found = (await sql`
    SELECT * FROM "_strav_workflow_runs" WHERE "id" = ${runId}
  `) as Record<string, unknown>[]

  if (found.length > 0) {
    return hydrateRun(found[0]!)
  }
  return null
}
|
|
35
|
+
|
|
36
|
+
/**
 * Fetch a run by id with `SELECT ... FOR UPDATE` on the given transaction
 * handle.
 *
 * @returns The hydrated run, or `null` when no row has that id.
 */
export async function lockRun(trx: Tx, runId: number): Promise<RunRow | null> {
  const found = (await trx`
    SELECT * FROM "_strav_workflow_runs" WHERE "id" = ${runId} FOR UPDATE
  `) as Record<string, unknown>[]

  if (found.length > 0) {
    return hydrateRun(found[0]!)
  }
  return null
}
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
DurableContext,
|
|
3
|
+
DurableStep,
|
|
4
|
+
JournalRecord,
|
|
5
|
+
JournalWrite,
|
|
6
|
+
} from '../types.ts'
|
|
7
|
+
import { backoffDelay } from '../util.ts'
|
|
8
|
+
import { isSuspendedRun } from './suspended_run.ts'
|
|
9
|
+
|
|
10
|
+
/**
 * What executing one top-level step produced. The engine applies the outcome
 * atomically afterwards, in Phase B (a row-locked transaction).
 */
export type StepOutcome =
  /** The step and all of its sub-units completed: journal the results and move forward. */
  | {
      kind: 'advance'
      journal: JournalWrite[]
      resultPatch: Record<string, unknown>
    }
  /** A durable timer: journal, move forward, and enqueue a continuation delayed until `wakeAt`. */
  | {
      kind: 'sleep'
      journal: JournalWrite[]
      resultPatch: Record<string, unknown>
      wakeAt: Date
    }
  /** Suspend until an external signal arrives (human-in-the-loop). */
  | { kind: 'suspend-signal'; signal: string }
  /** Suspend on a brain agent `SuspendedRun`; resuming re-enters this step. */
  | { kind: 'suspend-agent'; stepName: string; snapshot: unknown }
  /** Spawn a child workflow and wait for it to finish. */
  | {
      kind: 'await-child'
      childName: string
      childInput: Record<string, unknown>
      childStepId: string
    }
  /** The step failed but retries remain: re-enqueue the same step after `backoffMs`. */
  | {
      kind: 'retry'
      journal: JournalWrite[]
      attempt: number
      backoffMs: number
      failure: string
    }
  /** The step failed terminally: begin saga compensation. */
  | { kind: 'compensate'; journal: JournalWrite[]; failure: string }
|
|
29
|
+
|
|
30
|
+
/** The subset of a step definition that the retry/compensate decision reads. */
type RetryableStep = {
  /** Step name; used as the journal id of the terminal failure record. */
  name: string
  /** Attempt threshold: a failure is retried only while `attempt < maxRetries`. */
  maxRetries: number
  /** Backoff strategy handed to `backoffDelay`. */
  retryBackoff: 'exponential' | 'linear'
}
|
|
35
|
+
|
|
36
|
+
/**
 * Turn a caught value into a failure string: the `message` of an `Error`, or
 * `String(value)` for anything else.
 */
function message(err: unknown): string {
  if (!(err instanceof Error)) {
    return String(err)
  }
  return err.message
}
|
|
39
|
+
|
|
40
|
+
/**
 * Choose between retrying and compensating after a step's handler threw.
 *
 * While `attempt < step.maxRetries` the outcome is `retry`: the partial
 * journal is kept as-is, the attempt number is incremented, and the delay
 * comes from `backoffDelay(attempt, step.retryBackoff)`. Otherwise the
 * outcome is `compensate`, and a `failed` record for the step itself is
 * appended to a copy of the partial journal.
 *
 * @param step - Name and retry policy of the failing step.
 * @param attempt - The attempt that just failed.
 * @param err - The thrown value.
 * @param partialJournal - Writes for sub-units that completed during this attempt.
 */
function failureOutcome(
  step: RetryableStep,
  attempt: number,
  err: unknown,
  partialJournal: JournalWrite[]
): StepOutcome {
  const failure = message(err)
  const retriesLeft = attempt < step.maxRetries

  if (!retriesLeft) {
    const failedRecord: JournalWrite = {
      stepId: step.name,
      status: 'failed',
      error: failure,
      attempt,
    }
    return {
      kind: 'compensate',
      journal: partialJournal.concat(failedRecord),
      failure,
    }
  }

  return {
    kind: 'retry',
    journal: partialJournal,
    attempt: attempt + 1,
    backoffMs: backoffDelay(attempt, step.retryBackoff),
    failure,
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Execute one top-level step against the journal by dispatching on
 * `step.type`.
 *
 * The journal is handed to the `parallel`, `route` and `loop` runners, which
 * read completed sub-units (parallel entries, loop iterations, the route
 * decision) back from it instead of re-running them. A redelivered job
 * therefore fast-forwards to the first incomplete unit. A `signal` step
 * yields a `suspend-signal` outcome directly.
 *
 * @param step - The step definition to execute.
 * @param ctx - Context passed to the step's handlers.
 * @param journal - Journal records of the run, keyed by step id.
 * @returns The outcome for the engine to apply.
 * @throws Error when `step.type` is not one of the known step kinds. Without
 *   this, the function would resolve to `undefined` and fail later, far from
 *   the cause.
 */
export async function runDurableStep(
  step: DurableStep,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<StepOutcome> {
  switch (step.type) {
    case 'step':
      return runSequential(step, ctx)
    case 'parallel':
      return runParallel(step, ctx, journal)
    case 'route':
      return runRoute(step, ctx, journal)
    case 'loop':
      return runLoop(step, ctx, journal)
    case 'sleep':
      return runSleep(step)
    case 'signal':
      return { kind: 'suspend-signal', signal: step.signal }
    case 'child':
      return runChild(step, ctx)
    default: {
      // Reachable only with a malformed or newer step definition at runtime.
      const unknownType = (step as { type?: unknown }).type
      throw new Error(`durable: unsupported step type "${String(unknownType)}"`)
    }
  }
}
|
|
95
|
+
|
|
96
|
+
// ── step ────────────────────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
/**
 * Run a plain sequential step by calling its handler once.
 *
 *  - If the handler's return value is recognized by `isSuspendedRun`, the
 *    step suspends (`suspend-agent`) with that value as the snapshot.
 *  - Otherwise the value is journaled as the step's completed result and
 *    patched into the run state under the step's name.
 *  - If the handler throws, the retry/compensate decision is delegated to
 *    `failureOutcome` with an empty partial journal.
 */
async function runSequential(
  step: Extract<DurableStep, { type: 'step' }>,
  ctx: DurableContext
): Promise<StepOutcome> {
  try {
    const produced = await step.handler(ctx)

    if (!isSuspendedRun(produced)) {
      const record: JournalWrite = {
        stepId: step.name,
        status: 'completed',
        result: produced,
        attempt: ctx.attempt,
      }
      return {
        kind: 'advance',
        journal: [record],
        resultPatch: { [step.name]: produced },
      }
    }

    return { kind: 'suspend-agent', stepName: step.name, snapshot: produced }
  } catch (err) {
    return failureOutcome(step, ctx.attempt, err, [])
  }
}
|
|
116
|
+
|
|
117
|
+
// ── parallel ────────────────────────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
/**
 * Run every entry of a parallel step concurrently.
 *
 * An entry with a `completed` journal record (`<step>#<entry>`) is not
 * re-run; its journaled result is reused. Each remaining entry's handler is
 * called, and its rejection is captured instead of aborting the others.
 *
 *  - When every entry succeeded, the freshly run entries are journaled, and
 *    the step itself is journaled with the combined results (keyed by entry
 *    name). The outcome is `advance`.
 *  - When at least one entry failed, the first failure in declaration order
 *    is handed to `failureOutcome`, together with the writes for entries
 *    that completed during this attempt.
 *
 * Failure is detected from each entry's `ok` flag, not from the thrown value.
 * An entry that rejects with `undefined` therefore still fails the step
 * instead of being mistaken for success.
 */
async function runParallel(
  step: Extract<DurableStep, { type: 'parallel' }>,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<StepOutcome> {
  const settled = await Promise.all(
    step.entries.map(async entry => {
      const existing = journal.get(`${step.name}#${entry.name}`)
      if (existing?.status === 'completed') {
        return { entry, ok: true as const, result: existing.result, fresh: false }
      }
      try {
        const result = await entry.handler(ctx)
        return { entry, ok: true as const, result, fresh: true }
      } catch (err) {
        return { entry, ok: false as const, err, fresh: true }
      }
    })
  )

  const writes: JournalWrite[] = []
  const resultPatch: Record<string, unknown> = {}

  for (const s of settled) {
    if (!s.ok) continue
    resultPatch[s.entry.name] = s.result
    // Replayed entries are already in the journal; only new results are written.
    if (s.fresh) {
      writes.push({
        stepId: `${step.name}#${s.entry.name}`,
        status: 'completed',
        result: s.result,
        attempt: ctx.attempt,
      })
    }
  }

  const firstFailure = settled.find(s => !s.ok)
  if (firstFailure !== undefined && !firstFailure.ok) {
    return failureOutcome(step, ctx.attempt, firstFailure.err, writes)
  }

  writes.push({
    stepId: step.name,
    status: 'completed',
    result: resultPatch,
    attempt: ctx.attempt,
  })
  return { kind: 'advance', journal: writes, resultPatch }
}
|
|
171
|
+
|
|
172
|
+
// ── route ───────────────────────────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
/**
 * Run a route step: resolve a branch key, then execute the matching branch.
 *
 * The decision is journaled as `<step>#route`. When a completed decision is
 * already in the journal, it is reused instead of calling the resolver, so a
 * retry takes the same branch. Likewise, a completed record for the step
 * itself supplies the branch result without re-running the branch.
 *
 * The branch is looked up as an own property of `step.branches`. A key that
 * only matches an inherited `Object.prototype` member (for example
 * `"toString"`) is treated like any other unknown key: no branch runs, the
 * step is journaled with a `null` result, and nothing is patched into the
 * run state.
 *
 * A resolver failure is reported with an empty partial journal. A branch
 * failure carries the route decision along, so it is persisted.
 */
async function runRoute(
  step: Extract<DurableStep, { type: 'route' }>,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<StepOutcome> {
  const routeJid = `${step.name}#route`
  const writes: JournalWrite[] = []
  let routeKey: string

  const existingRoute = journal.get(routeJid)
  try {
    if (existingRoute?.status === 'completed') {
      routeKey = existingRoute.result as string
    } else {
      routeKey = await step.resolver(ctx)
      writes.push({ stepId: routeJid, status: 'completed', result: routeKey, attempt: ctx.attempt })
    }
  } catch (err) {
    return failureOutcome(step, ctx.attempt, err, [])
  }

  // Only branches declared on the object itself count; ignore prototype members.
  const branch = Object.prototype.hasOwnProperty.call(step.branches, routeKey)
    ? step.branches[routeKey]
    : undefined

  try {
    let result: unknown = null
    const existingBranch = journal.get(step.name)
    if (existingBranch?.status === 'completed') {
      result = existingBranch.result
    } else if (branch) {
      result = await branch(ctx)
    }
    writes.push({ stepId: step.name, status: 'completed', result, attempt: ctx.attempt })
    return {
      kind: 'advance',
      journal: writes,
      resultPatch: branch ? { [step.name]: result } : {},
    }
  } catch (err) {
    // Persist the route decision so a retry takes the same branch.
    return failureOutcome(step, ctx.attempt, err, writes)
  }
}
|
|
215
|
+
|
|
216
|
+
// ── loop ────────────────────────────────────────────────────────────────────
|
|
217
|
+
|
|
218
|
+
/**
 * Run a loop step for up to `step.maxIterations` iterations.
 *
 * The first input is `step.mapInput(ctx)` when a mapper is defined, otherwise
 * `ctx.input`. Each iteration is journaled as `<step>#iter<i>`; an iteration
 * with a `completed` record reuses that result instead of calling the
 * handler. After each iteration, `step.until(result, count)` may stop the
 * loop, and `step.feedback(result)` (if defined) produces the next input.
 *
 * A handler failure is delegated to `failureOutcome`, together with the
 * writes for iterations that completed during this attempt. On success the
 * step itself is journaled with the last result (`null` when there is none),
 * and the last result is patched into the run state under the step's name,
 * unless no iteration ran at all.
 */
async function runLoop(
  step: Extract<DurableStep, { type: 'loop' }>,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<StepOutcome> {
  const writes: JournalWrite[] = []
  let input: unknown = step.mapInput ? step.mapInput(ctx) : ctx.input
  let latest: unknown
  let iterations = 0

  while (iterations < step.maxIterations) {
    const index = iterations
    iterations += 1

    const jid = `${step.name}#iter${index}`
    const recorded = journal.get(jid)

    if (recorded?.status === 'completed') {
      // Replay: take the journaled result rather than re-running the handler.
      latest = recorded.result
    } else {
      try {
        latest = await step.handler(input, ctx)
      } catch (err) {
        return failureOutcome(step, ctx.attempt, err, writes)
      }
      writes.push({ stepId: jid, status: 'completed', result: latest, attempt: ctx.attempt })
    }

    if (step.until?.(latest, index + 1)) break
    if (step.feedback) input = step.feedback(latest)
  }

  writes.push({
    stepId: step.name,
    status: 'completed',
    result: latest ?? null,
    attempt: ctx.attempt,
  })

  const ranAtLeastOnce = iterations > 0
  return {
    kind: 'advance',
    journal: writes,
    resultPatch: ranAtLeastOnce ? { [step.name]: latest } : {},
  }
}
|
|
255
|
+
|
|
256
|
+
// ── sleep ───────────────────────────────────────────────────────────────────
|
|
257
|
+
|
|
258
|
+
/**
 * Build the outcome for a sleep step.
 *
 * The wake time is `step.duration` itself when it is a `Date`, otherwise the
 * current time plus `step.duration` milliseconds. The step is journaled as
 * completed (attempt 1) with the wake time as an ISO string, and nothing is
 * patched into the run state.
 */
function runSleep(step: Extract<DurableStep, { type: 'sleep' }>): StepOutcome {
  let wakeAt: Date
  if (step.duration instanceof Date) {
    wakeAt = step.duration
  } else {
    wakeAt = new Date(Date.now() + step.duration)
  }

  const record: JournalWrite = {
    stepId: step.name,
    status: 'completed',
    result: { wakeAt: wakeAt.toISOString() },
    attempt: 1,
  }

  return { kind: 'sleep', journal: [record], resultPatch: {}, wakeAt }
}
|
|
277
|
+
|
|
278
|
+
// ── child ───────────────────────────────────────────────────────────────────
|
|
279
|
+
|
|
280
|
+
/**
 * Build the `await-child` outcome for a child-workflow step.
 *
 * The child's input is `step.mapInput(ctx)` when a mapper is defined,
 * otherwise the parent's own `ctx.input`. The step's name becomes the id
 * under which the child is tracked (`childStepId`).
 */
function runChild(
  step: Extract<DurableStep, { type: 'child' }>,
  ctx: DurableContext
): StepOutcome {
  const childInput = step.mapInput ? step.mapInput(ctx) : ctx.input

  const outcome: StepOutcome = {
    kind: 'await-child',
    childName: step.childName,
    childInput,
    childStepId: step.name,
  }
  return outcome
}
|