@strav/durable 0.4.31 → 1.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -37
- package/src/define_durable.ts +29 -0
- package/src/durable_advance_job.ts +38 -0
- package/src/durable_compensate_job.ts +33 -0
- package/src/durable_error.ts +38 -0
- package/src/durable_provider.ts +91 -0
- package/src/durable_runner.ts +395 -0
- package/src/durable_workflow.ts +97 -0
- package/src/index.ts +25 -26
- package/src/journal_schema.ts +53 -0
- package/src/runs_schema.ts +38 -0
- package/src/types.ts +58 -198
- package/src/workflow_registry.ts +49 -0
- package/CHANGELOG.md +0 -26
- package/src/builder.ts +0 -158
- package/src/config.ts +0 -36
- package/src/durable.ts +0 -268
- package/src/engine/advance_handler.ts +0 -154
- package/src/engine/compensate_handler.ts +0 -70
- package/src/engine/compensation_driver.ts +0 -61
- package/src/engine/context.ts +0 -36
- package/src/engine/enqueue.ts +0 -62
- package/src/engine/finalize.ts +0 -111
- package/src/engine/index.ts +0 -20
- package/src/engine/run_store.ts +0 -42
- package/src/engine/step_driver.ts +0 -291
- package/src/engine/suspended_run.ts +0 -24
- package/src/errors.ts +0 -21
- package/src/helpers.ts +0 -16
- package/src/models/index.ts +0 -3
- package/src/models/journal.ts +0 -54
- package/src/models/run_machine.ts +0 -39
- package/src/models/workflow_run.ts +0 -36
- package/src/providers/durable_provider.ts +0 -31
- package/src/providers/index.ts +0 -2
- package/src/registry.ts +0 -35
- package/src/schema.ts +0 -70
- package/src/util.ts +0 -25
- package/tsconfig.json +0 -5
package/src/durable.ts
DELETED
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
import { sql, transaction } from '@strav/database'
|
|
2
|
-
import { Queue } from '@strav/queue'
|
|
3
|
-
import { registry } from './registry.ts'
|
|
4
|
-
import { ensureTables } from './schema.ts'
|
|
5
|
-
import { configureDurable, type DurableConfig } from './config.ts'
|
|
6
|
-
import { RunNotFoundError } from './errors.ts'
|
|
7
|
-
import type {
|
|
8
|
-
ResumeResult,
|
|
9
|
-
RunStatus,
|
|
10
|
-
RunStatusSnapshot,
|
|
11
|
-
StartResult,
|
|
12
|
-
} from './types.ts'
|
|
13
|
-
import { writeJournal } from './models/journal.ts'
|
|
14
|
-
import {
|
|
15
|
-
advanceHandler,
|
|
16
|
-
applyPatch,
|
|
17
|
-
compensateHandler,
|
|
18
|
-
enqueueAdvance,
|
|
19
|
-
enqueueCompensate,
|
|
20
|
-
loadRun,
|
|
21
|
-
lockRun,
|
|
22
|
-
type AdvancePayload,
|
|
23
|
-
type CompensatePayload,
|
|
24
|
-
type Tx,
|
|
25
|
-
} from './engine/index.ts'
|
|
26
|
-
|
|
27
|
-
/**
 * Drop engine-internal keys so `results` reads like a `@strav/workflow` context.
 *
 * Every key that starts with `__strav_` is removed; the remaining entries are
 * copied in their original order into a new object. The copy is built with
 * `Object.fromEntries`, which defines own data properties, so an own
 * `__proto__` key in `state` (e.g. one produced by `JSON.parse`) is kept as an
 * ordinary entry instead of triggering the prototype setter on the result.
 *
 * @param state - accumulated run state, possibly containing engine-internal keys
 * @returns a new object holding only the non-internal entries
 */
function publicResults(state: Record<string, unknown>): Record<string, unknown> {
  return Object.fromEntries(
    Object.entries(state).filter(([key]) => !key.startsWith('__strav_'))
  )
}
|
|
35
|
-
|
|
36
|
-
/**
 * Mark a run as `canceled`, then repeat for every run whose `parent_run_id`
 * points at it.
 *
 * Only rows currently `pending`, `running`, `suspended` or `compensating` are
 * updated; child runs are visited whether or not the parent row changed.
 *
 * @param trx - transaction handle all statements run on
 * @param runId - id of the run to cancel
 */
async function cancelRecursive(trx: Tx, runId: number): Promise<void> {
  await trx`
    UPDATE "_strav_workflow_runs"
    SET "status" = 'canceled', "updated_at" = NOW()
    WHERE "id" = ${runId}
      AND "status" IN ('pending', 'running', 'suspended', 'compensating')
  `

  const childRows = (await trx`
    SELECT "id" FROM "_strav_workflow_runs" WHERE "parent_run_id" = ${runId}
  `) as Record<string, unknown>[]

  // Descend one child at a time so the statements stay sequential on `trx`.
  const childIds = childRows.map((row) => Number(row.id))
  for (const childId of childIds) {
    await cancelRecursive(trx, childId)
  }
}
|
|
50
|
-
|
|
51
|
-
/**
 * Static facade for the durable execution engine.
 *
 * @example
 * await Durable.start('milestone', { projectId: 42 })
 * const snapshot = await Durable.status(runId)
 * await Durable.resume(runId, 'founder-signoff', { approved: true })
 */
export class Durable {
  /** Create the engine's tables (`_strav_workflow_runs`, `_strav_workflow_journal`). */
  static async ensureTables(): Promise<void> {
    await ensureTables()
  }

  /** Override engine configuration (queue name, job timeout, max attempts). */
  static configure(patch: Partial<DurableConfig>): void {
    configureDurable(patch)
  }

  /**
   * Register the `durable:advance` / `durable:compensate` queue handlers.
   * Called by `DurableProvider.boot`; idempotent.
   */
  static registerHandlers(): void {
    Queue.handle('durable:advance', async (payload: unknown) => {
      await advanceHandler(payload as AdvancePayload)
    })
    Queue.handle('durable:compensate', async (payload: unknown) => {
      await compensateHandler(payload as CompensatePayload)
    })
  }

  /**
   * Start a new durable run. Inserts the run row and enqueues the first
   * step's `durable:advance` job in a single transaction. Returns immediately
   * — the workflow runs on the queue.
   *
   * @param workflowName - name of a registered workflow; `registry.get` throws otherwise
   * @param input - workflow input, stored JSON-serialized on the run row
   * @param opts - optional parent run / parent step linkage for child runs
   * @returns the new run id and its initial status (`running`)
   */
  static async start(
    workflowName: string,
    input: Record<string, unknown> = {},
    opts?: { parentRunId?: number; parentStepId?: string }
  ): Promise<StartResult> {
    registry.get(workflowName) // throws if not registered

    return await transaction(async (trx: Tx) => {
      const rows = (await trx`
        INSERT INTO "_strav_workflow_runs"
          ("workflow_name", "input", "status", "state", "current_step",
           "parent_run_id", "parent_step_id")
        VALUES (
          ${workflowName}, ${JSON.stringify(input)}, 'running', '{}', 0,
          ${opts?.parentRunId ?? null}, ${opts?.parentStepId ?? null}
        )
        RETURNING "id"
      `) as Record<string, unknown>[]
      const runId = Number(rows[0]!.id)
      await enqueueAdvance(trx, runId, 0)
      return { runId, status: 'running' as RunStatus }
    })
  }

  /**
   * Deliver a signal to a suspended run.
   *
   * - `waitForSignal` step → journals the payload, advances past the step.
   * - A suspended brain-agent `.step` → stores the payload and re-enters the
   *   step so the handler can call `runner.resume(...)`.
   *
   * Returns `{ accepted: false }` if the run is not suspended on a matching
   * signal (idempotent — a duplicate or mismatched signal is a no-op).
   *
   * @param runId - id of the run to resume
   * @param signal - signal name; must equal the run's `awaitingSignal`
   * @param data - payload delivered with the signal; stored as `null` when omitted
   * @throws RunNotFoundError when no run row exists for `runId`
   */
  static async resume(
    runId: number,
    signal: string,
    data?: unknown
  ): Promise<ResumeResult> {
    return await transaction(async (trx: Tx) => {
      const run = await lockRun(trx, runId)
      if (!run) throw new RunNotFoundError(runId)
      if (run.status !== 'suspended' || run.awaitingSignal !== signal) {
        return { accepted: false, status: run.status }
      }

      const step = registry.get(run.workflowName).steps[run.currentStep]
      if (!step) return { accepted: false, status: run.status }

      if (step.type === 'signal') {
        await writeJournal(trx, runId, [
          { stepId: step.name, status: 'completed', result: data ?? null, attempt: 1 },
        ])
        const newState = applyPatch(run.state, { [step.name]: data ?? null })
        const next = run.currentStep + 1
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "status" = 'running', "awaiting_signal" = NULL,
              "state" = ${JSON.stringify(newState)}, "current_step" = ${next},
              "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        await enqueueAdvance(trx, runId, next)
        return { accepted: true, status: 'running' as RunStatus }
      }

      if (step.type === 'step') {
        // A suspended brain agent — re-enter the same step with the resume data.
        const newState = applyPatch(run.state, { __strav_resume__: data ?? null })
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "status" = 'running', "awaiting_signal" = NULL,
              "state" = ${JSON.stringify(newState)}, "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        await enqueueAdvance(trx, runId, run.currentStep)
        return { accepted: true, status: 'running' as RunStatus }
      }

      return { accepted: false, status: run.status }
    })
  }

  /**
   * Snapshot a run's live state together with its child runs.
   *
   * Children are resolved by calling `status` on each child id, so the
   * snapshot contains the whole descendant tree, not only direct children.
   * `totalSteps` is 0 when the run's workflow is not in the registry.
   *
   * @param runId - id of the run to snapshot
   * @throws RunNotFoundError when no run row exists for `runId`
   */
  static async status(runId: number): Promise<RunStatusSnapshot> {
    const run = await loadRun(runId)
    if (!run) throw new RunNotFoundError(runId)

    const totalSteps = registry.has(run.workflowName)
      ? registry.get(run.workflowName).steps.length
      : 0

    const childRows = (await sql`
      SELECT "id" FROM "_strav_workflow_runs"
      WHERE "parent_run_id" = ${runId} ORDER BY "id"
    `) as Record<string, unknown>[]
    const children: RunStatusSnapshot[] = []
    for (const child of childRows) {
      children.push(await Durable.status(Number(child.id)))
    }

    return {
      runId: run.id,
      workflowName: run.workflowName,
      status: run.status,
      currentStep: run.currentStep,
      totalSteps,
      awaitingSignal: run.awaitingSignal,
      wakeAt: run.wakeAt ? run.wakeAt.toISOString() : null,
      results: publicResults(run.state),
      error: run.error,
      parentRunId: run.parentRunId,
      children,
    }
  }

  /**
   * List runs, most recent first, optionally filtered by status / parent.
   * Each matching row is expanded into a full snapshot via `Durable.status`.
   */
  static async list(filter?: {
    status?: RunStatus
    parentRunId?: number
  }): Promise<RunStatusSnapshot[]> {
    const status = filter?.status ?? null
    const parentRunId = filter?.parentRunId ?? null
    const rows = (await sql`
      SELECT "id" FROM "_strav_workflow_runs"
      WHERE (${status}::text IS NULL OR "status" = ${status})
        AND (${parentRunId}::bigint IS NULL OR "parent_run_id" = ${parentRunId})
      ORDER BY "id" DESC
    `) as Record<string, unknown>[]

    const snapshots: RunStatusSnapshot[] = []
    for (const row of rows) {
      snapshots.push(await Durable.status(Number(row.id)))
    }
    return snapshots
  }

  /** Cancel a run and all of its descendant child runs, in one transaction. */
  static async cancel(runId: number): Promise<void> {
    await transaction(async (trx: Tx) => {
      await cancelRecursive(trx, runId)
    })
  }

  /**
   * Re-enqueue runs that are `running` / `compensating` but have no
   * `_strav_jobs` row referencing them — e.g. a job that dead-lettered.
   * Safe to run periodically (e.g. via the `@strav/queue` Scheduler).
   *
   * @returns the number of runs for which a job was actually enqueued. A
   *   `compensating` run without a `compensation_cursor` has nothing to
   *   enqueue and is not counted.
   */
  static async recover(): Promise<number> {
    const rows = (await sql`
      SELECT r."id", r."status", r."current_step", r."compensation_cursor"
      FROM "_strav_workflow_runs" r
      WHERE r."status" IN ('running', 'compensating')
        AND NOT EXISTS (
          SELECT 1 FROM "_strav_jobs" j
          WHERE j."job" IN ('durable:advance', 'durable:compensate')
            AND (j."payload"->>'runId')::bigint = r."id"
        )
    `) as Record<string, unknown>[]

    let recovered = 0
    for (const row of rows) {
      const runId = Number(row.id)
      const enqueued = await transaction(async (trx: Tx) => {
        if (row.status === 'running') {
          await enqueueAdvance(trx, runId, Number(row.current_step))
          return true
        }
        if (row.compensation_cursor != null) {
          await enqueueCompensate(trx, runId, Number(row.compensation_cursor))
          return true
        }
        // Compensating without a cursor: no job to create for this run.
        return false
      })
      if (enqueued) recovered++
    }
    return recovered
  }

  /** Clear the workflow registry. For testing only. */
  static reset(): void {
    registry.reset()
  }
}
|
package/src/engine/advance_handler.ts
DELETED
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import { transaction } from '@strav/database'
|
|
2
|
-
import { registry } from '../registry.ts'
|
|
3
|
-
import { loadJournal, writeJournal } from '../models/journal.ts'
|
|
4
|
-
import { buildContext } from './context.ts'
|
|
5
|
-
import { enqueueAdvance } from './enqueue.ts'
|
|
6
|
-
import { applyPatch, beginCompensation, completeRun } from './finalize.ts'
|
|
7
|
-
import { loadRun, lockRun, type Tx } from './run_store.ts'
|
|
8
|
-
import { runDurableStep, type StepOutcome } from './step_driver.ts'
|
|
9
|
-
|
|
10
|
-
/** Payload of a `durable:advance` job. */
export interface AdvancePayload {
  /** Id of the run the job belongs to. */
  runId: number
  /** Index of the top-level step this job is meant to execute. */
  stepIndex: number
  /** Attempt number for the step; `advanceHandler` treats a missing value as 1. */
  attempt?: number
}
|
|
16
|
-
|
|
17
|
-
/**
 * Queue handler for `durable:advance`: executes a single top-level step.
 *
 * Phase A runs without a row lock — the run is loaded, stale deliveries are
 * filtered out, and the step is executed (which may take minutes).
 * Phase B is delegated to `applyOutcome`, which applies the step's outcome
 * inside a row-locked transaction.
 *
 * @param payload - run id, step index and optional attempt (defaults to 1)
 */
export async function advanceHandler(payload: AdvancePayload): Promise<void> {
  const { runId, stepIndex } = payload
  const attempt = payload.attempt ?? 1

  const snapshot = await loadRun(runId)
  // Ignore the job unless the run exists, is `running`, and is still parked
  // on the step this job was enqueued for (anything else is a stale delivery
  // or a run owned by resume / the compensation chain).
  if (!snapshot || snapshot.status !== 'running' || snapshot.currentStep !== stepIndex) {
    return
  }

  const { steps } = registry.get(snapshot.workflowName)

  // Index past the final step means there is nothing left to execute.
  if (stepIndex >= steps.length) {
    await completeRun(runId)
    return
  }

  const current = steps[stepIndex]!
  const journal = await loadJournal(runId)
  const context = buildContext(snapshot, attempt, current.name)
  const outcome = await runDurableStep(current, context, journal)
  await applyOutcome(runId, stepIndex, outcome)
}
|
|
52
|
-
|
|
53
|
-
/**
 * Phase B of `durable:advance`: persist a step outcome in one transaction
 * after locking the run row.
 *
 * The run is re-validated under the lock (must still be `running` on
 * `stepIndex`); otherwise the outcome is discarded.
 *
 * @param runId - id of the run the outcome belongs to
 * @param stepIndex - index of the step that produced the outcome
 * @param outcome - result of executing the step, discriminated by `kind`
 */
async function applyOutcome(
  runId: number,
  stepIndex: number,
  outcome: StepOutcome
): Promise<void> {
  await transaction(async (trx: Tx) => {
    const locked = await lockRun(trx, runId)
    // A resume, cancel or duplicate delivery may have moved the run while the
    // step was executing; in that case this outcome no longer applies.
    const stillCurrent =
      !!locked && locked.status === 'running' && locked.currentStep === stepIndex
    if (!locked || !stillCurrent) return

    switch (outcome.kind) {
      case 'advance': {
        await writeJournal(trx, runId, outcome.journal)
        const merged = applyPatch(locked.state, outcome.resultPatch)
        // The resume payload is only meaningful for the step that consumed it.
        delete merged['__strav_resume__']
        const following = stepIndex + 1
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "state" = ${JSON.stringify(merged)}, "current_step" = ${following},
              "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        await enqueueAdvance(trx, runId, following)
        break
      }

      case 'sleep': {
        await writeJournal(trx, runId, outcome.journal)
        const merged = applyPatch(locked.state, outcome.resultPatch)
        const following = stepIndex + 1
        // Never schedule in the past: clamp the remaining sleep to zero.
        const remainingMs = Math.max(0, outcome.wakeAt.getTime() - Date.now())
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "state" = ${JSON.stringify(merged)}, "current_step" = ${following},
              "wake_at" = ${outcome.wakeAt}, "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        await enqueueAdvance(trx, runId, following, { delay: remainingMs })
        break
      }

      case 'suspend-signal': {
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "status" = 'suspended', "awaiting_signal" = ${outcome.signal},
              "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        break
      }

      case 'suspend-agent': {
        // The agent snapshot is stored under the step's own name in state.
        const merged = applyPatch(locked.state, { [outcome.stepName]: outcome.snapshot })
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "status" = 'suspended', "awaiting_signal" = ${outcome.stepName},
              "state" = ${JSON.stringify(merged)}, "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        break
      }

      case 'await-child': {
        const inserted = (await trx`
          INSERT INTO "_strav_workflow_runs"
            ("workflow_name", "input", "status", "state", "current_step",
             "parent_run_id", "parent_step_id")
          VALUES (
            ${outcome.childName}, ${JSON.stringify(outcome.childInput)},
            'running', '{}', 0, ${runId}, ${outcome.childStepId}
          )
          RETURNING "id"
        `) as Record<string, unknown>[]
        const childRunId = Number(inserted[0]!.id)
        await enqueueAdvance(trx, childRunId, 0)
        await trx`
          UPDATE "_strav_workflow_runs"
          SET "status" = 'suspended', "updated_at" = NOW()
          WHERE "id" = ${runId}
        `
        break
      }

      case 'retry': {
        await writeJournal(trx, runId, outcome.journal)
        const retryOptions = { attempt: outcome.attempt, delay: outcome.backoffMs }
        await enqueueAdvance(trx, runId, stepIndex, retryOptions)
        break
      }

      case 'compensate': {
        await writeJournal(trx, runId, outcome.journal)
        await beginCompensation(trx, locked, stepIndex, outcome.failure)
        break
      }
    }
  })
}
|
package/src/engine/compensate_handler.ts
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
import { transaction } from '@strav/database'
|
|
2
|
-
import { registry } from '../registry.ts'
|
|
3
|
-
import type { JournalWrite } from '../types.ts'
|
|
4
|
-
import { loadJournal, writeJournal } from '../models/journal.ts'
|
|
5
|
-
import { buildContext } from './context.ts'
|
|
6
|
-
import { enqueueCompensate } from './enqueue.ts'
|
|
7
|
-
import { failRun } from './finalize.ts'
|
|
8
|
-
import { loadRun, lockRun, type Tx } from './run_store.ts'
|
|
9
|
-
import { runCompensator } from './compensation_driver.ts'
|
|
10
|
-
|
|
11
|
-
/** Payload of a `durable:compensate` job. */
export interface CompensatePayload {
  /** Id of the run being rolled back. */
  runId: number
  /** Index of the step to compensate; `compensateHandler` compares it with the run's `compensationCursor`. */
  compensateIndex: number
}
|
|
16
|
-
|
|
17
|
-
/**
 * Queue handler for `durable:compensate`: rolls back one step of a failed run.
 *
 * The job is ignored unless the run is `compensating` and its
 * `compensationCursor` equals `compensateIndex`. The step's compensator runs
 * outside any transaction, unless the journal already holds a completed
 * `<step>#compensate` entry. Afterwards, under a row lock, the journal writes
 * are stored and the cursor moves down by one; once it would drop below zero
 * the run is failed via `failRun`.
 *
 * @param payload - run id and the cursor position this job is for
 */
export async function compensateHandler(payload: CompensatePayload): Promise<void> {
  const { runId, compensateIndex } = payload

  const run = await loadRun(runId)
  if (!run || run.status !== 'compensating' || run.compensationCursor !== compensateIndex) {
    return
  }

  // Phase A — execute the compensator without holding a transaction open.
  let pendingWrites: JournalWrite[] = []
  const target =
    compensateIndex >= 0 ? registry.get(run.workflowName).steps[compensateIndex] : undefined
  if (target) {
    const journal = await loadJournal(runId)
    const marker = journal.get(`${target.name}#compensate`)
    if (marker?.status !== 'completed') {
      pendingWrites = await runCompensator(target, buildContext(run, 1, target.name), journal)
    }
  }

  // Phase B — persist the result and move the cursor in one transaction.
  await transaction(async (trx: Tx) => {
    const locked = await lockRun(trx, runId)
    if (
      !(
        locked &&
        locked.status === 'compensating' &&
        locked.compensationCursor === compensateIndex
      )
    ) {
      return
    }

    if (pendingWrites.length > 0) {
      await writeJournal(trx, runId, pendingWrites)
    }

    const previous = compensateIndex - 1
    if (previous >= 0) {
      await trx`
        UPDATE "_strav_workflow_runs"
        SET "compensation_cursor" = ${previous}, "updated_at" = NOW()
        WHERE "id" = ${runId}
      `
      await enqueueCompensate(trx, runId, previous)
      return
    }

    // Cursor exhausted: every step has been visited, so the run ends as failed.
    await failRun(trx, locked, locked.error ?? 'workflow failed')
  })
}
|
package/src/engine/compensation_driver.ts
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
DurableContext,
|
|
3
|
-
DurableStep,
|
|
4
|
-
JournalRecord,
|
|
5
|
-
JournalWrite,
|
|
6
|
-
} from '../types.ts'
|
|
7
|
-
|
|
8
|
-
/**
 * Turn an arbitrary thrown value into a human-readable message.
 *
 * - `Error` instances yield their `message`.
 * - Primitives (and `null` / `undefined`) are converted with `String`.
 * - Other objects yield their own string `message` property when present,
 *   otherwise their `String` form; when that form is the uninformative
 *   `[object Object]`, the JSON serialization is used instead.
 *
 * @param err - the value caught from a `try` block
 * @returns a best-effort description; never throws
 */
function message(err: unknown): string {
  if (err instanceof Error) return err.message
  if (typeof err !== 'object' || err === null) return String(err)

  const inner = (err as { message?: unknown }).message
  if (typeof inner === 'string') return inner

  try {
    const text = String(err)
    if (text !== '[object Object]') return text
    const json: string | undefined = JSON.stringify(err)
    return json === undefined ? text : json
  } catch {
    // String() or JSON.stringify() failed (no toString, circular reference, …).
    return Object.prototype.toString.call(err)
  }
}
|
|
11
|
-
|
|
12
|
-
/**
 * Execute the compensator(s) attached to one step during saga rollback.
 *
 * Only `step` and `parallel` entries are handled; every other step type falls
 * through without running anything. A unit is compensated only when the
 * journal records it as `completed` and it defines a `compensate` callback.
 * Compensator errors are collected rather than rethrown, so rollback always
 * produces a journal write.
 *
 * @param step - the step being rolled back
 * @param ctx - context passed to each compensator
 * @param journal - journal records keyed by step id
 * @returns a single `<step>#compensate` journal write: `completed` when no
 *   compensator threw, otherwise `failed` with the messages joined by `; `
 */
export async function runCompensator(
  step: DurableStep,
  ctx: DurableContext,
  journal: Map<string, JournalRecord>
): Promise<JournalWrite[]> {
  const failures: string[] = []
  const isCompleted = (stepId: string): boolean => journal.get(stepId)?.status === 'completed'

  switch (step.type) {
    case 'step': {
      if (isCompleted(step.name) && step.compensate) {
        try {
          await step.compensate(ctx)
        } catch (err) {
          failures.push(message(err))
        }
      }
      break
    }
    case 'parallel': {
      for (const entry of step.entries) {
        if (!isCompleted(`${step.name}#${entry.name}`) || !entry.compensate) continue
        try {
          await entry.compensate(ctx)
        } catch (err) {
          // Prefix with the entry name so failures can be told apart.
          failures.push(`${entry.name}: ${message(err)}`)
        }
      }
      break
    }
    default:
      // route / loop / sleep / signal / child have no compensators.
      break
  }

  const failed = failures.length > 0
  return [
    {
      stepId: `${step.name}#compensate`,
      status: failed ? 'failed' : 'completed',
      error: failed ? failures.join('; ') : undefined,
      attempt: 1,
    },
  ]
}
|
package/src/engine/context.ts
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import type { DurableContext, RunRow } from '../types.ts'
|
|
2
|
-
|
|
3
|
-
const INTERNAL_PREFIX = '__strav_'
|
|
4
|
-
|
|
5
|
-
/**
 * Build the `DurableContext` handed to step handlers from a run row.
 *
 * `results` is the run's accumulated `state` with engine-internal keys
 * (those starting with `INTERNAL_PREFIX`, e.g. `__strav_resume__`) filtered
 * out, so it reads like a `@strav/workflow` context. The filtered copy is
 * built with `Object.fromEntries`, which defines own data properties, so an
 * own `__proto__` key in the state is kept as an ordinary entry instead of
 * triggering the prototype setter on `results`.
 *
 * @param run - the run row; a missing `state` / `input` is treated as `{}`
 * @param attempt - attempt number exposed to the handler
 * @param stepName - name of the step the context is built for
 * @returns a context whose `signal(name)` reads from the filtered results and
 *   whose `resumeData()` reads the raw `__strav_resume__` entry of the state
 */
export function buildContext(
  run: RunRow,
  attempt: number,
  stepName: string
): DurableContext {
  const rawState = run.state ?? {}
  const results: Record<string, unknown> = Object.fromEntries(
    Object.entries(rawState).filter(([key]) => !key.startsWith(INTERNAL_PREFIX))
  )

  return {
    input: run.input ?? {},
    results,
    runId: run.id,
    attempt,
    stepName,
    signal<T = unknown>(name: string): T | undefined {
      return results[name] as T | undefined
    },
    resumeData<T = unknown>(): T | undefined {
      return rawState['__strav_resume__'] as T | undefined
    },
  }
}
|
package/src/engine/enqueue.ts
DELETED
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
import { getConfig } from '../config.ts'
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Enqueue durable jobs by INSERTing directly into `_strav_jobs` on the caller's
|
|
5
|
-
* transaction handle.
|
|
6
|
-
*
|
|
7
|
-
* This is a deliberate, documented coupling to `@strav/queue`'s table. It is
|
|
8
|
-
* NOT done via `Queue.push` because `Queue.push` runs its INSERT on a separate
|
|
9
|
-
* connection — outside the engine's transaction — which would break the atomic
|
|
10
|
-
* `{ journal write + run-row update + next-job enqueue }` commit that the whole
|
|
11
|
-
* crash-safety story depends on. The clean long-term fix is an upstream
|
|
12
|
-
* `Queue.pushTx(trx, ...)`; until then, the INSERT shape is replicated here.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
/** A Bun SQL transaction handle (tagged-template callable). */
|
|
16
|
-
type Tx = (strings: TemplateStringsArray, ...values: unknown[]) => Promise<unknown>
|
|
17
|
-
|
|
18
|
-
/**
 * Insert one job row into `_strav_jobs` using the caller's transaction handle.
 *
 * Queue name, max attempts and timeout come from `getConfig()`. The job
 * becomes available `delay` milliseconds from now when `delay` is positive,
 * and immediately otherwise.
 *
 * @param trx - transaction handle the INSERT runs on
 * @param job - job name
 * @param payload - job payload, stored JSON-serialized
 * @param delay - delay in milliseconds before the job becomes available
 */
async function enqueueJob(
  trx: Tx,
  job: string,
  payload: unknown,
  delay: number
): Promise<void> {
  const { queue, maxAttempts, jobTimeout } = getConfig()
  const now = Date.now()
  const availableAt = new Date(delay > 0 ? now + delay : now)
  const serialized = JSON.stringify(payload)

  await trx`
    INSERT INTO "_strav_jobs"
      ("queue", "job", "payload", "max_attempts", "timeout", "available_at")
    VALUES (
      ${queue},
      ${job},
      ${serialized},
      ${maxAttempts},
      ${jobTimeout},
      ${availableAt}
    )
  `
}
|
|
39
|
-
|
|
40
|
-
/**
 * Enqueue a `durable:advance` job for a run at the given step index.
 *
 * @param trx - transaction handle the job row is inserted on
 * @param runId - id of the run to continue
 * @param stepIndex - index of the step the job should execute
 * @param opts - optional `attempt` (defaults to 1) and `delay` in ms (defaults to 0)
 */
export async function enqueueAdvance(
  trx: Tx,
  runId: number,
  stepIndex: number,
  opts?: { attempt?: number; delay?: number }
): Promise<void> {
  const attempt = opts?.attempt ?? 1
  const delay = opts?.delay ?? 0
  await enqueueJob(trx, 'durable:advance', { runId, stepIndex, attempt }, delay)
}
|
|
54
|
-
|
|
55
|
-
/**
 * Enqueue a `durable:compensate` job, available immediately, for a run at
 * the given compensation cursor.
 *
 * @param trx - transaction handle the job row is inserted on
 * @param runId - id of the run being rolled back
 * @param compensateIndex - cursor position the job should compensate
 */
export async function enqueueCompensate(
  trx: Tx,
  runId: number,
  compensateIndex: number
): Promise<void> {
  const payload = { runId, compensateIndex }
  await enqueueJob(trx, 'durable:compensate', payload, 0)
}
|