workerflow 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -29
- package/package.json +1 -1
- package/src/definition.ts +126 -174
- package/src/json.ts +5 -7
- package/src/migrations/0000_initial.ts +98 -294
- package/src/runtime.ts +634 -998
- package/test/runtime.spec.ts +709 -1113
- package/test/tsconfig.json +1 -4
- package/test/worker.ts +1 -3
- package/demo/README.md +0 -73
- package/demo/index.html +0 -13
- package/demo/package.json +0 -33
- package/demo/public/vite.svg +0 -1
- package/demo/src/App.css +0 -0
- package/demo/src/App.tsx +0 -9
- package/demo/src/assets/Cloudflare_Logo.svg +0 -51
- package/demo/src/assets/react.svg +0 -1
- package/demo/src/index.css +0 -1
- package/demo/src/main.tsx +0 -10
- package/demo/tsconfig.app.json +0 -28
- package/demo/tsconfig.json +0 -14
- package/demo/tsconfig.node.json +0 -25
- package/demo/tsconfig.worker.json +0 -13
- package/demo/vite.config.ts +0 -9
- package/demo/worker/index.ts +0 -16
- package/demo/worker-configuration.d.ts +0 -12851
- package/demo/wrangler.jsonc +0 -32
package/README.md
CHANGED
|
@@ -33,22 +33,14 @@ tag = "v1"
|
|
|
33
33
|
new_sqlite_classes = ["OrderWorkflowRuntime"]
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
In your Worker module, export the runtime, the definition, and a **`fetch`** handler (or queue consumer, cron trigger, and so on) that obtains a namespace stub and calls **`create`** to pin
|
|
36
|
+
In your Worker module, export the runtime, the definition, and a **`fetch`** handler (or queue consumer, cron trigger, and so on) that obtains a namespace stub and calls **`create`** to pin the workflow input:
|
|
37
37
|
|
|
38
38
|
```ts
|
|
39
39
|
// src/worker.ts
|
|
40
40
|
import { WorkflowDefinition, WorkflowRuntime } from "workerflow";
|
|
41
41
|
|
|
42
42
|
export class OrderWorkflowRuntime extends WorkflowRuntime<{ orderId: string }> {
|
|
43
|
-
|
|
44
|
-
protected getDefinition(version: string) {
|
|
45
|
-
switch (version) {
|
|
46
|
-
case "2026-04-01":
|
|
47
|
-
return this.ctx.exports.OrderWorkflowDefinition;
|
|
48
|
-
default:
|
|
49
|
-
throw new Error(`Unsupported workflow definition version: ${version}`);
|
|
50
|
-
}
|
|
51
|
-
}
|
|
43
|
+
protected readonly definition = this.ctx.exports.OrderWorkflowDefinition;
|
|
52
44
|
}
|
|
53
45
|
|
|
54
46
|
export class OrderWorkflowDefinition extends WorkflowDefinition<{ orderId: string }> {
|
|
@@ -79,7 +71,7 @@ export default {
|
|
|
79
71
|
if (url.pathname === "/orders") {
|
|
80
72
|
const orderId = "new-order";
|
|
81
73
|
const stub = env.ORDER_WORKFLOW.getByName(orderId);
|
|
82
|
-
await stub.create({
|
|
74
|
+
await stub.create({ orderId });
|
|
83
75
|
return Response.json({ id: orderId });
|
|
84
76
|
}
|
|
85
77
|
|
|
@@ -88,7 +80,22 @@ export default {
|
|
|
88
80
|
} satisfies ExportedHandler<Env>;
|
|
89
81
|
```
|
|
90
82
|
|
|
91
|
-
Workflow input is **`this.ctx.props.input`**, populated from **`create(
|
|
83
|
+
Workflow input is **`this.ctx.props.input`**, populated from **`create(input)`**. TypeScript requires an input argument when your runtime's **`TInput`** excludes **`undefined`**; no-input workflows can use **`WorkflowRuntime<undefined>`**, and workflows whose input is optional can include **`undefined`** in the input type. The runtime also sets **`this.ctx.props.requestId`** (a new UUID each time the run loop invokes your definition) and **`this.ctx.props.runtimeInstanceId`** (this Durable Object’s id) for logs and correlation.
|
|
84
|
+
|
|
85
|
+
### Runtime control
|
|
86
|
+
|
|
87
|
+
From the Durable Object stub you can:
|
|
88
|
+
|
|
89
|
+
- **`create(input)`** — Pins the workflow input in SQLite the **first** time the instance is initialized, then starts execution. The input argument is required unless **`TInput`** includes **`undefined`**. **No-op** if the workflow is already **completed**, **failed**, **cancelled**, or **paused**.
|
|
90
|
+
- **`pause()`** — When status is **running**, moves to **paused**, clears alarms, and stops driving **`execute()`** until **`resume()`**. Inbound events are queued and applied when a matching **`wait`** runs again after resume.
|
|
91
|
+
- **`resume()`** — When status is **paused**, moves to **running** and continues the loop. Throws if the workflow is not paused.
|
|
92
|
+
- **`cancel(reason?)`** — Moves to terminal **cancelled** and clears alarms.
|
|
93
|
+
|
|
94
|
+
New instances start in **`pending`**. The first **`create()`** call moves the instance through the durable **`initialized`** state before execution enters **`running`**.
|
|
95
|
+
|
|
96
|
+
### Experimental introspection
|
|
97
|
+
|
|
98
|
+
For dashboards and debugging, the runtime exposes **`getSteps_experimental()`** and **`getWorkflowEvents_experimental()`**. The optional lifecycle hook is **`onStatusChange_experimental`** (see [Keeping workflow execution separate from state projection](#keeping-workflow-execution-separate-from-state-projection)). These names are marked experimental because they may change as the API hardens.
|
|
92
99
|
|
|
93
100
|
## How it works
|
|
94
101
|
|
|
@@ -102,17 +109,21 @@ The library separates concerns into two main layers:
|
|
|
102
109
|
|
|
103
110
|
Each time the runtime advances, it calls `next()` on your `WorkflowDefinition`, which **runs `execute()` from the beginning again**. Steps that have already completed durably (`run`, elapsed `sleep`, resolved `wait`, and so on) **replay from stored state**: their callbacks are not re-invoked, and recorded results are returned as-is. New side effects happen only when the engine reaches a step that is not yet complete and the durable state allows that transition.
|
|
104
111
|
|
|
112
|
+
**Step ids must be unique** within one top-level **`execute()`** run (the same **`next()`** invocation): reuse the same id across **`run`**, **`sleep`**, or **`wait`** and the workflow fails fast.
|
|
113
|
+
|
|
114
|
+
**Sibling `run` calls.** At a given nesting level, after one **`run`** finishes successfully in the same **`next()`**, the next sibling **`run`** forces the runtime to **run the loop again immediately** (you still replay from the top; completed steps stay cached). For linear workflows this is invisible; if you place several **`run`** calls back-to-back at the same depth, expect an extra loop hop per step after the first. Nested **`run`** callbacks get a fresh frame, so children do not consume the parent’s sibling budget.
|
|
115
|
+
|
|
105
116
|
### When the loop runs and when it stops
|
|
106
117
|
|
|
107
118
|
The `WorkflowRuntime` Durable Object drives a **run loop** that repeatedly invokes `next()` until one of these happens:
|
|
108
119
|
|
|
109
|
-
- **Terminal**: `next()` reports the workflow is **done** (`completed` or `failed`). The loop exits and the watchdog alarm is cleared.
|
|
120
|
+
- **Terminal**: `next()` reports the workflow is **done** (`completed` or `failed`), or the instance is **`cancelled`** via **`cancel()`** while the loop is idle or between iterations. The loop exits and the watchdog alarm is cleared.
|
|
110
121
|
- **Immediate resume**: `next()` asks to **continue immediately** (for example, so another step in the same logical “tick” can run). The loop continues without leaving the Durable Object invocation.
|
|
111
122
|
- **Suspended**: `next()` asks to **suspend**—for example, a step is waiting on a **retry backoff**, a **sleep** until a future time, or a **wait** for an inbound event. The loop exits; the runtime relies on **alarms** and/or **incoming events** to call back into the run loop. A long **watchdog alarm** also exists as a safety net if progress stalls.
|
|
112
123
|
|
|
113
124
|
### Step kinds
|
|
114
125
|
|
|
115
|
-
- **`run`**: A named, durable unit of work. Outcomes are persisted; failures can be **retried** with backoff up to
|
|
126
|
+
- **`run`**: A named, durable unit of work. Callbacks return JSON-serializable values or `undefined`. Outcomes are persisted; failures can be **retried** with backoff up to **`maxAttempts`** (default **3** attempts per step unless you pass `{ maxAttempts: n }`).
|
|
116
127
|
- **`sleep`**: Pauses until a **scheduled wake time** stored in SQLite; the Durable Object is woken by an **alarm** when that time is reached.
|
|
117
128
|
- **`wait`**: Pauses until a matching **inbound event** (by name) or an optional **timeout**. Resolution is recorded in durable state so replay does not double-apply the branch that handled the event.
|
|
118
129
|
|
|
@@ -159,7 +170,7 @@ const payment = await this.wait<{ chargeId: string }>("capture-payment", "paymen
|
|
|
159
170
|
|
|
160
171
|
#### The watchdog alarm
|
|
161
172
|
|
|
162
|
-
In addition to these precise alarms, the runtime sets a **30-minute watchdog alarm at the start of every run-loop iteration**, before delegating to the workflow definition. When an iteration
|
|
173
|
+
In addition to these precise alarms, the runtime sets a **30-minute watchdog alarm at the start of every run-loop iteration**, before delegating to the workflow definition. When an iteration ends cleanly—workflow terminal completion, suspend with a known **`wakeAt`**, or suspend waiting only on inbound events—the alarm is **cleared** or **replaced** by the next wake time when there is one. A **`wait`** with **no** `timeoutAt` has no step-specific alarm until an event arrives; the watchdog remains the backstop. The watchdog only fires if something goes wrong in the middle.
|
|
163
174
|
|
|
164
175
|
The problem it guards against is a `run` step that gets stuck in the `running` state. Before the user's callback executes, the runtime durably writes `state = 'running'` to SQLite. That write is intentional: it ensures that a later replay does not try to start a second concurrent attempt for the same step. But it creates a gap:
|
|
165
176
|
|
|
@@ -174,27 +185,21 @@ At this point there is no sleep alarm, no retry alarm, and no wait-timeout alarm
|
|
|
174
185
|
|
|
175
186
|
There is also a guard for the case where an alarm fires while the run loop is already active — for example, a sleep's precise alarm arriving while the loop is processing another step in the same Durable Object invocation. In that situation the alarm handler simply reschedules the watchdog for another 30 minutes rather than starting a second concurrent loop, keeping the safety net in place until the active loop finishes.
|
|
176
187
|
|
|
177
|
-
### Versioning
|
|
178
|
-
|
|
179
|
-
`create({ definitionVersion, input })` **pins** the definition version and input in SQLite the first time the instance is initialized. **The version cannot be changed later** for that Durable Object id; attempting a different version throws. Every subsequent `next()` resolves the worker implementation via **`getDefinition(version)`** using that pinned value, so **long-lived workflows keep running the definition lineage they started with**, while new instances can use newer version strings you add to `getDefinition`.
|
|
180
|
-
|
|
181
188
|
## Why this exists
|
|
182
189
|
|
|
183
|
-
Cloudflare Workflows is a strong managed option, and for many use cases it is the right tradeoff. I built `workerflow` for cases where I wanted tighter control over runtime behavior,
|
|
190
|
+
Cloudflare Workflows is a strong managed option, and for many use cases it is the right tradeoff. I built `workerflow` for cases where I wanted tighter control over runtime behavior, replay semantics, and state projection than the managed model naturally gives me.
|
|
184
191
|
|
|
185
192
|
1. Explicit ownership of workflow state and lifecycle
|
|
186
|
-
2.
|
|
193
|
+
2. Durable replay semantics that are explicit in userland code
|
|
187
194
|
3. Separation between workflow execution and external state synchronization
|
|
188
195
|
4. Extension points for streaming, WebSockets, and custom hooks
|
|
189
196
|
5. Fewer surprises around long-lived execution and error handling
|
|
190
197
|
|
|
191
|
-
###
|
|
192
|
-
|
|
193
|
-
One of the biggest concerns in long-running workflows is definition drift. A normal Worker request is typically bound to a single in-flight execution on one deployed version, but a Workflow is durable: it persists state and resumes across multiple executions over time. A workflow begin executing on version of its definition and resume later after a new deploy has changed or removed a step. That means the next invocation of the workflow entry point could repeat steps unsafely or leave the runtime in an invalid state.
|
|
198
|
+
### Definition compatibility
|
|
194
199
|
|
|
195
|
-
|
|
200
|
+
One of the biggest concerns in long-running workflows is definition drift. A normal Worker request is typically bound to a single in-flight execution on one deployed version, but a Workflow is durable: it persists state and resumes across multiple executions over time. A workflow may start on one version of its definition and resume later after a deploy has changed or removed a step. That means the next invocation of the workflow entry point could repeat steps unsafely or leave the runtime in an invalid state.
|
|
196
201
|
|
|
197
|
-
`workerflow`
|
|
202
|
+
`workerflow` keeps definition selection simple: each runtime points at one definition entrypoint, and the input is the only per-instance payload pinned by `create(input)`. If a workflow needs version-aware behavior, model that explicitly in your input shape and keep old branches compatible until the long-lived instances that need them have completed.
|
|
198
203
|
|
|
199
204
|
### Keeping workflow execution separate from state projection
|
|
200
205
|
|
|
@@ -222,12 +227,13 @@ export class MyWorkflow extends WorkflowEntrypoint {
|
|
|
222
227
|
|
|
223
228
|
This looks reasonable at first, but it creates an important failure-mode problem. If the actual business steps all succeed, but the final “sync success” step fails, then the workflow as a whole is now treated as failed. At that point, workflow execution and application-state projection have become tightly coupled, even though they are not really the same concern.
|
|
224
229
|
|
|
225
|
-
I think a cleaner design is to keep synchronization logic out of workflow steps entirely. Instead, the runtime can expose lifecycle
|
|
230
|
+
I think a cleaner design is to keep synchronization logic out of workflow steps entirely. Instead, the runtime can expose a lifecycle hook that fires when workflow status changes, and synchronization can happen there.
|
|
226
231
|
|
|
227
232
|
```ts
|
|
228
233
|
export class MyWorkflowRuntime extends WorkflowRuntime {
|
|
229
|
-
|
|
230
|
-
// Update your database, or push to a queue for streaming
|
|
234
|
+
async onStatusChange_experimental(status: "running" | "paused" | "completed" | "failed" | "cancelled") {
|
|
235
|
+
// Update your database, or push to a queue for streaming.
|
|
236
|
+
// Note: the hook is also invoked with "running" when leaving initialized/paused into running.
|
|
231
237
|
}
|
|
232
238
|
}
|
|
233
239
|
```
|
package/package.json
CHANGED
package/src/definition.ts
CHANGED
|
@@ -54,15 +54,17 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
54
54
|
*
|
|
55
55
|
* - { done: true; status: "completed" | "failed" }: the workflow has completed or aborted.
|
|
56
56
|
* - { done: false; resume: { type: "immediate" } }: the workflow should resume immediately.
|
|
57
|
-
* - { done: false; resume: { type: "suspended" } }: the workflow should suspend itself and wait for
|
|
58
|
-
* inbound event to resume.
|
|
57
|
+
* - { done: false; resume: { type: "suspended", wakeAt?: number } }: the workflow should suspend itself and wait for
|
|
58
|
+
* the next alarm or inbound event to resume. The `wakeAt` property is the timestamp (epoch milliseconds) at which the workflow should
|
|
59
|
+
* wake up. If the `wakeAt` property is not present, the workflow should wait for the next inbound event to
|
|
60
|
+
* resume.
|
|
59
61
|
* @internal
|
|
60
62
|
*/
|
|
61
63
|
async next(context: WorkflowRuntimeContext): Promise<
|
|
62
64
|
| { done: true; status: "completed" | "failed" }
|
|
63
65
|
| {
|
|
64
66
|
done: false;
|
|
65
|
-
resume: { type: "immediate" } | { type: "suspended" };
|
|
67
|
+
resume: { type: "immediate" } | { type: "suspended"; wakeAt?: number };
|
|
66
68
|
}
|
|
67
69
|
> {
|
|
68
70
|
this.#context = context;
|
|
@@ -75,7 +77,7 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
75
77
|
if (error instanceof ResumeImmediatelyError) {
|
|
76
78
|
return { done: false, resume: { type: "immediate" } };
|
|
77
79
|
} else if (error instanceof SuspendWorkflowError) {
|
|
78
|
-
return { done: false, resume: { type: "suspended" } };
|
|
80
|
+
return { done: false, resume: { type: "suspended", wakeAt: error.wakeAt } };
|
|
79
81
|
} else if (error instanceof AbortWorkflowError) {
|
|
80
82
|
return { done: true, status: "failed" };
|
|
81
83
|
} else if (
|
|
@@ -89,24 +91,24 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
89
91
|
// An exception can be thrown when calling a method on the WorkflowContext RPC target.
|
|
90
92
|
// The resulting exception will have a 'remote' property set to `true` in this case.
|
|
91
93
|
if (error instanceof Error && "remote" in error && error.remote) {
|
|
92
|
-
console.info(error, { requestId: this.#requestId, runtimeInstanceId: this.#runtimeInstanceId });
|
|
93
94
|
/**
|
|
94
95
|
* When calling Durable Objects from a Worker, errors may include .retryable and .overloaded properties
|
|
95
|
-
* indicating whether the operation can be retried.
|
|
96
|
-
*
|
|
96
|
+
* indicating whether the operation can be retried.
|
|
97
|
+
*
|
|
98
|
+
* See: https://developers.cloudflare.com/durable-objects/best-practices/error-handling/
|
|
97
99
|
*/
|
|
98
100
|
if ("retryable" in error && error.retryable) {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
console.info(error, { requestId: this.#requestId, runtimeInstanceId: this.#runtimeInstanceId });
|
|
102
|
+
// If the error is retryable, we hint the workflow to suspend and retry after 5 minutes.
|
|
103
|
+
// In the future, we can use a more sophisticated retry strategy.
|
|
104
|
+
return { done: false, resume: { type: "suspended", wakeAt: new Date().getTime() + 5 * 60 * 1000 } };
|
|
105
|
+
} else {
|
|
106
|
+
console.error(error, { requestId: this.#requestId, runtimeInstanceId: this.#runtimeInstanceId });
|
|
107
|
+
// All other (non-retryable) errors are considered fatal and the workflow should be aborted.
|
|
103
108
|
return { done: true, status: "failed" };
|
|
104
109
|
}
|
|
105
|
-
// All other remote errors are considered to be transient, so we instruct the workflow to suspend itself and wait for the next alarm to resume.
|
|
106
|
-
else {
|
|
107
|
-
return { done: false, resume: { type: "suspended" } };
|
|
108
|
-
}
|
|
109
110
|
}
|
|
111
|
+
|
|
110
112
|
// All other non-remote errors are considered fatal and the workflow should be aborted.
|
|
111
113
|
console.error(error instanceof Error ? error : String(error), {
|
|
112
114
|
requestId: this.#requestId,
|
|
@@ -137,6 +139,51 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
137
139
|
|
|
138
140
|
abstract execute(): Promise<void>;
|
|
139
141
|
|
|
142
|
+
async #processRunStepAttempt<T extends Json | undefined | void>(
|
|
143
|
+
stepId: RunStepId,
|
|
144
|
+
ctx: WorkflowRuntimeContext,
|
|
145
|
+
callback: () => Promise<T>
|
|
146
|
+
): Promise<T> {
|
|
147
|
+
let _result: unknown;
|
|
148
|
+
try {
|
|
149
|
+
_result = await this.#runStepFrameContext.run(
|
|
150
|
+
{ numOfSuccessfulRunCallbacks: 0, parentStepId: stepId },
|
|
151
|
+
async () => await callback()
|
|
152
|
+
);
|
|
153
|
+
} catch (error) {
|
|
154
|
+
/**
|
|
155
|
+
* A 'run' step callback can include nested steps that can throw control flow errors like 'ResumeImmediatelyError'
|
|
156
|
+
* and 'SuspendWorkflowError'. We rethrow these errors without recording a failure on this (parent) attempt.
|
|
157
|
+
*/
|
|
158
|
+
if (error instanceof ResumeImmediatelyError || error instanceof SuspendWorkflowError) {
|
|
159
|
+
throw error;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
const updated = await ctx.handleRunAttemptFailed(stepId, {
|
|
163
|
+
errorMessage: String(error),
|
|
164
|
+
errorName: error instanceof Error ? error.name : undefined,
|
|
165
|
+
isNonRetryableStepError: error instanceof NonRetryableStepError
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
if (error instanceof NonRetryableStepError) throw error;
|
|
169
|
+
|
|
170
|
+
if (updated.nextAttemptAt === undefined) {
|
|
171
|
+
const error = new MaxAttemptsExceededError();
|
|
172
|
+
Error.captureStackTrace(error, WorkflowDefinition.prototype.run);
|
|
173
|
+
throw error;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
throw new SuspendWorkflowError(updated.nextAttemptAt.getTime());
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// SQL NULL (resultJson === null) encodes `undefined`; otherwise raw JSON.stringify for the value.
|
|
180
|
+
const resultJson = _result === undefined ? null : JSON.stringify(_result);
|
|
181
|
+
await ctx.handleRunAttemptSucceeded(stepId, resultJson);
|
|
182
|
+
|
|
183
|
+
this.#getRunStepFrame().numOfSuccessfulRunCallbacks += 1;
|
|
184
|
+
return _result as T;
|
|
185
|
+
}
|
|
186
|
+
|
|
140
187
|
protected async run<T extends Json | undefined | void>(
|
|
141
188
|
id: string,
|
|
142
189
|
callback: () => Promise<T>,
|
|
@@ -156,8 +203,7 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
156
203
|
|
|
157
204
|
const parentStepId = this.#getRunStepFrame().parentStepId;
|
|
158
205
|
|
|
159
|
-
const step = await ctx.
|
|
160
|
-
type: "run",
|
|
206
|
+
const step = await ctx.getOrCreateRunStep(runStepId, {
|
|
161
207
|
maxAttempts: config?.maxAttempts,
|
|
162
208
|
parentStepId
|
|
163
209
|
});
|
|
@@ -166,160 +212,49 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
166
212
|
throw new ResumeImmediatelyError();
|
|
167
213
|
}
|
|
168
214
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const attemptCount = step.attemptCount + 1; // Increment the attempt count by 1 as we're starting a new attempt
|
|
175
|
-
const maxAttempts = step.maxAttempts;
|
|
176
|
-
|
|
177
|
-
await ctx.handleRunAttemptEvent(runStepId, {
|
|
178
|
-
type: "running",
|
|
179
|
-
attemptCount: attemptCount
|
|
180
|
-
});
|
|
215
|
+
const lastAttempt = step.attempts[step.attempts.length - 1];
|
|
216
|
+
if (lastAttempt === undefined) {
|
|
217
|
+
await ctx.handleRunAttemptStarted(runStepId);
|
|
181
218
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
// 'ResumeImmediatelyError' and 'SuspendWorkflowError' are rethrown so a nested `run()` does not record a spurious failure on the parent.
|
|
190
|
-
if (error instanceof ResumeImmediatelyError || error instanceof SuspendWorkflowError) {
|
|
191
|
-
throw error;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
await ctx.handleRunAttemptEvent(runStepId, {
|
|
195
|
-
type: "failed",
|
|
196
|
-
errorMessage: String(error),
|
|
197
|
-
errorName: error instanceof Error ? error.name : undefined,
|
|
198
|
-
attemptCount: attemptCount,
|
|
199
|
-
isNonRetryableStepError: error instanceof NonRetryableStepError
|
|
219
|
+
return await this.#processRunStepAttempt(runStepId, ctx, callback);
|
|
220
|
+
} else if (lastAttempt.state === "started") {
|
|
221
|
+
const hasInProgressChildSteps = await ctx.hasInProgressChildSteps(runStepId);
|
|
222
|
+
if (!hasInProgressChildSteps) {
|
|
223
|
+
const updated = await ctx.handleRunAttemptFailed(runStepId, {
|
|
224
|
+
errorMessage: STEP_EXECUTION_INTERRUPTED_ERROR_MESSAGE,
|
|
225
|
+
errorName: undefined
|
|
200
226
|
});
|
|
201
227
|
|
|
202
|
-
if (
|
|
203
|
-
throw error;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
if (maxAttempts !== null && attemptCount >= maxAttempts) {
|
|
228
|
+
if (updated.nextAttemptAt === undefined) {
|
|
207
229
|
const error = new MaxAttemptsExceededError();
|
|
208
230
|
Error.captureStackTrace(error, WorkflowDefinition.prototype.run);
|
|
209
231
|
throw error;
|
|
210
232
|
}
|
|
211
233
|
|
|
212
|
-
throw new SuspendWorkflowError();
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
let result: string;
|
|
216
|
-
if (_result === undefined) {
|
|
217
|
-
result = "{}";
|
|
234
|
+
throw new SuspendWorkflowError(updated.nextAttemptAt.getTime());
|
|
218
235
|
} else {
|
|
219
|
-
|
|
236
|
+
return await this.#processRunStepAttempt(runStepId, ctx, callback);
|
|
220
237
|
}
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
});
|
|
227
|
-
|
|
228
|
-
this.#getRunStepFrame().numOfSuccessfulRunCallbacks += 1;
|
|
229
|
-
|
|
230
|
-
return _result as T;
|
|
231
|
-
} else if (step.state === "running") {
|
|
232
|
-
const maxAttempts = step.maxAttempts;
|
|
233
|
-
const attemptCount = step.attemptCount;
|
|
234
|
-
|
|
235
|
-
// If no direct child row explains the parent still being `running` (see `hasRunningOrWaitingChildSteps`), fail the attempt as interrupted.
|
|
236
|
-
if (!(await ctx.hasRunningOrWaitingChildSteps(runStepId))) {
|
|
237
|
-
await ctx.handleRunAttemptEvent(runStepId, {
|
|
238
|
-
type: "failed",
|
|
239
|
-
errorMessage: STEP_EXECUTION_INTERRUPTED_ERROR_MESSAGE,
|
|
240
|
-
errorName: undefined,
|
|
241
|
-
attemptCount: attemptCount
|
|
242
|
-
});
|
|
243
|
-
|
|
244
|
-
if (maxAttempts !== null && attemptCount >= maxAttempts) {
|
|
245
|
-
const error = new MaxAttemptsExceededError();
|
|
246
|
-
Error.captureStackTrace(error, WorkflowDefinition.prototype.run);
|
|
247
|
-
throw error;
|
|
238
|
+
} else if (lastAttempt.state === "failed") {
|
|
239
|
+
if (lastAttempt.nextAttemptAt) {
|
|
240
|
+
if (lastAttempt.nextAttemptAt.getTime() <= Date.now()) {
|
|
241
|
+
await ctx.handleRunAttemptStarted(runStepId);
|
|
242
|
+
return await this.#processRunStepAttempt(runStepId, ctx, callback);
|
|
248
243
|
} else {
|
|
249
|
-
throw new SuspendWorkflowError();
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// Direct children in non-failure states: continue the same attempt by re-entering the callback.
|
|
254
|
-
let _result: unknown;
|
|
255
|
-
try {
|
|
256
|
-
_result = await this.#runStepFrameContext.run(
|
|
257
|
-
{ numOfSuccessfulRunCallbacks: 0, parentStepId: runStepId },
|
|
258
|
-
async () => await callback()
|
|
259
|
-
);
|
|
260
|
-
} catch (error) {
|
|
261
|
-
if (error instanceof ResumeImmediatelyError || error instanceof SuspendWorkflowError) {
|
|
262
|
-
throw error;
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
await ctx.handleRunAttemptEvent(runStepId, {
|
|
266
|
-
type: "failed",
|
|
267
|
-
errorMessage: String(error),
|
|
268
|
-
errorName: error instanceof Error ? error.name : undefined,
|
|
269
|
-
attemptCount: attemptCount,
|
|
270
|
-
isNonRetryableStepError: error instanceof NonRetryableStepError
|
|
271
|
-
});
|
|
272
|
-
|
|
273
|
-
if (error instanceof NonRetryableStepError) {
|
|
274
|
-
throw error;
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
if (maxAttempts !== null && attemptCount >= maxAttempts) {
|
|
278
|
-
const err = new MaxAttemptsExceededError();
|
|
279
|
-
Error.captureStackTrace(err, WorkflowDefinition.prototype.run);
|
|
280
|
-
throw err;
|
|
244
|
+
throw new SuspendWorkflowError(lastAttempt.nextAttemptAt.getTime());
|
|
281
245
|
}
|
|
282
|
-
|
|
283
|
-
throw new
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
const result: string = _result === undefined ? "{}" : JSON.stringify({ value: _result });
|
|
287
|
-
|
|
288
|
-
await ctx.handleRunAttemptEvent(runStepId, {
|
|
289
|
-
type: "succeeded",
|
|
290
|
-
attemptCount: attemptCount,
|
|
291
|
-
result: result
|
|
292
|
-
});
|
|
293
|
-
|
|
294
|
-
this.#getRunStepFrame().numOfSuccessfulRunCallbacks += 1;
|
|
295
|
-
|
|
296
|
-
return _result as T;
|
|
297
|
-
} else if (step.state === "failed") {
|
|
298
|
-
throw new AbortWorkflowError();
|
|
299
|
-
} else if (step.state === "succeeded") {
|
|
300
|
-
const parsed: unknown = JSON.parse(step.result);
|
|
301
|
-
if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
|
|
302
|
-
throw new Error(
|
|
303
|
-
"Invalid stored workflow result; expected a non-null object payload; storage may be corrupted or written by an incompatible version."
|
|
304
|
-
);
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
const keys = Object.keys(parsed);
|
|
308
|
-
// "{}" means top-level undefined
|
|
309
|
-
if (keys.length === 0) {
|
|
310
|
-
return undefined as T;
|
|
246
|
+
} else {
|
|
247
|
+
throw new AbortWorkflowError();
|
|
311
248
|
}
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
249
|
+
} else if (lastAttempt.state === "succeeded") {
|
|
250
|
+
// Replay: the callback is NOT re-executed. Reconstruct the return value from durable state.
|
|
251
|
+
if (lastAttempt.resultType === "json") {
|
|
252
|
+
return JSON.parse(lastAttempt.resultJson) as T;
|
|
315
253
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
);
|
|
254
|
+
return undefined as T;
|
|
255
|
+
} else {
|
|
256
|
+
throw new Error("Unexpected run step attempt state; expected 'started', 'failed', or 'succeeded'.");
|
|
320
257
|
}
|
|
321
|
-
|
|
322
|
-
throw new Error("Unexpected run step state; expected 'pending', 'running', 'failed', or 'succeeded'.");
|
|
323
258
|
}
|
|
324
259
|
|
|
325
260
|
protected async sleep(id: string, duration: number): Promise<void> {
|
|
@@ -333,8 +268,7 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
333
268
|
throw error;
|
|
334
269
|
}
|
|
335
270
|
|
|
336
|
-
const step = await ctx.
|
|
337
|
-
type: "sleep",
|
|
271
|
+
const step = await ctx.getOrCreateSleepStep(sleepStepId, {
|
|
338
272
|
wakeAt: new Date(Date.now() + duration),
|
|
339
273
|
parentStepId: this.#getRunStepFrame().parentStepId
|
|
340
274
|
});
|
|
@@ -345,11 +279,11 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
345
279
|
} else if (step.state === "waiting") {
|
|
346
280
|
// If the sleep step is not yet due to wake up, we suspend the workflow.
|
|
347
281
|
if (Date.now() < step.wakeAt.getTime()) {
|
|
348
|
-
throw new SuspendWorkflowError();
|
|
282
|
+
throw new SuspendWorkflowError(step.wakeAt.getTime());
|
|
349
283
|
}
|
|
350
284
|
// If the sleep step is due to wake up, we mark the step as elapsed and throw a 'ResumeImmediatelyError' to hint the driver to resume the workflow immediately.
|
|
351
285
|
else {
|
|
352
|
-
await ctx.
|
|
286
|
+
await ctx.handleSleepStepElapsed(sleepStepId);
|
|
353
287
|
throw new ResumeImmediatelyError();
|
|
354
288
|
}
|
|
355
289
|
}
|
|
@@ -357,7 +291,11 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
357
291
|
throw new Error("Unexpected sleep step state; expected 'waiting' or 'elapsed'.");
|
|
358
292
|
}
|
|
359
293
|
|
|
360
|
-
protected async wait<T extends Json
|
|
294
|
+
protected async wait<T extends Json | undefined>(
|
|
295
|
+
id: string,
|
|
296
|
+
event: string,
|
|
297
|
+
config?: { timeoutAt?: number }
|
|
298
|
+
): Promise<T> {
|
|
361
299
|
const waitStepId = id as WaitStepId;
|
|
362
300
|
this.#assertUniqueStepIdInCurrentExecution(waitStepId);
|
|
363
301
|
|
|
@@ -368,30 +306,34 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
368
306
|
throw error;
|
|
369
307
|
}
|
|
370
308
|
|
|
371
|
-
const step = await ctx.
|
|
372
|
-
type: "wait",
|
|
309
|
+
const step = await ctx.getOrCreateWaitStep<T>(waitStepId, {
|
|
373
310
|
eventName: event,
|
|
374
311
|
timeoutAt: config?.timeoutAt ? new Date(config.timeoutAt) : undefined,
|
|
375
312
|
parentStepId: this.#getRunStepFrame().parentStepId
|
|
376
313
|
});
|
|
377
314
|
|
|
378
315
|
if (step.state === "waiting") {
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
316
|
+
if (step.timeoutAt !== undefined) {
|
|
317
|
+
// If the timeout has been reached (or exceeded), we mark the step as timed out and throw a 'WaitStepTimedOutError' to the caller.
|
|
318
|
+
if (Date.now() >= step.timeoutAt.getTime()) {
|
|
319
|
+
await ctx.handleWaitStepTimedOut(waitStepId);
|
|
320
|
+
const error = new WaitStepTimedOutError();
|
|
321
|
+
Error.captureStackTrace(error, WorkflowDefinition.prototype.wait);
|
|
322
|
+
throw error;
|
|
323
|
+
} else {
|
|
324
|
+
// If the timeout has not been reached, we suspend the workflow and wait for the next alarm to resume.
|
|
325
|
+
throw new SuspendWorkflowError(step.timeoutAt.getTime());
|
|
326
|
+
}
|
|
327
|
+
} else {
|
|
328
|
+
// If the wait step does not have a timeout, we suspend the workflow and wait for the next inbound event to resume.
|
|
329
|
+
throw new SuspendWorkflowError();
|
|
385
330
|
}
|
|
386
|
-
|
|
387
|
-
// Otherwise, we hint the driver to suspend the workflow until the next alarm or inbound event to resume.
|
|
388
|
-
throw new SuspendWorkflowError();
|
|
389
331
|
} else if (step.state === "timed_out") {
|
|
390
332
|
// If the wait step has timed out, we throw an 'AbortWorkflowError' to abort the workflow.
|
|
391
333
|
throw new AbortWorkflowError();
|
|
392
334
|
} else if (step.state === "satisfied") {
|
|
393
335
|
// If the wait step has been satisfied, we return the payload of the satisfied step.
|
|
394
|
-
return
|
|
336
|
+
return step.payload;
|
|
395
337
|
}
|
|
396
338
|
|
|
397
339
|
throw new Error("Unexpected wait step state; expected 'waiting', 'satisfied', or 'timed_out'.");
|
|
@@ -399,7 +341,17 @@ export abstract class WorkflowDefinition<TInput extends Json | undefined = Json
|
|
|
399
341
|
}
|
|
400
342
|
|
|
401
343
|
class ResumeImmediatelyError extends Error {}
|
|
402
|
-
class SuspendWorkflowError extends Error {
|
|
344
|
+
class SuspendWorkflowError extends Error {
|
|
345
|
+
readonly #wakeAt?: number;
|
|
346
|
+
constructor(wakeAt?: number) {
|
|
347
|
+
super();
|
|
348
|
+
this.#wakeAt = wakeAt;
|
|
349
|
+
this.name = "SuspendWorkflowError";
|
|
350
|
+
}
|
|
351
|
+
get wakeAt() {
|
|
352
|
+
return this.#wakeAt;
|
|
353
|
+
}
|
|
354
|
+
}
|
|
403
355
|
class AbortWorkflowError extends Error {}
|
|
404
356
|
|
|
405
357
|
class MaxAttemptsExceededError extends Error {}
|
package/src/json.ts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Represents an indefinitely deep arbitrary JSON data structure. There are
|
|
3
|
-
* four types that make up the Json family:
|
|
4
|
-
*
|
|
5
|
-
* - Json any legal JSON value
|
|
6
|
-
* - JsonScalar any legal JSON leaf value (no lists or objects)
|
|
7
|
-
* - JsonArray a JSON value whose outer type is an array
|
|
8
|
-
* - JsonObject a JSON value whose outer type is an object
|
|
2
|
+
* Represents an indefinitely deep arbitrary JSON data structure. There are four types that make up the Json family:
|
|
9
3
|
*
|
|
4
|
+
* - Json any legal JSON value
|
|
5
|
+
* - JsonScalar any legal JSON leaf value (no lists or objects)
|
|
6
|
+
* - JsonArray a JSON value whose outer type is an array
|
|
7
|
+
* - JsonObject a JSON value whose outer type is an object
|
|
10
8
|
*/
|
|
11
9
|
export type Json = JsonScalar | JsonArray | JsonObject;
|
|
12
10
|
export type JsonScalar = string | number | boolean | null;
|