@redflow/client 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INTERNALS.md +238 -0
- package/README.md +34 -3
- package/package.json +1 -1
- package/src/client.ts +23 -20
- package/src/types.ts +7 -1
- package/src/worker.ts +102 -21
- package/tests/bugfixes.test.ts +11 -11
- package/tests/fixtures/worker-crash.ts +1 -0
- package/tests/fixtures/worker-recover.ts +1 -0
- package/tests/redflow.e2e.test.ts +182 -72
package/INTERNALS.md
ADDED
@@ -0,0 +1,238 @@

# redflow internals

This document describes how `@redflow/client` works internally, in production terms.

## Design model

- Durable state lives in Redis.
- Handlers and workflow code live in process memory (per worker process).
- The runtime is queue-based and crash-recoverable.
- Delivery semantics are at-least-once at the run level.
- The step API provides deterministic replay/caching to avoid repeating completed work.

## Main components

- **Workflow registry (in-memory):** built via `defineWorkflow(...)`.
- **Client (`RedflowClient`):** enqueue runs, inspect state, cancel runs, sync metadata.
- **Worker runtime:** executes queued runs, retries failures, promotes scheduled runs.
- **Cron scheduler:** leader-elected loop that creates cron runs.

## Registry and metadata sync

`startWorker({ app, ... })` always calls `syncRegistry(registry, { app })` before its loops start.

What `syncRegistry` writes per workflow:

- `workflow:<name>` hash:
  - `name`
  - `queue`
  - `maxConcurrency` (default `1`)
  - `app` (required ownership scope for cleanup)
  - `updatedAt`
  - `cronJson`
  - `retriesJson`
  - `cronIdsJson`
- `workflows` set (all known workflow names)
- cron definitions in `cron:def` and the schedule in `cron:next`
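
For illustration, a manual sync plus a raw metadata read could look like the sketch below. It assumes `createClient` is exported from the package root and accepts a `prefix` (the worker constructs it with `{ redis, prefix }` internally), and the exact key string produced for `workflow:<name>` is a guess.

```ts
import { createClient, getDefaultRegistry } from "@redflow/client"; // createClient export assumed
import "./workflows"; // hypothetical module that calls defineWorkflow(...) for each workflow

const client = createClient({ prefix: "redflow:prod" }); // option shape assumed

// Writes workflow:<name> hashes, the workflows set, cron:def and cron:next,
// all scoped to this app id for later stale cleanup.
await client.syncRegistry(getDefaultRegistry(), { app: "billing-service" });

// Raw inspection via the generic command passthrough; the key format is illustrative.
const meta = await client.redis.send("HGETALL", ["redflow:prod:workflow:heavy-sync"]);
console.log(meta); // name, queue, maxConcurrency, app, updatedAt, cronJson, retriesJson, cronIdsJson
```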

### Stale cleanup

Before writing new metadata, sync removes stale workflow metadata when all of the following are true:

- the workflow exists in Redis,
- the workflow is missing from the current registry,
- the workflow's `app` equals the current `app`,
- the workflow is older than the grace period (`30s`).

Cleanup removes:

- the `workflow:<name>` metadata hash,
- `workflows` set membership,
- associated cron entries (`cron:def`, `cron:next`).

It does **not** delete historical runs.
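
A condensed sketch of that cleanup decision; the real logic lives in `RedflowClient.cleanupStaleWorkflows` (shown later in this diff), and the helper shape here is illustrative:

```ts
// Illustrative only: mirrors the documented conditions, not the exact source.
const STALE_GRACE_MS = 30_000; // assumption: the "older than 30s" grace period

function shouldPruneWorkflow(args: {
  existsInRedis: boolean;
  inCurrentRegistry: boolean;
  workflowApp: string; // value of the `app` field on workflow:<name>
  currentApp: string;  // app passed to syncRegistry
  updatedAt: number;   // ms timestamp from workflow:<name>
  now: number;
}): boolean {
  const { existsInRedis, inCurrentRegistry, workflowApp, currentApp, updatedAt, now } = args;
  if (!existsInRedis) return false;
  if (inCurrentRegistry) return false;
  if (!workflowApp || workflowApp !== currentApp) return false;
  return now - updatedAt > STALE_GRACE_MS;
}
```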

## Redis keyspace

Key builders are in `src/internal/keys.ts`.

- `workflows`
- `workflow:<name>`
- `workflow-runs:<name>`
- `runs:created`
- `runs:status:<status>`
- `run:<runId>`
- `run:<runId>:steps`
- `run:<runId>:lease`
- `q:<queue>:ready`
- `q:<queue>:processing`
- `q:<queue>:scheduled`
- `cron:def`
- `cron:next`
- `lock:cron`
- `idempo:<encodedWorkflow>:<encodedKey>`
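
An illustrative sketch of such prefix-scoped builders. The names `workflows`, `workflow`, `run`, `runsStatus`, and `queueReady` match calls visible elsewhere in this diff; the rest, and the exact join format, are assumptions:

```ts
// Sketch of prefix-scoped key builders; the real src/internal/keys.ts may differ
// in naming and in how the prefix is joined.
export const keys = {
  workflows: (prefix: string) => `${prefix}:workflows`,
  workflow: (prefix: string, name: string) => `${prefix}:workflow:${name}`,
  workflowRuns: (prefix: string, name: string) => `${prefix}:workflow-runs:${name}`,
  run: (prefix: string, runId: string) => `${prefix}:run:${runId}`,
  runSteps: (prefix: string, runId: string) => `${prefix}:run:${runId}:steps`,
  runLease: (prefix: string, runId: string) => `${prefix}:run:${runId}:lease`,
  runsStatus: (prefix: string, status: string) => `${prefix}:runs:status:${status}`,
  queueReady: (prefix: string, queue: string) => `${prefix}:q:${queue}:ready`,
  queueProcessing: (prefix: string, queue: string) => `${prefix}:q:${queue}:processing`,
  queueScheduled: (prefix: string, queue: string) => `${prefix}:q:${queue}:scheduled`,
};
```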

## Run lifecycle

Statuses:

- `scheduled`
- `queued`
- `running`
- terminal: `succeeded`, `failed`, `canceled`

### Enqueue

Enqueue uses `ENQUEUE_RUN_LUA` atomically. The script:

- creates the run hash,
- writes indexes (`runs:created`, `runs:status:*`, `workflow-runs:*`),
- pushes to the ready queue or the scheduled ZSET,
- applies the idempotency mapping if a key was provided.

The idempotency key TTL defaults to `7 days`.
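
From the caller's side, run-level idempotency looks like the sketch below. `orderWorkflow` is a hypothetical `defineWorkflow(...)` export; the `run(input, options)` option name `idempotencyKey` matches what `worker.ts` passes in this diff.

```ts
import { orderWorkflow } from "./workflows"; // hypothetical workflow object

// Two enqueues with the same key inside the TTL window map to the same run.
const first = await orderWorkflow.run(
  { orderId: "o_123" },
  { idempotencyKey: "order:o_123:charge" },
);
const second = await orderWorkflow.run(
  { orderId: "o_123" },
  { idempotencyKey: "order:o_123:charge" },
);

console.log(first.id === second.id); // expected: true (run creation was deduplicated)
```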

### Processing

The worker loop uses `LMOVE`/`BLMOVE` to move run ids from `ready` to `processing`.

For each claimed run, the worker:

1. Acquires the lease (`run:<id>:lease`) with periodic renewal.
2. Validates the current run status.
3. If `queued`, enforces `maxConcurrency` for that workflow.
4. Transitions `queued -> running` atomically.
5. Executes the handler with the step engine.
6. Finalizes to a terminal status atomically.
7. Removes the run from `processing`.

If the lease is lost, the current worker aborts and does not finalize.
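
The shape of one worker slot, sketched against the generic `redis.send(...)` passthrough used elsewhere in the package; `processRun` and the pop directions here are stand-ins, not the exact internals:

```ts
// Illustrative worker slot: block for a run id, then hand it to the run processor.
async function workerSlotLoop(args: {
  redis: { send(command: string, args: string[]): Promise<unknown> };
  readyKey: string;
  processingKey: string;
  blmoveTimeoutSec: number;
  signal: AbortSignal;
  processRun: (runId: string) => Promise<void>;
}): Promise<void> {
  const { redis, readyKey, processingKey, blmoveTimeoutSec, signal, processRun } = args;

  while (!signal.aborted) {
    // Atomically claim: move one id from ready to processing, or time out and loop.
    const claimed = (await redis.send("BLMOVE", [
      readyKey,
      processingKey,
      "RIGHT",
      "LEFT",
      String(blmoveTimeoutSec),
    ])) as string | null;

    if (!claimed) continue;
    await processRun(claimed);
  }
}
```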

### Reaper

The reaper scans `processing` lists. For each run without an active lease, it:

- removes the run from `processing`,
- pushes it back to `ready`.

This recovers from worker crashes.
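
One reaper pass over a single queue, as a sketch. Key formats follow the keyspace above, and checking lease liveness with `EXISTS` is an assumption about the implementation:

```ts
async function reapQueueOnce(args: {
  redis: { send(command: string, args: string[]): Promise<unknown> };
  prefix: string;
  queue: string;
}): Promise<void> {
  const { redis, prefix, queue } = args;
  const processingKey = `${prefix}:q:${queue}:processing`;
  const readyKey = `${prefix}:q:${queue}:ready`;

  const runIds = (await redis.send("LRANGE", [processingKey, "0", "-1"])) as string[];

  for (const runId of runIds) {
    const leaseExists = (await redis.send("EXISTS", [`${prefix}:run:${runId}:lease`])) as number;
    if (leaseExists) continue; // still owned by a live worker

    // Orphaned run: remove it from processing and requeue it for another worker.
    const removed = (await redis.send("LREM", [processingKey, "1", runId])) as number;
    if (removed > 0) {
      await redis.send("LPUSH", [readyKey, runId]);
    }
  }
}
```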

### Scheduled promoter

The promoter pops due items from `q:<queue>:scheduled` (`ZPOPMIN` batch), then:

- transitions `scheduled -> queued`,
- pushes the run to `ready`.

Items that are not yet due are put back.
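
A sketch of one promoter pass. `markQueued` stands in for the real `scheduled -> queued` transition, and the batch size is illustrative:

```ts
async function promoteDueOnce(args: {
  redis: { send(command: string, args: string[]): Promise<unknown> };
  scheduledKey: string; // q:<queue>:scheduled (ZSET scored by due time in ms)
  readyKey: string;
  markQueued: (runId: string) => Promise<void>; // stand-in for the status transition
  batchSize?: number;
}): Promise<void> {
  const { redis, scheduledKey, readyKey, markQueued, batchSize = 100 } = args;
  const now = Date.now();

  // ZPOPMIN with a count returns a flat [member, score, member, score, ...] array.
  const popped = (await redis.send("ZPOPMIN", [scheduledKey, String(batchSize)])) as string[];

  for (let i = 0; i < popped.length; i += 2) {
    const runId = popped[i];
    const dueAt = Number(popped[i + 1]);

    if (dueAt > now) {
      // Not due yet: put it back with its original score.
      await redis.send("ZADD", [scheduledKey, String(dueAt), runId]);
      continue;
    }

    await markQueued(runId);
    await redis.send("LPUSH", [readyKey, runId]);
  }
}
```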

## maxConcurrency

`maxConcurrency` is per workflow; the default is `1`.

### For regular queued runs

When a worker picks up a `queued` run:

- it counts the current `running` runs for the same workflow,
- if the count >= `maxConcurrency`, the run is atomically moved from `processing` back to the end of `ready`.

So non-cron runs are delayed, not dropped.
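
Compressed into one decision (the actual code appears later in this diff, in `worker.ts`); the helper names here are stand-ins:

```ts
// Sketch: decide whether a claimed queued run may start now.
async function mayStartNow(args: {
  countRunning: (workflowName: string, stopAt: number) => Promise<number>;
  requeue: (runId: string) => Promise<void>; // atomic processing -> ready move (Lua)
  workflowName: string;
  runId: string;
  maxConcurrency: number;
}): Promise<boolean> {
  const running = await args.countRunning(args.workflowName, args.maxConcurrency);
  if (running >= args.maxConcurrency) {
    await args.requeue(args.runId); // delayed, not dropped
    return false;
  }
  return true;
}
```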

### For cron runs

The cron loop also checks the running count before enqueueing:

- if the count >= `maxConcurrency`, that cron tick is skipped,
- the next cron tick is still scheduled normally.

## Cron scheduler

- Leader election via the Redis lock `lock:cron`.
- Only the lock holder schedules cron runs.
- The loop pops the earliest `cronId` from `cron:next`.
- If it is due, the loop:
  - parses the `cron:def` payload,
  - enforces `maxConcurrency`,
  - enqueues the run via `runByName` (or skips),
  - computes the next fire time and stores it in `cron:next`.

Cron uses "reschedule from now" behavior: there is no catch-up burst if the stored timestamp is in the past.
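
The "reschedule from now" rule, with a hypothetical `nextFireTime(expression, from)` helper standing in for whatever cron parser the package uses (not shown in this diff):

```ts
// Hypothetical helper; the actual cron parser used by the package is not shown here.
declare function nextFireTime(expression: string, from: Date): Date;

// "Reschedule from now": missed ticks are not replayed as a catch-up burst.
function computeNextCronAt(expression: string, now: number): number {
  return nextFireTime(expression, new Date(now)).getTime();
}
```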

## Step engine semantics

Inside a handler, the `step` API has three primitives.

### `step.run(...)`

- Step state is persisted in the `run:<id>:steps` hash under `step.name`.
- If the step already `succeeded`, the cached output is returned.
- Duplicate step names in one execution are rejected.
- Step timeout and cancellation are supported.
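
Typical handler usage. The handler context shape (`{ input, step }`) and the exact `step.run(options, fn)` call shape are inferred from the README and this diff, so treat them as assumptions:

```ts
import { defineWorkflow } from "@redflow/client"; // root export assumed, as in the README

defineWorkflow("send-invoice", { queue: "io" }, async ({ input, step }) => {
  // Cached after first success: a retried run will not re-create the invoice.
  const invoice = await step.run({ name: "create-invoice", timeoutMs: 30_000 }, async () => {
    return { invoiceId: `inv_${input.orderId}` };
  });

  // Step names must be unique within one execution.
  await step.run({ name: "email-invoice" }, async () => {
    // ... send the email using invoice.invoiceId ...
    return { sent: true };
  });

  return { invoiceId: invoice.invoiceId };
});
```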

### `step.runWorkflow(...)`

- Enqueues the child workflow with deterministic idempotency by default:
  - `parentRunId + stepName + childWorkflowName`.
- Waits for child completion.
- Waiting is bounded by the step `timeoutMs` (if set), otherwise unbounded until cancellation.
- Inline assist: if the child is queued on a queue this worker handles, the worker may execute the child inline to avoid self-deadlock under low concurrency.
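
Assuming the same `(options, workflow, input)` shape as `emitWorkflow` (the types in this diff only show the trailing parameters), a blocking child call looks like:

```ts
import { defineWorkflow } from "@redflow/client"; // root export assumed
import { chargeWorkflow } from "./charge"; // hypothetical child workflow object

defineWorkflow("checkout", { queue: "critical" }, async ({ input, step }) => {
  // The child run is deduplicated by parentRunId + step name + child workflow name,
  // so a replayed parent will not enqueue a second charge.
  const charge = await step.runWorkflow(
    { name: "charge-order", timeoutMs: 60_000 }, // waiting is bounded by timeoutMs
    chargeWorkflow,
    { orderId: input.orderId, amountCents: input.totalCents },
  );

  return { chargeId: charge.chargeId };
});
```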

### `step.emitWorkflow(...)`

- Enqueues the child workflow and returns the child `runId`.
- Accepts the child as a workflow object or a workflow name string.
- Uses a deterministic idempotency default based on the parent run and step name.
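
Fire-and-forget by name, matching the example added to the README in this diff (handler context shape assumed as above):

```ts
// Inside a handler:
const analyticsRunId = await step.emitWorkflow(
  { name: "emit-analytics" },
  "analytics-consumer", // child referenced by workflow name string
  { orderId: input.orderId, totalCents: input.totalCents },
);
// The parent does not wait; keep the returned runId if you want to observe the child later.
```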

## Retry model

- `maxAttempts` is workflow-level (`retries.maxAttempts`), default `1`.
- Retry delay uses exponential backoff plus jitter.
- Non-retryable classes:
  - input validation errors,
  - unknown workflow,
  - output serialization errors,
  - cancellation,
  - explicit `NonRetriableError`.
- Retry scheduling is atomic (`scheduleRetry` Lua): the status/index update and the scheduled-ZSET write happen in one script.
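
Configuring retries and opting a specific failure out of them. `NonRetriableError` is named above, but its export path and constructor signature are assumptions, as is the handler context shape:

```ts
import { defineWorkflow, NonRetriableError } from "@redflow/client"; // export path assumed

defineWorkflow(
  "sync-customer",
  { queue: "io", retries: { maxAttempts: 5 } }, // retried with exponential backoff + jitter
  async ({ input, step }) => {
    await step.run({ name: "push-to-crm" }, async () => {
      const res = await fetch(`https://crm.example.com/customers/${input.customerId}`, {
        method: "PUT",
      });

      if (res.status === 404) {
        // Permanent condition: fail the run without consuming the remaining attempts.
        throw new NonRetriableError(`customer ${input.customerId} does not exist`);
      }
      if (!res.ok) {
        // Transient condition: throw a normal error and let the retry model reschedule.
        throw new Error(`CRM responded ${res.status}`);
      }
      return { ok: true };
    });

    return { synced: true };
  },
);
```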

## Cancellation

`cancelRun(runId)`:

- sets `cancelRequestedAt` plus an optional reason,
- if the run is `queued`/`scheduled`, attempts an immediate transition to `canceled` and cleans up,
- if the run is `running`, cancellation is cooperative via `AbortSignal` polling in the worker.

The terminal finalize script ensures consistent indexes and a terminal status.
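
Caller-side cancellation, as a sketch. The `createClient` export, its option shape, and the workflow import are assumptions; only `cancelRun(runId)` itself is documented above:

```ts
import { createClient } from "@redflow/client"; // export path assumed
import { reportWorkflow } from "./workflows"; // hypothetical workflow object

const client = createClient({ prefix: "redflow:prod" }); // option shape assumed

const handle = await reportWorkflow.run({ month: "2024-06" });

// Queued/scheduled runs flip to `canceled` immediately; a running run stops
// cooperatively the next time the worker polls the abort signal.
await client.cancelRun(handle.id);
```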

## Idempotency vs step cache

- **Idempotency:** deduplicates run creation (`key -> runId`) with a TTL.
- **Step cache:** deduplicates completed step execution within one parent run.

They cover different failure windows and are intentionally both used.

## Multi-worker behavior

- Many workers can process the same prefix/queues.
- Cron scheduling is single-leader.
- Processing/recovery is shared via Redis lists plus leases.
- `maxConcurrency` is enforced globally against the Redis `running` index.

## Operational notes

Recommended for production:

- Use a stable `prefix` per environment.
- Use an explicit `app` per service role for safe metadata cleanup.
- Set `maxConcurrency` intentionally for long workflows.
- Keep queue ownership clear (avoid workers consuming queues for workflows they do not register).
- Use idempotency keys for external trigger endpoints.

## Current guarantees and limitations

- Run execution is at-least-once.
- The step cache reduces replay but cannot provide globally exactly-once side effects.
- `maxConcurrency` is enforced via runtime checks against Redis state; it is robust in practice but is not a strict distributed semaphore.
- The `handle.result({ timeoutMs })` timeout bounds only how long the caller waits, not run execution itself.
package/README.md
CHANGED
@@ -2,6 +2,8 @@

 Redis-backed workflow runtime for Bun.

+Deep internal details: `INTERNALS.md`
+
 ## Warning

 This project is still in early alpha stage.
@@ -105,6 +107,16 @@ const analyticsRunId = await step.emitWorkflow(
 );
 ```

+You can also pass a workflow name string:
+
+```ts
+const analyticsRunId = await step.emitWorkflow(
+  { name: "emit-analytics" },
+  "analytics-consumer",
+  { orderId: input.orderId, totalCents: input.totalCents },
+);
+```
+
 ## Run workflows

 The object returned by `defineWorkflow(...)` has `.run(...)`.
@@ -137,13 +149,14 @@ const output = await handle.result({ timeoutMs: 90_000 });

 ## Start a worker

-Import workflows, then run `startWorker()`.
+Import workflows, then run `startWorker({ app: ... })`.

 ```ts
 import { startWorker } from "@redflow/client";
 import "./workflows";

 const worker = await startWorker({
+  app: "billing-worker",
   url: process.env.REDIS_URL,
   prefix: "redflow:prod",
   concurrency: 4,
@@ -154,6 +167,7 @@ Explicit queues + runtime tuning:

 ```ts
 const worker = await startWorker({
+  app: "billing-worker",
   url: process.env.REDIS_URL,
   prefix: "redflow:prod",
   queues: ["critical", "io", "analytics"],
@@ -168,6 +182,21 @@ const worker = await startWorker({

 ## Workflow options examples

+### maxConcurrency
+
+`maxConcurrency` limits concurrent `running` runs per workflow. Default is `1`.
+
+```ts
+defineWorkflow(
+  "heavy-sync",
+  {
+    queue: "ops",
+    maxConcurrency: 1,
+  },
+  async () => ({ ok: true }),
+);
+```
+
 ### Cron

 ```ts
@@ -184,6 +213,8 @@ defineWorkflow(
 );
 ```

+Cron respects `maxConcurrency`: if the limit is reached, that cron tick is skipped.
+
 ### onFailure

 ```ts
@@ -271,10 +302,10 @@ const output = await handle.result({ timeoutMs: 30_000 });
 console.log(output);
 ```

-### Registry sync
+### Registry sync app id

 ```ts
 import { getDefaultRegistry } from "@redflow/client";

-await client.syncRegistry(getDefaultRegistry(), {
+await client.syncRegistry(getDefaultRegistry(), { app: "billing-service" });
 ```
package/package.json
CHANGED
package/src/client.ts
CHANGED
@@ -37,10 +37,10 @@ export type CreateClientOptions = {

 export type SyncRegistryOptions = {
   /**
-   *
-   *
+   * Stable application id used for stale workflow metadata cleanup.
+   * Workflows are pruned only when they were last synced by the same app.
    */
-
+  app: string;
 };

 export function defaultPrefix(): string {
@@ -249,16 +249,6 @@ function encodeCompositePart(value: string): string {
   return `${value.length}:${value}`;
 }

-function defaultRegistryOwner(): string {
-  const envOwner = process.env.REDFLOW_SYNC_OWNER?.trim();
-  if (envOwner) return envOwner;
-
-  const argvOwner = process.argv[1]?.trim();
-  if (argvOwner) return argvOwner;
-
-  return "redflow:unknown-owner";
-}
-
 function parseEnqueueScriptResult(value: unknown): { kind: "created" | "existing"; runId: string } | null {
   if (Array.isArray(value) && value.length === 1 && Array.isArray(value[0])) {
     return parseEnqueueScriptResult(value[0]);
@@ -309,6 +299,11 @@ function isValidDate(value: Date): boolean {
   return value instanceof Date && Number.isFinite(value.getTime());
 }

+function normalizeMaxConcurrency(value: unknown): number {
+  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return 1;
+  return Math.floor(value);
+}
+
 export class RedflowClient {
   constructor(
     public readonly redis: RedisClient,
@@ -356,9 +351,11 @@ export class RedflowClient {
     const retries = safeJsonTryParse<any>(data.retriesJson ?? null) as any;
     const updatedAt = Number(data.updatedAt ?? "0");
     const queue = data.queue ?? "default";
+    const maxConcurrency = normalizeMaxConcurrency(Number(data.maxConcurrency ?? "1"));
     return {
       name,
       queue,
+      maxConcurrency,
       cron: Array.isArray(cron) && cron.length > 0 ? cron : undefined,
       retries,
       updatedAt,
@@ -606,17 +603,21 @@ export class RedflowClient {
     }
   }

-  async syncRegistry(registry: WorkflowRegistry, options
+  async syncRegistry(registry: WorkflowRegistry, options: SyncRegistryOptions): Promise<void> {
     const defs = registry.list();
     const syncStartedAt = nowMs();
-    const
+    const app = options.app.trim();
+    if (!app) {
+      throw new Error("syncRegistry requires a non-empty options.app");
+    }
     const registeredNames = new Set(defs.map((def) => def.options.name));

-    await this.cleanupStaleWorkflows(registeredNames, syncStartedAt,
+    await this.cleanupStaleWorkflows(registeredNames, syncStartedAt, app);

     for (const def of defs) {
       const name = def.options.name;
       const queue = def.options.queue ?? "default";
+      const maxConcurrency = normalizeMaxConcurrency(def.options.maxConcurrency);
       const cron = def.options.cron ?? [];
       const retries = def.options.retries ?? {};
       const updatedAt = nowMs();
@@ -653,6 +654,7 @@ export class RedflowClient {
         id: cronId,
         workflow: name,
         queue,
+        maxConcurrency,
         expression: c.expression,
         timezone: c.timezone,
         inputJson: safeJsonStringify(cronInput),
@@ -671,7 +673,8 @@ export class RedflowClient {
       const meta: Record<string, string> = {
         name,
         queue,
-
+        maxConcurrency: String(maxConcurrency),
+        app,
         updatedAt: String(updatedAt),
         cronJson: safeJsonStringify(cron),
         retriesJson: safeJsonStringify(retries),
@@ -722,7 +725,7 @@ export class RedflowClient {
   private async cleanupStaleWorkflows(
     registeredNames: Set<string>,
     syncStartedAt: number,
-
+    app: string,
   ): Promise<void> {
     const existingNames = await this.redis.smembers(keys.workflows(this.prefix));

@@ -730,8 +733,8 @@
       if (registeredNames.has(existingName)) continue;

       const workflowKey = keys.workflow(this.prefix, existingName);
-      const
-      if (!
+      const workflowApp = (await this.redis.hget(workflowKey, "app")) ?? "";
+      if (!workflowApp || workflowApp !== app) {
         continue;
       }

package/src/types.ts
CHANGED
@@ -32,6 +32,11 @@ export type OnFailureContext = {
 export type DefineWorkflowOptions<TSchema extends ZodTypeAny | undefined = ZodTypeAny | undefined> = {
   name: string;
   queue?: string;
+  /**
+   * Maximum concurrently running runs for this workflow.
+   * Default: 1.
+   */
+  maxConcurrency?: number;
   schema?: TSchema;
   cron?: CronTrigger[];
   retries?: WorkflowRetries;
@@ -113,7 +118,7 @@ export type StepApi = {
     workflow: WorkflowLike<TInput, TOutput>,
     input: TInput,
   ): Promise<TOutput>;
-  emitWorkflow(options: StepEmitWorkflowOptions, workflow: WorkflowLike, input: unknown): Promise<string>;
+  emitWorkflow(options: StepEmitWorkflowOptions, workflow: WorkflowLike | string, input: unknown): Promise<string>;
 };

 export type RunState = {
@@ -167,6 +172,7 @@ export type ListedRun = {
 export type WorkflowMeta = {
   name: string;
   queue: string;
+  maxConcurrency: number;
   cron?: CronTrigger[];
   retries?: WorkflowRetries;
   updatedAt: number;
package/src/worker.ts
CHANGED
@@ -19,6 +19,8 @@ import { getDefaultRegistry, type WorkflowRegistry } from "./registry";
 import type { OnFailureContext, RunStatus, StepApi, StepStatus } from "./types";

 export type StartWorkerOptions = {
+  /** Stable application id used for registry sync stale-cleanup scoping. */
+  app: string;
   redis?: RedisClient;
   url?: string;
   prefix?: string;
@@ -74,18 +76,32 @@ redis.call("lpush", KEYS[2], ARGV[1])
 return 1
 `;

-
-
-
-
-
+const REQUEUE_DUE_TO_CONCURRENCY_LUA = `
+if redis.call("lrem", KEYS[1], 1, ARGV[1]) <= 0 then
+  return 0
+end
+
+redis.call("rpush", KEYS[2], ARGV[1])
+return 1
+`;
+
+export async function startWorker(options: StartWorkerOptions): Promise<WorkerHandle> {
+  const app = options.app.trim();
+  if (!app) {
+    throw new Error("startWorker requires a non-empty options.app");
+  }
+
+  const registry = options.registry ?? getDefaultRegistry();
+  const prefix = options.prefix ?? defaultPrefix();
+  const ownsBaseRedis = !options.redis && !!options.url;
+  const baseRedis = options.redis ?? (options.url ? new BunRedisClient(options.url) : defaultRedis);
   const syncClient = createClient({ redis: baseRedis, prefix });

-  const queues = options
-  const concurrency = Math.max(1, options
-  const leaseMs = Math.max(100, options
-  const blmoveTimeoutSec = options
-  const reaperIntervalMs = options
+  const queues = options.queues ?? deriveQueuesFromRegistry(registry);
+  const concurrency = Math.max(1, options.concurrency ?? 1);
+  const leaseMs = Math.max(100, options.runtime?.leaseMs ?? 5000);
+  const blmoveTimeoutSec = options.runtime?.blmoveTimeoutSec ?? 1;
+  const reaperIntervalMs = options.runtime?.reaperIntervalMs ?? 500;

   const abort = new AbortController();
   const tasks: Promise<void>[] = [];
@@ -111,7 +127,7 @@ export async function startWorker(options?: StartWorkerOptions): Promise<WorkerH
   };

   try {
-    await syncClient.syncRegistry(registry);
+    await syncClient.syncRegistry(registry, { app });

     // Worker loops (blocking BLMOVE). Use dedicated connections per slot.
     for (let i = 0; i < concurrency; i++) {
@@ -222,6 +238,11 @@ function encodeIdempotencyPart(value: string): string {
   return `${value.length}:${value}`;
 }

+function normalizeMaxConcurrency(value: unknown): number {
+  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return 1;
+  return Math.floor(value);
+}
+
 function defaultStepWorkflowIdempotencyKey(parentRunId: string, stepName: string, childWorkflowName: string): string {
   return `stepwf:${encodeIdempotencyPart(parentRunId)}:${encodeIdempotencyPart(stepName)}:${encodeIdempotencyPart(childWorkflowName)}`;
 }
@@ -396,6 +417,8 @@ async function processRun(args: {
   }

   const workflowName = run.workflow ?? "";
+  const def = workflowName ? registry.get(workflowName) : undefined;
+  const maxConcurrency = normalizeMaxConcurrency(def?.options.maxConcurrency);
   const maxAttempts = Number(run.maxAttempts ?? "1");
   const cancelRequestedAt = run.cancelRequestedAt ? Number(run.cancelRequestedAt) : 0;
   if (cancelRequestedAt > 0) {
@@ -406,7 +429,26 @@ async function processRun(args: {

   const startedAt = run.startedAt && run.startedAt !== "" ? Number(run.startedAt) : nowMs();

-  if (currentStatus === "queued") {
+  if (currentStatus === "queued" && def) {
+    const runningCount = await countRunningRunsForWorkflow({
+      redis,
+      prefix,
+      workflowName,
+      stopAt: maxConcurrency,
+    });
+
+    if (runningCount >= maxConcurrency) {
+      await redis.send("EVAL", [
+        REQUEUE_DUE_TO_CONCURRENCY_LUA,
+        "2",
+        processingKey,
+        keys.queueReady(prefix, queue),
+        runId,
+      ]);
+      await sleep(25);
+      return;
+    }
+
     const movedToRunning = await client.transitionRunStatusIfCurrent(runId, "queued", "running", startedAt);
     if (!movedToRunning) {
       // Most likely canceled between dequeue and start transition.
@@ -433,7 +475,6 @@ async function processRun(args: {
     return;
   }

-  const def = registry.get(workflowName);
   if (!def) {
     const errorJson = makeErrorJson(new UnknownWorkflowError(workflowName));
     await client.finalizeRun(runId, { status: "failed", errorJson, finishedAt: nowMs() });
@@ -694,18 +735,27 @@ async function processRun(args: {
   };

   const emitWorkflowStep: StepApi["emitWorkflow"] = async (options, workflow, workflowInput) => {
+    const workflowName = typeof workflow === "string" ? workflow : workflow.name;
     const idempotencyKey =
-      options.idempotencyKey ?? defaultStepWorkflowIdempotencyKey(runId, options.name,
+      options.idempotencyKey ?? defaultStepWorkflowIdempotencyKey(runId, options.name, workflowName);

     return await runStep(
       { name: options.name, timeoutMs: options.timeoutMs },
       async () => {
-        const handle =
-
-
-
-
-
+        const handle =
+          typeof workflow === "string"
+            ? await client.runByName(workflow, workflowInput, {
+                runAt: options.runAt,
+                queueOverride: options.queueOverride,
+                idempotencyTtl: options.idempotencyTtl,
+                idempotencyKey,
+              })
+            : await workflow.run(workflowInput, {
+                runAt: options.runAt,
+                queueOverride: options.queueOverride,
+                idempotencyTtl: options.idempotencyTtl,
+                idempotencyKey,
+              });
         return handle.id;
       },
     );
@@ -893,6 +943,27 @@ async function reaperLoop(args: {
   }
 }

+async function countRunningRunsForWorkflow(args: {
+  redis: RedisClient;
+  prefix: string;
+  workflowName: string;
+  stopAt?: number;
+}): Promise<number> {
+  const { redis, prefix, workflowName, stopAt } = args;
+  const runningRunIds = await redis.zrevrange(keys.runsStatus(prefix, "running"), 0, -1);
+  let count = 0;
+
+  for (const runId of runningRunIds) {
+    const runWorkflow = await redis.hget(keys.run(prefix, runId), "workflow");
+    if (runWorkflow !== workflowName) continue;
+
+    count += 1;
+    if (typeof stopAt === "number" && count >= stopAt) return count;
+  }
+
+  return count;
+}
+
 async function cronSchedulerLoop(args: {
   redis: RedisClient;
   client: RedflowClient;
@@ -967,7 +1038,17 @@ async function cronSchedulerLoop(args: {
         continue;
       }

-
+      const cronMaxConcurrency = normalizeMaxConcurrency(def.maxConcurrency);
+      const runningCount = await countRunningRunsForWorkflow({
+        redis,
+        prefix,
+        workflowName: workflow,
+        stopAt: cronMaxConcurrency,
+      });
+
+      if (runningCount < cronMaxConcurrency) {
+        await client.runByName(workflow, input, { queueOverride: queue });
+      }

       // Schedule next run.
       let nextAt: number | null = null;