@redflow/client 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/INTERNALS.md ADDED
@@ -0,0 +1,238 @@
1
+ # redflow internals
2
+
3
+ This document describes the internal workings of `@redflow/client` as deployed in production.
4
+
5
+ ## Design model
6
+
7
+ - Durable state lives in Redis.
8
+ - Handlers and workflow code live in process memory (per worker process).
9
+ - The runtime is queue-based and crash-recoverable.
10
+ - Delivery semantics are at-least-once at run level.
11
+ - Step API provides deterministic replay/caching to avoid repeating completed work.
12
+
13
+ ## Main components
14
+
15
+ - **Workflow registry (in-memory):** built via `defineWorkflow(...)`.
16
+ - **Client (`RedflowClient`):** enqueue runs, inspect state, cancel runs, sync metadata.
17
+ - **Worker runtime:** executes queued runs, retries failures, promotes scheduled runs.
18
+ - **Cron scheduler:** leader-elected loop that creates cron runs.
19
+
20
+ ## Registry and metadata sync
21
+
22
+ `startWorker({ app, ... })` always calls `syncRegistry(registry, { app })` before loops start.
23
+
24
+ What `syncRegistry` writes per workflow:
25
+
26
+ - `workflow:<name>` hash:
27
+ - `name`
28
+ - `queue`
29
+ - `maxConcurrency` (default `1`)
30
+ - `app` (required ownership scope for cleanup)
31
+ - `updatedAt`
32
+ - `cronJson`
33
+ - `retriesJson`
34
+ - `cronIdsJson`
35
+ - `workflows` set (all known workflow names)
36
+ - cron definitions in `cron:def` and schedule in `cron:next`
37
+
38
+ ### Stale cleanup
39
+
40
+ Before writing new metadata, sync removes stale workflow metadata when all are true:
41
+
42
+ - workflow exists in Redis,
43
+ - workflow is missing in current registry,
44
+ - workflow `app` equals current `app`,
45
+ - workflow is older than grace period (`30s`).
46
+
47
+ Cleanup removes:
48
+
49
+ - `workflow:<name>` metadata hash,
50
+ - `workflows` set membership,
51
+ - associated cron entries (`cron:def`, `cron:next`).
52
+
53
+ It does **not** delete historical runs.
54
+
55
+ ## Redis keyspace
56
+
57
+ Key builders are in `src/internal/keys.ts`.
58
+
59
+ - `workflows`
60
+ - `workflow:<name>`
61
+ - `workflow-runs:<name>`
62
+ - `runs:created`
63
+ - `runs:status:<status>`
64
+ - `run:<runId>`
65
+ - `run:<runId>:steps`
66
+ - `run:<runId>:lease`
67
+ - `q:<queue>:ready`
68
+ - `q:<queue>:processing`
69
+ - `q:<queue>:scheduled`
70
+ - `cron:def`
71
+ - `cron:next`
72
+ - `lock:cron`
73
+ - `idempo:<encodedWorkflow>:<encodedKey>`
74
+
75
+ ## Run lifecycle
76
+
77
+ Statuses:
78
+
79
+ - `scheduled`
80
+ - `queued`
81
+ - `running`
82
+ - terminal: `succeeded`, `failed`, `canceled`
83
+
84
+ ### Enqueue
85
+
86
+ Enqueue uses `ENQUEUE_RUN_LUA` atomically:
87
+
88
+ - creates run hash,
89
+ - writes indexes (`runs:created`, `runs:status:*`, `workflow-runs:*`),
90
+ - pushes to ready queue or scheduled ZSET,
91
+ - applies idempotency mapping if key was provided.
92
+
93
+ Idempotency key TTL defaults to `7 days`.
94
+
95
+ ### Processing
96
+
97
+ Worker loop uses `LMOVE`/`BLMOVE` from `ready` -> `processing`.
98
+
99
+ For each claimed run:
100
+
101
+ 1. Acquire lease (`run:<id>:lease`) with periodic renewal.
102
+ 2. Validate current run status.
103
+ 3. If `queued`, enforce `maxConcurrency` for that workflow.
104
+ 4. Transition `queued -> running` atomically.
105
+ 5. Execute handler with step engine.
106
+ 6. Finalize to terminal status atomically.
107
+ 7. Remove from `processing`.
108
+
109
+ If lease is lost, current worker aborts and does not finalize.
110
+
111
+ ### Reaper
112
+
113
+ Reaper scans `processing` lists. For runs without active lease:
114
+
115
+ - removes from `processing`,
116
+ - pushes back to `ready`.
117
+
118
+ This recovers from worker crashes.
119
+
120
+ ### Scheduled promoter
121
+
122
+ Promoter pops due items from `q:<queue>:scheduled` (`ZPOPMIN` batch), then:
123
+
124
+ - transitions `scheduled -> queued`,
125
+ - pushes to `ready`.
126
+
127
+ Future items are put back.
128
+
129
+ ## maxConcurrency
130
+
131
+ `maxConcurrency` is per workflow, default `1`.
132
+
133
+ ### For regular queued runs
134
+
135
+ When a worker picks a `queued` run:
136
+
137
+ - it counts current `running` runs for the same workflow,
138
+ - if count >= `maxConcurrency`, run is atomically moved from `processing` back to end of `ready`.
139
+
140
+ So non-cron runs are delayed (not dropped).
141
+
142
+ ### For cron runs
143
+
144
+ Cron loop also checks running count before enqueue.
145
+
146
+ - if count >= `maxConcurrency`, that cron tick is skipped,
147
+ - next cron tick is still scheduled normally.
148
+
149
+ ## Cron scheduler
150
+
151
+ - Leader election via Redis lock `lock:cron`.
152
+ - Only lock holder schedules cron runs.
153
+ - Loop pops earliest `cronId` from `cron:next`.
154
+ - If due:
155
+ - parses `cron:def` payload,
156
+ - enforces `maxConcurrency`,
157
+ - enqueues run via `runByName` (or skips),
158
+ - computes next fire time and stores in `cron:next`.
159
+
160
+ Cron uses "reschedule from now" behavior (no catch-up burst if stale timestamp was in the past).
161
+
162
+ ## Step engine semantics
163
+
164
+ Inside handler, `step` API has three primitives.
165
+
166
+ ### `step.run(...)`
167
+
168
+ - Step state is persisted in `run:<id>:steps` hash under `step.name`.
169
+ - If step already `succeeded`, cached output is returned.
170
+ - Duplicate step names in one execution are rejected.
171
+ - Step timeout and cancellation are supported.
172
+
173
+ ### `step.runWorkflow(...)`
174
+
175
+ - Enqueues child workflow with deterministic idempotency by default:
176
+ - `parentRunId + stepName + childWorkflowName`.
177
+ - Waits for child completion.
178
+ - Waiting is bounded by step `timeoutMs` (if set), otherwise unbounded until cancellation.
179
+ - Inline assist: if child is queued on a queue this worker handles, worker may execute child inline to avoid self-deadlock with low concurrency.
180
+
181
+ ### `step.emitWorkflow(...)`
182
+
183
+ - Enqueues child workflow and returns child `runId`.
184
+ - Supports child as workflow object or workflow name string.
185
+ - Uses deterministic idempotency default based on parent run and step name.
186
+
187
+ ## Retry model
188
+
189
+ - `maxAttempts` is workflow-level (`retries.maxAttempts`), default `3` (`DEFAULT_MAX_ATTEMPTS`).
190
+ - Retry delay uses exponential backoff + jitter.
191
+ - Non-retryable classes:
192
+ - input validation errors,
193
+ - unknown workflow,
194
+ - output serialization errors,
195
+ - cancellation,
196
+ - explicit `NonRetriableError`.
197
+ - Retry scheduling is atomic (`scheduleRetry` Lua): status/index update + queue scheduled ZSET write in one script.
198
+
199
+ ## Cancellation
200
+
201
+ `cancelRun(runId)`:
202
+
203
+ - sets `cancelRequestedAt` + optional reason,
204
+ - if run is `queued`/`scheduled`, attempts immediate transition to `canceled` and cleanup,
205
+ - if run is `running`, cancellation is cooperative via `AbortSignal` polling in worker.
206
+
207
+ Terminal finalize script ensures consistent indexes and terminal status.
208
+
209
+ ## Idempotency vs step cache
210
+
211
+ - **Idempotency:** deduplicates run creation (`key -> runId`) with TTL.
212
+ - **Step cache:** deduplicates completed step execution within one parent run.
213
+
214
+ They solve different failure windows and are intentionally both used.
215
+
216
+ ## Multi-worker behavior
217
+
218
+ - Many workers can process same prefix/queues.
219
+ - Cron scheduling is single-leader.
220
+ - Processing/recovery is shared via Redis lists + leases.
221
+ - `maxConcurrency` is enforced globally against Redis `running` index.
222
+
223
+ ## Operational notes
224
+
225
+ Recommended for production:
226
+
227
+ - Use stable `prefix` per environment.
228
+ - Use explicit `app` per service role for safe metadata cleanup.
229
+ - Set `maxConcurrency` intentionally for long workflows.
230
+ - Keep queue ownership clear (avoid workers consuming queues for workflows they do not register).
231
+ - Use idempotency keys for external trigger endpoints.
232
+
233
+ ## Current guarantees and limitations
234
+
235
+ - Run execution is at-least-once.
236
+ - Step cache reduces replay but cannot provide global exactly-once side effects.
237
+ - `maxConcurrency` is enforced via runtime checks against Redis state; the running-count check and the `queued -> running` transition are not atomic, so it is robust in practice but is not a strict distributed semaphore.
238
+ - `handle.result({ timeoutMs })` timeout affects caller waiting only, not run execution itself.
package/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  Redis-backed workflow runtime for Bun.
4
4
 
5
+ Deep internal details: `INTERNALS.md`
6
+
5
7
  ## Warning
6
8
 
7
9
  This project is still in early alpha stage.
@@ -147,13 +149,14 @@ const output = await handle.result({ timeoutMs: 90_000 });
147
149
 
148
150
  ## Start a worker
149
151
 
150
- Import workflows, then run `startWorker()`.
152
+ Import workflows, then run `startWorker({ app: ... })`.
151
153
 
152
154
  ```ts
153
155
  import { startWorker } from "@redflow/client";
154
156
  import "./workflows";
155
157
 
156
158
  const worker = await startWorker({
159
+ app: "billing-worker",
157
160
  url: process.env.REDIS_URL,
158
161
  prefix: "redflow:prod",
159
162
  concurrency: 4,
@@ -164,6 +167,7 @@ Explicit queues + runtime tuning:
164
167
 
165
168
  ```ts
166
169
  const worker = await startWorker({
170
+ app: "billing-worker",
167
171
  url: process.env.REDIS_URL,
168
172
  prefix: "redflow:prod",
169
173
  queues: ["critical", "io", "analytics"],
@@ -178,6 +182,21 @@ const worker = await startWorker({
178
182
 
179
183
  ## Workflow options examples
180
184
 
185
+ ### maxConcurrency
186
+
187
+ `maxConcurrency` limits concurrent `running` runs per workflow. Default is `1`.
188
+
189
+ ```ts
190
+ defineWorkflow(
191
+ "heavy-sync",
192
+ {
193
+ queue: "ops",
194
+ maxConcurrency: 1,
195
+ },
196
+ async () => ({ ok: true }),
197
+ );
198
+ ```
199
+
181
200
  ### Cron
182
201
 
183
202
  ```ts
@@ -194,6 +213,8 @@ defineWorkflow(
194
213
  );
195
214
  ```
196
215
 
216
+ Cron respects `maxConcurrency`: if the limit is reached, that cron tick is skipped.
217
+
197
218
  ### onFailure
198
219
 
199
220
  ```ts
@@ -281,10 +302,10 @@ const output = await handle.result({ timeoutMs: 30_000 });
281
302
  console.log(output);
282
303
  ```
283
304
 
284
- ### Registry sync ownership
305
+ ### Registry sync app id
285
306
 
286
307
  ```ts
287
308
  import { getDefaultRegistry } from "@redflow/client";
288
309
 
289
- await client.syncRegistry(getDefaultRegistry(), { owner: "billing-service" });
310
+ await client.syncRegistry(getDefaultRegistry(), { app: "billing-service" });
290
311
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redflow/client",
3
- "version": "0.0.3",
3
+ "version": "0.0.5",
4
4
  "type": "module",
5
5
  "main": "src/index.ts",
6
6
  "module": "src/index.ts",
package/src/client.ts CHANGED
@@ -16,16 +16,17 @@ import { keys } from "./internal/keys";
16
16
  import { safeJsonParse, safeJsonStringify, safeJsonTryParse } from "./internal/json";
17
17
  import { nowMs } from "./internal/time";
18
18
  import { sleep } from "./internal/sleep";
19
- import type {
20
- EmitWorkflowOptions,
21
- ListedRun,
22
- ListRunsParams,
23
- RunHandle,
24
- RunOptions,
25
- RunState,
26
- RunStatus,
27
- StepState,
28
- WorkflowMeta,
19
+ import {
20
+ DEFAULT_MAX_ATTEMPTS,
21
+ type EmitWorkflowOptions,
22
+ type ListedRun,
23
+ type ListRunsParams,
24
+ type RunHandle,
25
+ type RunOptions,
26
+ type RunState,
27
+ type RunStatus,
28
+ type StepState,
29
+ type WorkflowMeta,
29
30
  } from "./types";
30
31
  import type { WorkflowRegistry } from "./registry";
31
32
 
@@ -37,10 +38,10 @@ export type CreateClientOptions = {
37
38
 
38
39
  export type SyncRegistryOptions = {
39
40
  /**
40
- * Workflows are pruned only when they were last synced by the same owner.
41
- * Set a stable service id (for example, app name) to enable safe stale cleanup.
41
+ * Stable application id used for stale workflow metadata cleanup.
42
+ * Workflows are pruned only when they were last synced by the same app.
42
43
  */
43
- owner?: string;
44
+ app: string;
44
45
  };
45
46
 
46
47
  export function defaultPrefix(): string {
@@ -249,16 +250,6 @@ function encodeCompositePart(value: string): string {
249
250
  return `${value.length}:${value}`;
250
251
  }
251
252
 
252
- function defaultRegistryOwner(): string {
253
- const envOwner = process.env.REDFLOW_SYNC_OWNER?.trim();
254
- if (envOwner) return envOwner;
255
-
256
- const argvOwner = process.argv[1]?.trim();
257
- if (argvOwner) return argvOwner;
258
-
259
- return "redflow:unknown-owner";
260
- }
261
-
262
253
  function parseEnqueueScriptResult(value: unknown): { kind: "created" | "existing"; runId: string } | null {
263
254
  if (Array.isArray(value) && value.length === 1 && Array.isArray(value[0])) {
264
255
  return parseEnqueueScriptResult(value[0]);
@@ -309,6 +300,11 @@ function isValidDate(value: Date): boolean {
309
300
  return value instanceof Date && Number.isFinite(value.getTime());
310
301
  }
311
302
 
303
+ function normalizeMaxConcurrency(value: unknown): number {
304
+ if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return 1;
305
+ return Math.floor(value);
306
+ }
307
+
312
308
  export class RedflowClient {
313
309
  constructor(
314
310
  public readonly redis: RedisClient,
@@ -356,9 +352,11 @@ export class RedflowClient {
356
352
  const retries = safeJsonTryParse<any>(data.retriesJson ?? null) as any;
357
353
  const updatedAt = Number(data.updatedAt ?? "0");
358
354
  const queue = data.queue ?? "default";
355
+ const maxConcurrency = normalizeMaxConcurrency(Number(data.maxConcurrency ?? "1"));
359
356
  return {
360
357
  name,
361
358
  queue,
359
+ maxConcurrency,
362
360
  cron: Array.isArray(cron) && cron.length > 0 ? cron : undefined,
363
361
  retries,
364
362
  updatedAt,
@@ -395,7 +393,7 @@ export class RedflowClient {
395
393
  ? Math.floor(options.__maxAttemptsOverride)
396
394
  : null;
397
395
 
398
- const maxAttempts = maxAttemptsOverride ?? (await this.getMaxAttemptsForWorkflow(workflowName)) ?? 1;
396
+ const maxAttempts = maxAttemptsOverride ?? (await this.getMaxAttemptsForWorkflow(workflowName)) ?? DEFAULT_MAX_ATTEMPTS;
399
397
 
400
398
  return await this.enqueueRun<TOutput>({
401
399
  workflowName,
@@ -471,7 +469,7 @@ export class RedflowClient {
471
469
  output,
472
470
  error,
473
471
  attempt: Number(data.attempt ?? "0"),
474
- maxAttempts: Number(data.maxAttempts ?? "1"),
472
+ maxAttempts: Number(data.maxAttempts ?? String(DEFAULT_MAX_ATTEMPTS)),
475
473
  createdAt: Number(data.createdAt ?? "0"),
476
474
  availableAt: data.availableAt ? Number(data.availableAt) : undefined,
477
475
  startedAt: data.startedAt ? Number(data.startedAt) : undefined,
@@ -606,17 +604,21 @@ export class RedflowClient {
606
604
  }
607
605
  }
608
606
 
609
- async syncRegistry(registry: WorkflowRegistry, options?: SyncRegistryOptions): Promise<void> {
607
+ async syncRegistry(registry: WorkflowRegistry, options: SyncRegistryOptions): Promise<void> {
610
608
  const defs = registry.list();
611
609
  const syncStartedAt = nowMs();
612
- const owner = options?.owner?.trim() || defaultRegistryOwner();
610
+ const app = options.app.trim();
611
+ if (!app) {
612
+ throw new Error("syncRegistry requires a non-empty options.app");
613
+ }
613
614
  const registeredNames = new Set(defs.map((def) => def.options.name));
614
615
 
615
- await this.cleanupStaleWorkflows(registeredNames, syncStartedAt, owner);
616
+ await this.cleanupStaleWorkflows(registeredNames, syncStartedAt, app);
616
617
 
617
618
  for (const def of defs) {
618
619
  const name = def.options.name;
619
620
  const queue = def.options.queue ?? "default";
621
+ const maxConcurrency = normalizeMaxConcurrency(def.options.maxConcurrency);
620
622
  const cron = def.options.cron ?? [];
621
623
  const retries = def.options.retries ?? {};
622
624
  const updatedAt = nowMs();
@@ -653,6 +655,7 @@ export class RedflowClient {
653
655
  id: cronId,
654
656
  workflow: name,
655
657
  queue,
658
+ maxConcurrency,
656
659
  expression: c.expression,
657
660
  timezone: c.timezone,
658
661
  inputJson: safeJsonStringify(cronInput),
@@ -671,7 +674,8 @@ export class RedflowClient {
671
674
  const meta: Record<string, string> = {
672
675
  name,
673
676
  queue,
674
- owner,
677
+ maxConcurrency: String(maxConcurrency),
678
+ app,
675
679
  updatedAt: String(updatedAt),
676
680
  cronJson: safeJsonStringify(cron),
677
681
  retriesJson: safeJsonStringify(retries),
@@ -722,7 +726,7 @@ export class RedflowClient {
722
726
  private async cleanupStaleWorkflows(
723
727
  registeredNames: Set<string>,
724
728
  syncStartedAt: number,
725
- owner: string,
729
+ app: string,
726
730
  ): Promise<void> {
727
731
  const existingNames = await this.redis.smembers(keys.workflows(this.prefix));
728
732
 
@@ -730,8 +734,8 @@ export class RedflowClient {
730
734
  if (registeredNames.has(existingName)) continue;
731
735
 
732
736
  const workflowKey = keys.workflow(this.prefix, existingName);
733
- const workflowOwner = (await this.redis.hget(workflowKey, "owner")) ?? "";
734
- if (!workflowOwner || workflowOwner !== owner) {
737
+ const workflowApp = (await this.redis.hget(workflowKey, "app")) ?? "";
738
+ if (!workflowApp || workflowApp !== app) {
735
739
  continue;
736
740
  }
737
741
 
package/src/types.ts CHANGED
@@ -12,8 +12,10 @@ export type CronTrigger = {
12
12
  id?: string;
13
13
  };
14
14
 
15
+ export const DEFAULT_MAX_ATTEMPTS = 3;
16
+
15
17
  export type WorkflowRetries = {
16
- /** Total attempts including the first one. Default: 1 (no retries). */
18
+ /** Total attempts including the first one. Default: 3. */
17
19
  maxAttempts?: number;
18
20
  };
19
21
 
@@ -32,6 +34,11 @@ export type OnFailureContext = {
32
34
  export type DefineWorkflowOptions<TSchema extends ZodTypeAny | undefined = ZodTypeAny | undefined> = {
33
35
  name: string;
34
36
  queue?: string;
37
+ /**
38
+ * Maximum concurrently running runs for this workflow.
39
+ * Default: 1.
40
+ */
41
+ maxConcurrency?: number;
35
42
  schema?: TSchema;
36
43
  cron?: CronTrigger[];
37
44
  retries?: WorkflowRetries;
@@ -167,6 +174,7 @@ export type ListedRun = {
167
174
  export type WorkflowMeta = {
168
175
  name: string;
169
176
  queue: string;
177
+ maxConcurrency: number;
170
178
  cron?: CronTrigger[];
171
179
  retries?: WorkflowRetries;
172
180
  updatedAt: number;
package/src/worker.ts CHANGED
@@ -16,9 +16,11 @@ import { safeJsonParse, safeJsonStringify, safeJsonTryParse } from "./internal/j
16
16
  import { sleep } from "./internal/sleep";
17
17
  import { nowMs } from "./internal/time";
18
18
  import { getDefaultRegistry, type WorkflowRegistry } from "./registry";
19
- import type { OnFailureContext, RunStatus, StepApi, StepStatus } from "./types";
19
+ import { DEFAULT_MAX_ATTEMPTS, type OnFailureContext, type RunStatus, type StepApi, type StepStatus } from "./types";
20
20
 
21
21
  export type StartWorkerOptions = {
22
+ /** Stable application id used for registry sync stale-cleanup scoping. */
23
+ app: string;
22
24
  redis?: RedisClient;
23
25
  url?: string;
24
26
  prefix?: string;
@@ -74,18 +76,32 @@ redis.call("lpush", KEYS[2], ARGV[1])
74
76
  return 1
75
77
  `;
76
78
 
77
- export async function startWorker(options?: StartWorkerOptions): Promise<WorkerHandle> {
78
- const registry = options?.registry ?? getDefaultRegistry();
79
- const prefix = options?.prefix ?? defaultPrefix();
80
- const ownsBaseRedis = !options?.redis && !!options?.url;
81
- const baseRedis = options?.redis ?? (options?.url ? new BunRedisClient(options.url) : defaultRedis);
79
+ const REQUEUE_DUE_TO_CONCURRENCY_LUA = `
80
+ if redis.call("lrem", KEYS[1], 1, ARGV[1]) <= 0 then
81
+ return 0
82
+ end
83
+
84
+ redis.call("rpush", KEYS[2], ARGV[1])
85
+ return 1
86
+ `;
87
+
88
+ export async function startWorker(options: StartWorkerOptions): Promise<WorkerHandle> {
89
+ const app = options.app.trim();
90
+ if (!app) {
91
+ throw new Error("startWorker requires a non-empty options.app");
92
+ }
93
+
94
+ const registry = options.registry ?? getDefaultRegistry();
95
+ const prefix = options.prefix ?? defaultPrefix();
96
+ const ownsBaseRedis = !options.redis && !!options.url;
97
+ const baseRedis = options.redis ?? (options.url ? new BunRedisClient(options.url) : defaultRedis);
82
98
  const syncClient = createClient({ redis: baseRedis, prefix });
83
99
 
84
- const queues = options?.queues ?? deriveQueuesFromRegistry(registry);
85
- const concurrency = Math.max(1, options?.concurrency ?? 1);
86
- const leaseMs = Math.max(100, options?.runtime?.leaseMs ?? 5000);
87
- const blmoveTimeoutSec = options?.runtime?.blmoveTimeoutSec ?? 1;
88
- const reaperIntervalMs = options?.runtime?.reaperIntervalMs ?? 500;
100
+ const queues = options.queues ?? deriveQueuesFromRegistry(registry);
101
+ const concurrency = Math.max(1, options.concurrency ?? 1);
102
+ const leaseMs = Math.max(100, options.runtime?.leaseMs ?? 5000);
103
+ const blmoveTimeoutSec = options.runtime?.blmoveTimeoutSec ?? 1;
104
+ const reaperIntervalMs = options.runtime?.reaperIntervalMs ?? 500;
89
105
 
90
106
  const abort = new AbortController();
91
107
  const tasks: Promise<void>[] = [];
@@ -111,7 +127,7 @@ export async function startWorker(options?: StartWorkerOptions): Promise<WorkerH
111
127
  };
112
128
 
113
129
  try {
114
- await syncClient.syncRegistry(registry);
130
+ await syncClient.syncRegistry(registry, { app });
115
131
 
116
132
  // Worker loops (blocking BLMOVE). Use dedicated connections per slot.
117
133
  for (let i = 0; i < concurrency; i++) {
@@ -222,6 +238,11 @@ function encodeIdempotencyPart(value: string): string {
222
238
  return `${value.length}:${value}`;
223
239
  }
224
240
 
241
+ function normalizeMaxConcurrency(value: unknown): number {
242
+ if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) return 1;
243
+ return Math.floor(value);
244
+ }
245
+
225
246
  function defaultStepWorkflowIdempotencyKey(parentRunId: string, stepName: string, childWorkflowName: string): string {
226
247
  return `stepwf:${encodeIdempotencyPart(parentRunId)}:${encodeIdempotencyPart(stepName)}:${encodeIdempotencyPart(childWorkflowName)}`;
227
248
  }
@@ -396,7 +417,9 @@ async function processRun(args: {
396
417
  }
397
418
 
398
419
  const workflowName = run.workflow ?? "";
399
- const maxAttempts = Number(run.maxAttempts ?? "1");
420
+ const def = workflowName ? registry.get(workflowName) : undefined;
421
+ const maxConcurrency = normalizeMaxConcurrency(def?.options.maxConcurrency);
422
+ const maxAttempts = Number(run.maxAttempts ?? String(DEFAULT_MAX_ATTEMPTS));
400
423
  const cancelRequestedAt = run.cancelRequestedAt ? Number(run.cancelRequestedAt) : 0;
401
424
  if (cancelRequestedAt > 0) {
402
425
  await client.finalizeRun(runId, { status: "canceled", finishedAt: nowMs() });
@@ -406,7 +429,26 @@ async function processRun(args: {
406
429
 
407
430
  const startedAt = run.startedAt && run.startedAt !== "" ? Number(run.startedAt) : nowMs();
408
431
 
409
- if (currentStatus === "queued") {
432
+ if (currentStatus === "queued" && def) {
433
+ const runningCount = await countRunningRunsForWorkflow({
434
+ redis,
435
+ prefix,
436
+ workflowName,
437
+ stopAt: maxConcurrency,
438
+ });
439
+
440
+ if (runningCount >= maxConcurrency) {
441
+ await redis.send("EVAL", [
442
+ REQUEUE_DUE_TO_CONCURRENCY_LUA,
443
+ "2",
444
+ processingKey,
445
+ keys.queueReady(prefix, queue),
446
+ runId,
447
+ ]);
448
+ await sleep(25);
449
+ return;
450
+ }
451
+
410
452
  const movedToRunning = await client.transitionRunStatusIfCurrent(runId, "queued", "running", startedAt);
411
453
  if (!movedToRunning) {
412
454
  // Most likely canceled between dequeue and start transition.
@@ -433,7 +475,6 @@ async function processRun(args: {
433
475
  return;
434
476
  }
435
477
 
436
- const def = registry.get(workflowName);
437
478
  if (!def) {
438
479
  const errorJson = makeErrorJson(new UnknownWorkflowError(workflowName));
439
480
  await client.finalizeRun(runId, { status: "failed", errorJson, finishedAt: nowMs() });
@@ -902,6 +943,27 @@ async function reaperLoop(args: {
902
943
  }
903
944
  }
904
945
 
946
+ async function countRunningRunsForWorkflow(args: {
947
+ redis: RedisClient;
948
+ prefix: string;
949
+ workflowName: string;
950
+ stopAt?: number;
951
+ }): Promise<number> {
952
+ const { redis, prefix, workflowName, stopAt } = args;
953
+ const runningRunIds = await redis.zrevrange(keys.runsStatus(prefix, "running"), 0, -1);
954
+ let count = 0;
955
+
956
+ for (const runId of runningRunIds) {
957
+ const runWorkflow = await redis.hget(keys.run(prefix, runId), "workflow");
958
+ if (runWorkflow !== workflowName) continue;
959
+
960
+ count += 1;
961
+ if (typeof stopAt === "number" && count >= stopAt) return count;
962
+ }
963
+
964
+ return count;
965
+ }
966
+
905
967
  async function cronSchedulerLoop(args: {
906
968
  redis: RedisClient;
907
969
  client: RedflowClient;
@@ -976,7 +1038,17 @@ async function cronSchedulerLoop(args: {
976
1038
  continue;
977
1039
  }
978
1040
 
979
- await client.runByName(workflow, input, { queueOverride: queue });
1041
+ const cronMaxConcurrency = normalizeMaxConcurrency(def.maxConcurrency);
1042
+ const runningCount = await countRunningRunsForWorkflow({
1043
+ redis,
1044
+ prefix,
1045
+ workflowName: workflow,
1046
+ stopAt: cronMaxConcurrency,
1047
+ });
1048
+
1049
+ if (runningCount < cronMaxConcurrency) {
1050
+ await client.runByName(workflow, input, { queueOverride: queue });
1051
+ }
980
1052
 
981
1053
  // Schedule next run.
982
1054
  let nextAt: number | null = null;