glide-mq 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/CHANGELOG.md +66 -1
  2. package/README.md +187 -55
  3. package/dist/base-worker.d.ts +282 -0
  4. package/dist/base-worker.d.ts.map +1 -0
  5. package/dist/base-worker.js +1250 -0
  6. package/dist/base-worker.js.map +1 -0
  7. package/dist/broadcast-worker.d.ts +36 -0
  8. package/dist/broadcast-worker.d.ts.map +1 -0
  9. package/dist/broadcast-worker.js +196 -0
  10. package/dist/broadcast-worker.js.map +1 -0
  11. package/dist/broadcast.d.ts +67 -0
  12. package/dist/broadcast.d.ts.map +1 -0
  13. package/dist/broadcast.js +109 -0
  14. package/dist/broadcast.js.map +1 -0
  15. package/dist/connection.d.ts +5 -1
  16. package/dist/connection.d.ts.map +1 -1
  17. package/dist/connection.js +5 -1
  18. package/dist/connection.js.map +1 -1
  19. package/dist/dag-utils.d.ts +22 -0
  20. package/dist/dag-utils.d.ts.map +1 -0
  21. package/dist/dag-utils.js +170 -0
  22. package/dist/dag-utils.js.map +1 -0
  23. package/dist/errors.d.ts +11 -0
  24. package/dist/errors.d.ts.map +1 -1
  25. package/dist/errors.js +29 -1
  26. package/dist/errors.js.map +1 -1
  27. package/dist/flow-producer.d.ts +11 -1
  28. package/dist/flow-producer.d.ts.map +1 -1
  29. package/dist/flow-producer.js +282 -6
  30. package/dist/flow-producer.js.map +1 -1
  31. package/dist/functions/index.d.ts +58 -13
  32. package/dist/functions/index.d.ts.map +1 -1
  33. package/dist/functions/index.js +1259 -212
  34. package/dist/functions/index.js.map +1 -1
  35. package/dist/index.d.ts +11 -3
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +23 -1
  38. package/dist/index.js.map +1 -1
  39. package/dist/job.d.ts +61 -4
  40. package/dist/job.d.ts.map +1 -1
  41. package/dist/job.js +210 -23
  42. package/dist/job.js.map +1 -1
  43. package/dist/producer.d.ts +58 -0
  44. package/dist/producer.d.ts.map +1 -0
  45. package/dist/producer.js +398 -0
  46. package/dist/producer.js.map +1 -0
  47. package/dist/proxy/index.d.ts +30 -0
  48. package/dist/proxy/index.d.ts.map +1 -0
  49. package/dist/proxy/index.js +61 -0
  50. package/dist/proxy/index.js.map +1 -0
  51. package/dist/proxy/routes.d.ts +13 -0
  52. package/dist/proxy/routes.d.ts.map +1 -0
  53. package/dist/proxy/routes.js +327 -0
  54. package/dist/proxy/routes.js.map +1 -0
  55. package/dist/proxy/types.d.ts +102 -0
  56. package/dist/proxy/types.d.ts.map +1 -0
  57. package/dist/proxy/types.js +3 -0
  58. package/dist/proxy/types.js.map +1 -0
  59. package/dist/queue-events.d.ts.map +1 -1
  60. package/dist/queue-events.js +25 -15
  61. package/dist/queue-events.js.map +1 -1
  62. package/dist/queue.d.ts +23 -6
  63. package/dist/queue.d.ts.map +1 -1
  64. package/dist/queue.js +590 -72
  65. package/dist/queue.js.map +1 -1
  66. package/dist/sandbox/index.d.ts.map +1 -1
  67. package/dist/sandbox/index.js +10 -3
  68. package/dist/sandbox/index.js.map +1 -1
  69. package/dist/sandbox/pool.d.ts.map +1 -1
  70. package/dist/sandbox/pool.js +29 -1
  71. package/dist/sandbox/pool.js.map +1 -1
  72. package/dist/sandbox/runner.js +39 -11
  73. package/dist/sandbox/runner.js.map +1 -1
  74. package/dist/sandbox/sandbox-job.d.ts +2 -0
  75. package/dist/sandbox/sandbox-job.d.ts.map +1 -1
  76. package/dist/sandbox/sandbox-job.js +21 -0
  77. package/dist/sandbox/sandbox-job.js.map +1 -1
  78. package/dist/sandbox/types.d.ts +2 -1
  79. package/dist/sandbox/types.d.ts.map +1 -1
  80. package/dist/sandbox/types.js.map +1 -1
  81. package/dist/scheduler.d.ts +32 -1
  82. package/dist/scheduler.d.ts.map +1 -1
  83. package/dist/scheduler.js +275 -65
  84. package/dist/scheduler.js.map +1 -1
  85. package/dist/serverless-pool.d.ts +30 -0
  86. package/dist/serverless-pool.d.ts.map +1 -0
  87. package/dist/serverless-pool.js +75 -0
  88. package/dist/serverless-pool.js.map +1 -0
  89. package/dist/testing.d.ts +59 -5
  90. package/dist/testing.d.ts.map +1 -1
  91. package/dist/testing.js +559 -29
  92. package/dist/testing.js.map +1 -1
  93. package/dist/types.d.ts +172 -3
  94. package/dist/types.d.ts.map +1 -1
  95. package/dist/types.js +6 -0
  96. package/dist/types.js.map +1 -1
  97. package/dist/utils.d.ts +64 -2
  98. package/dist/utils.d.ts.map +1 -1
  99. package/dist/utils.js +567 -39
  100. package/dist/utils.js.map +1 -1
  101. package/dist/worker.d.ts +13 -151
  102. package/dist/worker.d.ts.map +1 -1
  103. package/dist/worker.js +137 -738
  104. package/dist/worker.js.map +1 -1
  105. package/dist/workflows.d.ts +20 -1
  106. package/dist/workflows.d.ts.map +1 -1
  107. package/dist/workflows.js +30 -0
  108. package/dist/workflows.js.map +1 -1
  109. package/package.json +14 -4
package/CHANGELOG.md CHANGED
@@ -6,7 +6,72 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
7
  ---
8
8
 
9
- ## [Unreleased]
9
+ ## [0.9.0] - 2026-03-08
10
+
11
+ ### Added
12
+
13
+ - Subject-based filtering for `BroadcastWorker` - NATS-style wildcard matching on job names. Configure via `subjects` option with `*` (single-token) and `>` (multi-token) wildcards. Non-matching messages are auto-acknowledged. Exported `matchSubject` and `compileSubjectMatcher` utilities (#119).
14
+ - `Producer` class - lightweight job enqueuing for serverless/edge environments without EventEmitter or Job instances. Returns plain string IDs. Use with `ServerlessPool` for automatic connection reuse across warm Lambda/Edge invocations. API: `add(name, data, opts)`, `addBulk(jobs)`, `close()` (#112).
15
+ - `ServerlessPool` and `serverlessPool` singleton - connection pooling for serverless environments. Caches Producer instances by queue name and connection fingerprint. API: `getProducer(name, opts)`, `closeAll()`, `size` (#112).
16
+ - LIFO (Last-In-First-Out) job processing via `lifo: true` option. Uses dedicated Valkey LIST with RPUSH/RPOP. Priority and delayed jobs take precedence. Cannot be combined with ordering keys (#87).
17
+ - Time-series metrics - `queue.getMetrics(type, opts?)` returns per-minute throughput and latency data with 24-hour retention. Zero extra RTTs (#82).
18
+ - `opts.jobId` - custom job IDs for deterministic identity. Max 256 characters (#79).
19
+ - `queue.addAndWait(name, data, { waitTimeout })` - enqueue and wait for completion without polling.
20
+ - `job.moveToDelayed(timestampMs, nextStep?)` - pause active job mid-processor for step-job workflows.
21
+ - `DelayedError` - exported error type for step-job control.
22
+ - Batch processing via `batch: { size, timeout? }` option. Processor receives `Job[]`, returns `R[]`. `BatchError` for partial failure (#81).
23
+ - `glide-mq/proxy` subpath - HTTP proxy for cross-language job enqueue. REST endpoints with queue allowlist, 1MB limit, graceful shutdown (#83).
24
+ - Wire protocol documentation (`docs/WIRE_PROTOCOL.md`) - raw FCALL reference for any language (#83).
25
+ - DAG workflows - `FlowProducer.addDAG()` and `dag()` helper for arbitrary DAG topologies (#86).
26
+ - Serverless usage guide (`docs/SERVERLESS.md`) - Lambda, Cloudflare Workers, Vercel Edge examples.
27
+ - List-active counter self-healing via `glidemq_healListActive` Lua function. Automatically corrects counter drift caused by worker crashes during scheduler promotion ticks (#124).
28
+ - Proxy endpoints: `GET /queues/:name/jobs` (list/filter), `DELETE /queues/:name/jobs/:id` (#124).
29
+ - CI: `npm audit` security scanning, `timeout-minutes` on all jobs, `npm ci` with cache in publish workflow (#124).
30
+
31
+ ### Fixed
32
+
33
+ - 62 issues from a deep project audit across 7 domains (security, performance, code quality, architecture, testing, backend, devops) (#124):
34
+ - **Critical**: Worker heartbeat unhandled rejections, proxy validation gaps (NaN/Infinity), proxy queue cache race condition, poll loop promise handling on close, cross-queue parent registration error handling.
35
+ - **Security**: Sandbox path traversal protection via `realpathSync`, proxy input validation with `Number.isFinite`, queue name length limit (256 chars).
36
+ - **Performance**: Lua metrics HKEYS scan frequency reduced 10x, token bucket early exit, DAG string parsing O(n) to O(1).
37
+ - **Reliability**: Worker/Producer `close()` with double-close guard and closed flag, `QueueEvents` recursive poll guard, sandbox pool exit/error listener cleanup, serverless pool closing state guard.
38
+ - **Proxy**: Configurable `onError` callback (replaces silent error swallowing), graceful shutdown with draining flag, pause/resume returns 200 with state.
39
+ - `globalConcurrency` enforced for LIFO/priority-list jobs via atomic `rpopAndReserve` (#87).
40
+ - Scheduler LIFO forwarding and FlowProducer child LIFO routing (#87).
41
+ - `list-active` counter DECR on job removal/deferral (#87).
42
+ - Function library bumped to version 60.
43
+
44
+ ### Changed
45
+
46
+ - **Breaking (internal)**: `Worker` and `BroadcastWorker` now extend `BaseWorker` abstract class. Public API unchanged. Eliminates ~1400 lines of duplication (3407 to 2024 lines, 41% reduction) (#124).
47
+ - Worker uses explicit state machine (7 states: created, initializing, running, paused, draining, closing, closed) replacing boolean flags (#124).
48
+ - Proxy pause/resume endpoints return 200 with `{ paused: boolean }` instead of 204 (#124).
49
+ - Proxy health endpoint includes `queues` count (#124).
50
+ - Test suite: 24 hardcoded `setTimeout` waits replaced with `waitFor` predicates (#124).
51
+ - 79 eslint `no-unused-vars` warnings resolved across test files (#124).
52
+
53
+ ---
54
+
55
+ ## [0.8.1] - 2026-02-27
56
+
57
+ ### Security
58
+
59
+ - Reject invalid cron patterns: zero step (`*/0`), out-of-bounds values, reversed ranges, malformed tokens (#56).
60
+ - Enforce 1MB payload limit on job data, progress, and logs using `Buffer.byteLength` for correct UTF-8 byte counting. Covers `add`, `addBulk`, `updateData`, `updateProgress`, and `log` (#61).
61
+ - Fix path leak in sandbox error messages (#54).
62
+
63
+ ### Performance
64
+
65
+ - Hierarchical cron search replacing brute-force minute iteration - 4400x speedup for yearly schedules. UTC-correct date handling, 10-year search horizon (#59).
66
+ - Batch Redis commands in `Job.retry()` and `updateProgress()` (#53).
67
+
68
+ ### Added
69
+
70
+ - Comprehensive local fuzzer with pre-push hook.
71
+
72
+ ### Docs
73
+
74
+ - Dashboard section in README, feature map improvements (#57, #58).
10
75
 
11
76
  ---
12
77
 
package/README.md CHANGED
@@ -1,58 +1,42 @@
1
1
  # glide-mq
2
2
 
3
- **High-performance message queue for Node.js** — powered by Valkey/Redis Streams and a Rust-native NAPI client.
3
+ [![npm version](https://img.shields.io/npm/v/glide-mq)](https://www.npmjs.com/package/glide-mq)
4
+ [![license](https://img.shields.io/npm/l/glide-mq)](https://github.com/avifenesh/glide-mq/blob/main/LICENSE)
5
+ [![CI](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml/badge.svg)](https://github.com/avifenesh/glide-mq/actions/workflows/ci.yml)
6
+ [![node](https://img.shields.io/node/v/glide-mq)](https://nodejs.org/)
4
7
 
5
- If you find this useful, [give it a ⭐ on GitHub](https://github.com/avifenesh/glide-mq) — it helps the project reach more developers.
8
+ High-performance message queue for Node.js built on Valkey/Redis Streams with 1-RTT job operations and cluster-native design.
6
9
 
7
- ```bash
8
- npm install glide-mq
9
- ```
10
+ glide-mq is for anyone building background jobs, task queues, or workflow orchestration in Node.js. It connects through a Rust-native NAPI client ([valkey-glide](https://github.com/valkey-io/valkey-glide)), executes all queue logic in a single Valkey Server Function call per operation (FCALL, not EVAL), and hash-tags every key for automatic cluster slot alignment. The result is fewer round trips, no Lua cache misses, and zero cluster configuration.
11
+
12
+ > If glide-mq is useful to you, consider giving it a star on [GitHub](https://github.com/avifenesh/glide-mq). It helps others discover the project.
10
13
 
11
14
  ## Why glide-mq
12
15
 
13
- - **1 RTT per job** `completeAndFetchNext` finishes the current job and fetches the next one in a single round-trip
14
- - **Rust core, not ioredis** built on [Valkey GLIDE](https://github.com/valkey-io/valkey-glide)'s native NAPI bindings for lower latency and less GC pressure
15
- - **1 function library, not 53 scripts** all queue logic runs as a single Valkey Server Function (no EVAL overhead)
16
- - **Cluster-native** hash-tagged keys work out of the box; no manual `{braces}` needed
17
- - **Cloud-ready** AZ-affinity routing and IAM auth built in
16
+ - Use this when you need **throughput**: 48k jobs/s at concurrency=50, 4x faster than BullMQ on the same hardware.
17
+ - Use this when you run **Valkey/Redis clusters**: all keys hash-tagged out of the box, no `{braces}` workarounds.
18
+ - Use this when you need **workflows**: parent-child trees, DAGs with fan-in, step jobs, batch processing, and cron scheduling in one library.
19
+ - Use this when you deploy to **serverless**: lightweight `Producer` and `ServerlessPool` cache connections across warm invocations.
20
+ - Use this when you want **pub/sub with durability**: `Broadcast` delivers to all subscribers with retries, backpressure, and NATS-style subject filtering.
18
21
 
19
- ## Features
22
+ ## Install
20
23
 
21
- - **Queues & Workers** — producer/consumer with configurable concurrency
22
- - **Delayed & priority jobs** — schedule jobs for later or run high-priority work first
23
- - **Workflows** — `FlowProducer` parent-child trees, `chain`, `group`, `chord` pipelines with result aggregation
24
- - **Schedulers** — cron and interval repeatable jobs, persisted across restarts
25
- - **Per-key ordering** sequential processing per key while staying parallel across keys
26
- - **Rate limiting** — token-bucket (cost-based), per-group, and global rate limiting
27
- - **Retries & DLQ** — exponential/fixed/custom backoff with dead-letter queues
28
- - **Deduplication** — simple, throttle, and debounce modes with configurable TTL
29
- - **Job revocation** — cooperative cancellation via AbortSignal for active jobs
30
- - **Stalled job recovery** — auto-reclaim jobs from crashed workers via XAUTOCLAIM
31
- - **Global concurrency** — cross-worker active job cap for the entire queue
32
- - **Pause & resume** — pause/resume at queue level or per-worker, with force option
33
- - **Drain** — `queue.drain(delayed?)` removes all waiting (and optionally delayed) jobs in a single server-side call
34
- - **Real-time events** — `QueueEvents` stream for added, completed, failed, stalled, revoked, and more
35
- - **Job search** — query by state, name, and data filters
36
- - **Progress tracking** — real-time numeric or object progress updates
37
- - **Batch API** — `addBulk` for high-throughput ingestion (12.7× faster than serial)
38
- - **Compression** — transparent gzip (up to 98% size reduction)
39
- - **Graceful shutdown** — one-liner `gracefulShutdown()` for SIGTERM/SIGINT handling
40
- - **Connection sharing** — reuse a single client across components to reduce TCP connections
41
- - **Observability** — OpenTelemetry tracing, per-job logs, [`@glidemq/dashboard`](https://github.com/avifenesh/glidemq-dashboard) web UI
42
- - **In-memory testing** — `TestQueue` & `TestWorker` with zero dependencies via `glide-mq/testing`
43
-
44
- ## Quick Start
24
+ ```bash
25
+ npm install glide-mq
26
+ ```
27
+
28
+ Requires Node.js 20+ and a running [Valkey](https://valkey.io) 7.0+ or Redis 7.0+ instance.
29
+
30
+ ## Quick start
45
31
 
46
32
  ```typescript
47
33
  import { Queue, Worker } from 'glide-mq';
48
34
 
49
35
  const connection = { addresses: [{ host: 'localhost', port: 6379 }] };
50
36
 
51
- // Producer
52
37
  const queue = new Queue('tasks', { connection });
53
38
  await queue.add('send-email', { to: 'user@example.com', subject: 'Hello' });
54
39
 
55
- // Consumer
56
40
  const worker = new Worker('tasks', async (job) => {
57
41
  console.log(`Processing ${job.name}:`, job.data);
58
42
  return { sent: true };
@@ -62,8 +46,6 @@ worker.on('completed', (job) => console.log(`Job ${job.id} done`));
62
46
  worker.on('failed', (job, err) => console.error(`Job ${job.id} failed:`, err.message));
63
47
  ```
64
48
 
65
- Requires Node.js 20+ and a running [Valkey](https://valkey.io) (7.0+) or Redis 7.0+ instance.
66
-
67
49
  ## Benchmarks
68
50
 
69
51
  | Concurrency | Throughput |
@@ -73,28 +55,178 @@ Requires Node.js 20+ and a running [Valkey](https://valkey.io) (7.0+) or Redis 7
73
55
  | c=10 | 15,504 jobs/s |
74
56
  | c=50 | 48,077 jobs/s |
75
57
 
76
- `addBulk` batch API: **1,000 jobs in 18 ms** (12.7x faster than serial).
58
+ `addBulk` batch API: **1,000 jobs in 18 ms** (12.7x faster than serial).
77
59
  Gzip compression: **98% payload reduction** on 15 KB payloads.
78
60
 
79
61
  *Valkey 8.0, single node, no-op processor. Run `npm run bench` to reproduce.*
80
62
 
63
+ ## Comparison
64
+
65
+ | | glide-mq | BullMQ | Bee Queue |
66
+ |---|---|---|---|
67
+ | **Network per job** | 1 RTT (`completeAndFetchNext`) | 4-7 RTTs (lock + complete + fetch) | 2-3 RTTs |
68
+ | **Client** | Rust NAPI ([valkey-glide](https://github.com/valkey-io/valkey-glide)) | ioredis (pure JS) | node_redis (pure JS) |
69
+ | **Server logic** | 1 Valkey Function library (persistent, named) | 53 EVAL scripts (cache-miss prone) | Lua scripts |
70
+ | **Cluster** | Hash-tagged keys, zero config | Manual `{braces}` or workarounds | Not supported |
71
+ | **Workflows** | FlowProducer trees, DAG, chain/group/chord | FlowProducer trees | Not supported |
72
+ | **Pub/sub** | Native Broadcast with subject filtering | Not supported | Not supported |
73
+ | **Serverless** | Producer + ServerlessPool | Not supported | Not supported |
74
+ | **Throughput** | 48k jobs/s (c=50) | ~12k jobs/s (c=50) | ~5k jobs/s (c=50) |
75
+
76
+ ## Core concepts
77
+
78
+ - **Queue** -- stores jobs in Valkey Streams. Handles enqueue, delay, priority, pause, drain, and bulk operations.
79
+ - **Worker** -- processes jobs with configurable concurrency, prefetch, lock duration, and stalled-job recovery.
80
+ - **Job** -- a unit of work with name, data, options (retries, backoff, priority, TTL), and lifecycle events.
81
+ - **FlowProducer** -- creates parent-child job trees and DAGs. A parent waits for all children before processing.
82
+ - **Producer** -- lightweight enqueue-only client. No EventEmitter, no Job instances, returns plain string IDs. Built for serverless.
83
+ - **Broadcast** -- fan-out pub/sub. Each message is delivered to every subscriber group with independent retries and backpressure.
84
+ - **QueueEvents** -- real-time stream of job lifecycle events (completed, failed, delayed, waiting, etc.).
85
+
86
+ ## Features
87
+
88
+ ### Core
89
+
90
+ - **Queues and workers** with configurable concurrency, prefetch, and lock duration ([Usage](docs/USAGE.md))
91
+ - **Delayed, priority, and bulk enqueue** for scheduling and high-throughput ingestion ([Usage](docs/USAGE.md))
92
+ - **Batch processing** -- process multiple jobs at once via `batch: { size, timeout? }` ([Usage](docs/USAGE.md#batch-processing))
93
+ - **Request-reply** -- `queue.addAndWait(name, data, { waitTimeout })` for synchronous RPC ([Usage](docs/USAGE.md#request-reply-with-addandwait))
94
+ - **LIFO mode** -- `lifo: true` processes newest jobs first ([Advanced](docs/ADVANCED.md#lifo-mode))
95
+ - **Job TTL** -- auto-expire jobs after a time-to-live window ([Advanced](docs/ADVANCED.md#job-ttl))
96
+ - **Custom job IDs** -- deterministic, idempotent enqueue; duplicates return `null` ([Advanced](docs/ADVANCED.md#custom-job-ids))
97
+ - **Pluggable serializers** -- swap JSON for any `{ serialize, deserialize }` implementation ([Advanced](docs/ADVANCED.md#pluggable-serializers))
98
+ - **Transparent compression** -- gzip payloads at the queue level ([Advanced](docs/ADVANCED.md#transparent-compression))
99
+
100
+ ### Reliability
101
+
102
+ - **Retries with exponential, fixed, or custom backoff** and dead-letter queues ([Advanced](docs/ADVANCED.md#retries-and-backoff))
103
+ - **UnrecoverableError** -- skip all retries and fail permanently ([Usage](docs/USAGE.md#unrecoverableerror))
104
+ - **Stalled recovery** -- auto-reclaim stuck jobs via consumer group PEL and `XAUTOCLAIM` ([Usage](docs/USAGE.md#worker))
105
+ - **Job revocation** -- cooperative cancellation with `AbortSignal` ([Advanced](docs/ADVANCED.md#job-revocation))
106
+ - **Deduplication** -- simple, throttle, and debounce modes with configurable TTL ([Advanced](docs/ADVANCED.md#deduplication))
107
+ - **Per-key ordering** -- sequential processing per ordering key with configurable group concurrency ([Advanced](docs/ADVANCED.md#ordering-and-group-concurrency))
108
+ - **Rate limiting** -- per-group sliding window, token bucket, and global queue-wide limits ([Advanced](docs/ADVANCED.md#global-rate-limiting))
109
+ - **Sandboxed processors** -- run processors in worker threads or child processes ([Architecture](docs/ARCHITECTURE.md))
110
+
111
+ ### Orchestration
112
+
113
+ - **FlowProducer** -- parent-child job trees with `chain`, `group`, and `chord` helpers ([Workflows](docs/WORKFLOWS.md))
114
+ - **DAG workflows** -- arbitrary dependency graphs with `FlowProducer.addDAG()` and `dag()` helper; multi-parent fan-in, diamond patterns, cycle detection ([Workflows](docs/WORKFLOWS.md))
115
+ - **Step jobs** -- `job.moveToDelayed(timestamp, nextStep)` suspends a job mid-processor and resumes later ([Usage](docs/USAGE.md#pause-and-resume-a-job-later-step-jobs))
116
+ - **Dynamic children** -- `job.moveToWaitingChildren()` pauses a parent to add children mid-execution ([Workflows](docs/WORKFLOWS.md))
117
+ - **Batch processing** -- process multiple jobs at once for bulk I/O ([Usage](docs/USAGE.md#batch-processing))
118
+
119
+ ### Scheduling
120
+
121
+ - **Cron and interval schedulers** -- 5-field cron with timezone, fixed intervals, and `repeatAfterComplete` mode ([Advanced](docs/ADVANCED.md#job-schedulers))
122
+ - **Bounded schedulers** -- `limit`, `startDate`, and `endDate` for finite schedules ([Advanced](docs/ADVANCED.md#bounded-schedulers))
123
+
124
+ ### Pub/Sub
125
+
126
+ - **Broadcast** -- fan-out delivery to all subscriber groups ([Usage](docs/USAGE.md#broadcast--broadcastworker))
127
+ - **BroadcastWorker** -- independent consumer groups with own retries, concurrency, and backpressure ([Usage](docs/USAGE.md#broadcast--broadcastworker))
128
+ - **Subject filtering** -- NATS-style patterns (`*` one segment, `>` trailing wildcard) for topic-based routing ([Usage](docs/USAGE.md#broadcast--broadcastworker))
129
+
130
+ ### Serverless
131
+
132
+ - **Producer** -- enqueue without EventEmitter overhead, returns plain string IDs ([Usage](docs/USAGE.md))
133
+ - **ServerlessPool** -- connection caching across warm Lambda/Edge invocations ([Serverless](docs/SERVERLESS.md))
134
+
135
+ ### Observability
136
+
137
+ - **QueueEvents** -- real-time stream-based lifecycle events ([Observability](docs/OBSERVABILITY.md))
138
+ - **Time-series metrics** -- per-minute throughput and latency retained 24h, recorded server-side ([Observability](docs/OBSERVABILITY.md))
139
+ - **OpenTelemetry** -- automatic span emission; bring your own tracer or auto-detect `@opentelemetry/api` ([Observability](docs/OBSERVABILITY.md))
140
+ - **Job logs** -- append structured log entries per job with pagination ([Observability](docs/OBSERVABILITY.md))
141
+ - **Job mutations** -- `changePriority()`, `changeDelay()`, `promote()` after enqueue; `retryJobs()` and `clean()` in bulk ([Usage](docs/USAGE.md))
142
+ - **Graceful shutdown** -- `gracefulShutdown()` helper registers SIGTERM/SIGINT handlers ([Usage](docs/USAGE.md#graceful-shutdown))
143
+ - **In-memory testing** -- `TestQueue` and `TestWorker` with zero Valkey dependency ([Testing](docs/TESTING.md))
144
+
145
+ ### Cloud
146
+
147
+ - **Cluster-native** -- hash-tagged keys `glide:{queueName}:*` route all queue data to the same slot ([Usage](docs/USAGE.md#cluster-mode))
148
+ - **IAM authentication** -- native SigV4 auth for AWS ElastiCache and MemoryDB ([Usage](docs/USAGE.md#cluster-mode))
149
+ - **AZ-affinity routing** -- `readFrom: 'AZAffinity'` routes reads to same-AZ replicas ([Usage](docs/USAGE.md#cluster-mode))
150
+
151
+ ## Framework integrations
152
+
153
+ | Package | Install | Setup |
154
+ |---------|---------|-------|
155
+ | [`@glidemq/hono`](https://github.com/avifenesh/glidemq-hono) | `npm i @glidemq/hono` | `app.use(glideMQ({ connection, queues: { ... } }))` |
156
+ | [`@glidemq/fastify`](https://github.com/avifenesh/glidemq-fastify) | `npm i @glidemq/fastify` | `app.register(glideMQPlugin, { connection, queues: { ... } })` |
157
+ | [`@glidemq/nestjs`](https://github.com/avifenesh/glidemq-nestjs) | `npm i @glidemq/nestjs` | `GlideMQModule.forRoot({ connection, queues: { ... } })` |
158
+ | [`@glidemq/dashboard`](https://github.com/avifenesh/glidemq-dashboard) | `npm i @glidemq/dashboard` | `app.use('/dashboard', createDashboard([queue1, queue2]))` |
159
+ | @glidemq/hapi | coming soon | Hapi plugin with the same REST + SSE surface |
160
+
161
+ All framework packages provide REST endpoints, SSE events, and serverless Producer support. See each package's README for full documentation.
162
+
163
+ ## Cross-language
164
+
165
+ Non-Node.js services can enqueue jobs into glide-mq queues using the HTTP proxy or direct FCALL:
166
+
167
+ ```typescript
168
+ import { createProxyServer } from 'glide-mq/proxy';
169
+
170
+ const proxy = createProxyServer({
171
+ connection: { addresses: [{ host: 'localhost', port: 6379 }] },
172
+ queues: ['emails', 'reports'],
173
+ });
174
+ proxy.app.listen(3000);
175
+ ```
176
+
177
+ ```bash
178
+ curl -X POST http://localhost:3000/queues/emails/jobs \
179
+ -H 'Content-Type: application/json' \
180
+ -d '{"name": "send-email", "data": {"to": "user@example.com"}}'
181
+ ```
182
+
183
+ Endpoints: `POST /queues/:name/jobs`, `POST /queues/:name/jobs/bulk`, `GET /queues/:name/jobs/:id`, `POST /queues/:name/pause`, `POST /queues/:name/resume`, `GET /queues/:name/counts`, `GET /health`.
184
+
185
+ For zero-overhead integration, call Valkey Server Functions directly from any language with a Valkey client. See [Wire Protocol](docs/WIRE_PROTOCOL.md) for FCALL signatures, key layout, and examples in Python and Go.
186
+
81
187
  ## Documentation
82
188
 
83
- | Guide | What you'll learn |
84
- |-------|-------------------|
85
- | [Usage](docs/USAGE.md) | Queue & Worker basics, graceful shutdown, cluster mode |
86
- | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries & DLQ |
87
- | [Workflows](docs/WORKFLOWS.md) | FlowProducer, `chain`, `group`, `chord` pipelines |
88
- | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, job logs, `@glidemq/dashboard` |
89
- | [Testing](docs/TESTING.md) | In-memory `TestQueue` & `TestWorker` no Valkey needed |
90
- | [Architecture](docs/ARCHITECTURE.md) | Key design, Valkey functions, data layout |
91
- | [Migration](docs/MIGRATION.md) | Coming from BullMQ? API mapping & workarounds |
92
-
93
- ## Get Involved
94
-
95
- - ⭐ [Star on GitHub](https://github.com/avifenesh/glide-mq) — helps others find the project
96
- - 🐛 [Open an issue](https://github.com/avifenesh/glide-mq/issues) — bug reports & feature requests welcome
97
- - 💬 [Discussions](https://github.com/avifenesh/glide-mq/discussions) — questions, ideas, show & tell
189
+ | Guide | Topics |
190
+ |-------|--------|
191
+ | [Usage](docs/USAGE.md) | Queue, Worker, Broadcast, Producer, batch, request-reply, step jobs, graceful shutdown, cluster mode |
192
+ | [Advanced](docs/ADVANCED.md) | Schedulers, rate limiting, dedup, compression, retries, DLQ, custom IDs, LIFO, TTL, serializers |
193
+ | [Workflows](docs/WORKFLOWS.md) | FlowProducer, DAG, `chain`, `group`, `chord`, dynamic children |
194
+ | [Observability](docs/OBSERVABILITY.md) | OpenTelemetry, time-series metrics, job logs, dashboard |
195
+ | [Serverless](docs/SERVERLESS.md) | Producer, ServerlessPool, Lambda and Edge deployment |
196
+ | [Testing](docs/TESTING.md) | In-memory `TestQueue` and `TestWorker` -- no Valkey needed |
197
+ | [Wire Protocol](docs/WIRE_PROTOCOL.md) | Cross-language FCALL specs, key layout, Python and Go examples |
198
+ | [Architecture](docs/ARCHITECTURE.md) | Key design, Valkey functions, LIFO, Broadcast, DAG internals |
199
+ | [Durability](docs/DURABILITY.md) | Persistence modes, crash windows, feature-specific durability |
200
+ | [Migration](docs/MIGRATION.md) | Coming from BullMQ? API mapping and step-by-step guide |
201
+
202
+ ## Limitations
203
+
204
+ - Requires a running Valkey 7.0+ or Redis 7.0+ instance. There is no embedded mode.
205
+ - Node.js only. The Rust-native NAPI client (`@valkey/valkey-glide`) does not run in browsers or Deno.
206
+ - At-least-once delivery semantics. Jobs may be processed more than once after crashes or stalled recovery.
207
+ - Not a streaming platform. glide-mq is a job/task queue, not a replacement for Kafka or NATS JetStream.
208
+ - Single dependency on `@glidemq/speedkey` (which wraps `@valkey/valkey-glide`). Native addon compilation is required on install.
209
+
210
+ ## Ecosystem
211
+
212
+ | Package | Description | Links |
213
+ |---------|-------------|-------|
214
+ | [glide-mq](https://github.com/avifenesh/glide-mq) | Core queue library | [npm](https://www.npmjs.com/package/glide-mq) |
215
+ | [@glidemq/hono](https://github.com/avifenesh/glidemq-hono) | Hono middleware -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/hono) |
216
+ | [@glidemq/fastify](https://github.com/avifenesh/glidemq-fastify) | Fastify plugin -- REST endpoints, SSE, serverless Producer | [npm](https://www.npmjs.com/package/@glidemq/fastify) |
217
+ | [@glidemq/nestjs](https://github.com/avifenesh/glidemq-nestjs) | NestJS module -- decorators, DI, lifecycle management | [npm](https://www.npmjs.com/package/@glidemq/nestjs) |
218
+ | [@glidemq/dashboard](https://github.com/avifenesh/glidemq-dashboard) | Web UI -- metrics charts, scheduler management, job mutations | [npm](https://www.npmjs.com/package/@glidemq/dashboard) |
219
+ | [@glidemq/speedkey](https://github.com/avifenesh/speedkey) | Valkey GLIDE client with native NAPI bindings | [npm](https://www.npmjs.com/package/@glidemq/speedkey) |
220
+ | [glidemq-examples](https://github.com/avifenesh/glidemq-examples) | 34 runnable examples across frameworks and use cases | [GitHub](https://github.com/avifenesh/glidemq-examples) |
221
+
222
+ > If glide-mq is useful to you, consider [starring the repo](https://github.com/avifenesh/glide-mq). It helps others find the project.
223
+
224
+ ## Contributing
225
+
226
+ Bug reports, feature requests, and pull requests are welcome. See [CHANGELOG.md](CHANGELOG.md) for release history.
227
+
228
+ - [Open an issue](https://github.com/avifenesh/glide-mq/issues)
229
+ - [Discussions](https://github.com/avifenesh/glide-mq/discussions)
98
230
 
99
231
  ## License
100
232
 
@@ -0,0 +1,282 @@
1
+ import { EventEmitter } from 'events';
2
+ import type { WorkerOptions, Processor, BatchProcessor, Client, Serializer } from './types';
3
+ import { Job } from './job';
4
+ import { buildKeys } from './utils';
5
+ import type { QueueKeys } from './functions/index';
6
+ import { Scheduler } from './scheduler';
7
+ /** Names of lifecycle events a worker can emit (BaseWorker extends EventEmitter; e.g. handleJobFailure emits 'failed'). */
+ export type WorkerEvent = 'completed' | 'failed' | 'error' | 'stalled' | 'closing' | 'closed' | 'active' | 'drained';
8
+ /**
9
+ * Configuration that differs between Worker and BroadcastWorker.
+ * Passed from the subclass constructor to BaseWorker.
10
11
+ */
12
+ export interface BaseWorkerConfig {
13
+ /** Consumer group name for XREADGROUP / stalled reclaim. */
14
+ consumerGroup: string;
15
+ /** Whether this worker operates in broadcast (fan-out) mode. */
16
+ broadcastMode: boolean;
17
+ /** Stream ID to start from when creating the consumer group (presumably '0' or '$' - confirm which values the subclasses pass). */
18
+ startFrom: string;
19
+ }
20
+ /**
21
+ * Base class that contains all shared Worker / BroadcastWorker logic.
22
+ *
23
+ * Subclasses implement:
24
+ * - pollOnce(): the per-iteration poll strategy (list polling vs stream-only)
25
+ * - getAttemptsMade(): how to resolve attemptsMade (shared hash vs per-subscription)
26
+ * - isDrainComplete(): what "empty" means for drain()
27
+ *
+ * NOTE(review): this is a generated declaration file (.d.ts, see sourceMappingURL);
+ * the runtime logic lives in dist/base-worker.js - edit the TypeScript source, not this file.
+ */
28
+ export declare abstract class BaseWorker<D = any, R = any> extends EventEmitter {
29
+ readonly name: string;
30
+ protected opts: WorkerOptions;
31
+ protected processor: Processor<D, R>;
32
+ protected commandClient: Client | null;
33
+ protected commandClientOwned: boolean;
34
+ protected blockingClient: Client | null;
35
+ protected running: boolean;
36
+ protected paused: boolean;
37
+ protected closing: boolean;
38
+ protected closed: boolean;
39
+ protected queueKeys: ReturnType<typeof buildKeys>;
40
+ protected consumerId: string;
41
+ protected activeCount: number;
42
+ protected activePromises: Set<Promise<void>>;
43
+ protected activeAbortControllers: Map<string, AbortController>;
44
+ protected scheduler: Scheduler | null;
45
+ protected initPromise: Promise<void>;
46
+ protected rateLimitUntil: number;
47
+ protected isDrained: boolean;
48
+ protected reconnectBackoff: number;
49
+ protected internalEvents: EventEmitter<[never]>;
50
+ protected concurrency: number;
51
+ protected prefetch: number;
52
+ protected blockTimeout: number;
53
+ protected stalledInterval: number;
54
+ protected maxStalledCount: number;
55
+ protected lockDuration: number;
56
+ protected heartbeatIntervals: Map<string, ReturnType<typeof setInterval>>;
57
+ protected xreadStreams: Record<string, string>;
58
+ protected globalConcurrencyEnabled: boolean;
59
+ protected globalRateLimitEnabled: boolean;
60
+ protected cachedRateLimitMax: number;
61
+ protected cachedRateLimitDuration: number;
62
+ protected sandboxClose?: (force?: boolean) => Promise<void>;
63
+ protected workerHeartbeatTimer: ReturnType<typeof setInterval> | null;
64
+ protected pollLoopPromise: Promise<void> | null;
65
+ protected readonly startedAt: number;
66
+ protected readonly hostname: string;
67
+ protected serializer: Serializer;
68
+ protected readonly batchMode: boolean;
69
+ protected readonly batchSize: number;
70
+ protected readonly batchTimeout: number;
71
+ protected readonly batchProcessor: BatchProcessor<D, R> | null;
72
+ protected readonly consumerGroup: string;
73
+ protected readonly broadcastMode: boolean;
74
+ protected readonly startFrom: string;
75
+ protected constructor(name: string, processor: Processor<D, R> | BatchProcessor<D, R> | string, opts: WorkerOptions, config: BaseWorkerConfig);
76
+ /**
77
+ * Wait for the worker to be fully initialized and connected.
78
+ */
79
+ waitUntilReady(): Promise<void>;
80
+ /** One-time async setup; presumably resolves initPromise awaited by waitUntilReady() - confirm in base-worker.js. */
+ private init;
81
+ /**
82
+ * Main poll loop: XREADGROUP BLOCK on the stream, dispatch jobs to the processor.
83
+ * Respects concurrency limits by only requesting (prefetch - activeCount) entries.
84
+ * On connection errors, uses exponential backoff (1s, 2s, 4s, 8s, max 30s) and reconnects.
85
+ */
86
+ protected pollLoop(): Promise<void>;
87
+ /** Reconnection bookkeeping shared by pollLoop and reconnectAndResume - exact shape not visible in this declaration. */
+ private reconnectCtx;
88
+ /**
89
+ * Attempt to reconnect clients and resume polling after a connection error.
90
+ */
91
+ private reconnectAndResume;
92
+ protected waitForSlot(): Promise<void>;
93
+ /**
94
+ * Subclass-specific polling strategy. Called once per poll loop iteration.
95
+ * Worker: checks priority/LIFO lists then XREADGROUP.
96
+ * BroadcastWorker: XREADGROUP only.
97
+ */
98
+ protected abstract pollOnce(): Promise<void>;
99
+ /**
100
+ * Dispatch a single job for processing.
101
+ * Increments activeCount, runs the processor, then completes or fails the job.
102
+ */
103
+ protected dispatchJob(jobId: string, entryId: string): void;
104
+ /**
105
+ * Dispatch a batch for processing (c>1 mode).
106
+ */
107
+ protected dispatchBatch(batch: {
108
+ jobId: string;
109
+ entryId: string;
110
+ job: Job<D, R>;
111
+ }[]): void;
112
+ /**
113
+ * Activate collected batch entries and process them.
114
+ * Shared between Worker and BroadcastWorker batch paths.
115
+ */
116
+ protected activateAndProcessBatch(collected: {
117
+ jobId: string;
118
+ entryId: string;
119
+ }[]): Promise<void>;
120
+ /**
121
+ * Process a batch of jobs through the batch processor.
122
+ * Handles completion, failure, and partial failure (BatchError) for each job individually.
123
+ */
124
+ protected processBatch(batch: {
125
+ jobId: string;
126
+ entryId: string;
127
+ job: Job<D, R>;
128
+ }[]): Promise<void>;
129
+ /**
130
+ * Handle a moveToActive result that is not a valid hash (null or REVOKED).
131
+ * Returns true if the result was handled (caller should return), false if the hash is valid.
132
+ */
133
+ protected handleMoveToActiveEdgeCase(moveResult: Record<string, string> | 'REVOKED' | 'EXPIRED' | 'GROUP_FULL' | 'GROUP_RATE_LIMITED' | 'GROUP_TOKEN_LIMITED' | 'ERR:COST_EXCEEDS_CAPACITY' | null, jobId: string, entryId: string): Promise<boolean>;
134
+ /**
135
+ * Run the processor with optional timeout, AbortController, and heartbeat.
136
+ * Returns { result, error } - exactly one will be set.
137
+ */
138
+ protected runProcessor(job: Job<D, R>, jobId: string): Promise<{
139
+ result?: R;
140
+ error?: Error;
141
+ aborted: boolean;
142
+ }>;
143
+ /**
144
+ * Resolve the number of attempts made for a job.
145
+ * Worker: reads from job.attemptsMade (shared hash).
146
+ * BroadcastWorker: reads per-subscription counter from :sub: hash.
147
+ */
148
+ protected getAttemptsMade(job: Job<D, R>, _jobId: string): Promise<number>;
149
+ /**
150
+ * Handle a failed job: applies rate limiting, backoff, DLQ, and emits 'failed'.
151
+ * Returns true when the job reached a terminal failed state, false when it will retry.
152
+ */
153
+ protected handleJobFailure(job: Job<D, R>, jobId: string, entryId: string, error: Error): Promise<boolean>;
154
+ /**
155
+ * Move an active job back into delayed state after the processor requests a pause.
156
+ */
157
+ protected handleMoveToDelayed(job: Job<D, R>, jobId: string, entryId: string, request: {
158
+ delayedUntil: number;
159
+ serializedData?: string;
160
+ nextData?: D;
161
+ }): Promise<void>;
162
+ /**
163
+ * After a repeatAfterComplete job completes or terminally fails,
164
+ * update the scheduler entry so the next job is scheduled.
165
+ *
166
+ * KNOWN LIMITATIONS:
167
+ * 1. Non-atomic: This update happens after the job completion transaction,
168
+ * so a worker crash between completion and this call will leave the scheduler
169
+ * stuck at nextRun=0 (awaiting completion sentinel) indefinitely.
170
+ * 2. Non-worker failures: Jobs that reach terminal failure outside the worker
171
+ * path (e.g., revoked jobs, expired jobs in moveToActive, stalled terminal
172
+ * failures in glidemq_reclaimStalled) never trigger this update, leaving
173
+ * the scheduler permanently stuck.
174
+ * 3. Race conditions: The idempotency check (nextRun === 0) prevents duplicate
175
+ * updates from stalled reclaim, but doesn't prevent races with concurrent
176
+ * upsertJobScheduler/removeJobScheduler (those use scheduler lock, this doesn't).
177
+ *
178
+ * MITIGATION: Run multiple workers for redundancy. Manually remove/re-add the
179
+ * scheduler to recover from stuck state.
180
+ *
181
+ * FUTURE WORK: Move scheduler update into Lua completion/failure functions to
182
+ * make it atomic and handle all terminal failure paths.
183
+ */
184
+ protected updateSchedulerAfterComplete(schedulerName: string, now: number): Promise<void>;
185
+ /**
186
+ * Build parent dependency info for complete/completeAndFetchNext calls.
187
+ */
188
+ protected buildParentInfo(job: Job<D, R>, jobId: string): Promise<{
189
+ depsMember: string;
190
+ parentId: string;
191
+ parentKeys: QueueKeys;
192
+ } | undefined>;
193
+ protected orderingMetaField(job: Job<D, R>): string | null;
194
+ /**
195
+ * Checks whether this job can run now under per-key ordering.
196
+ * Returns false when an earlier sequence for the same key is still pending.
197
+ */
198
+ protected isOrderingTurn(job: Job<D, R>): Promise<boolean>;
199
+ /**
200
+ * Re-enqueue out-of-order jobs instead of holding an active slot.
201
+ */
202
+ protected deferOutOfOrderJob(jobId: string, entryId: string): Promise<void>;
203
+ /**
204
+ * Process a job through its full lifecycle: activate, run processor, complete, fetch next.
205
+ * Used for both c=1 (inline, blocking poll loop) and c>1 (dispatched via dispatchJob).
206
+ * Chains into the next job via completeAndFetchNext to reuse the same dispatch slot.
207
+ */
208
+ protected processJob(jobId: string, entryId: string): Promise<void>;
209
+ /**
210
+ * Abort a job that is currently being processed by this worker.
211
+ * The processor receives the abort signal via job.abortSignal and must check it cooperatively.
212
+ * Returns true if the job was found and aborted, false if not currently active.
213
+ */
214
+ abortJob(jobId: string): boolean;
215
+ protected startHeartbeat(jobId: string): void;
216
+ protected stopHeartbeat(jobId: string): void;
217
+ protected moveToDLQ(job: Job<D, R>, error: Error): Promise<void>;
218
+ /**
219
+ * Check the server-side rate limiter and wait if the limit is exceeded.
220
+ * Also respects any manual rate limit set via rateLimit(ms).
221
+ */
222
+ protected waitForRateLimit(): Promise<void>;
223
+ /** Refresh cached meta flags from Valkey. Called on init and each scheduler tick. */
224
+ private refreshMetaFlags;
225
+ /**
226
+ * Register this worker in Valkey with a TTL-based heartbeat key.
227
+ * The key expires after stalledInterval ms; a periodic timer refreshes it at half that interval.
228
+ * Registration failure is non-fatal - the worker can still process jobs.
229
+ */
230
+ private registerWorker;
231
+ /**
232
+ * Check if the worker is currently running and not paused.
233
+ */
234
+ isRunning(): boolean;
235
+ /**
236
+ * Check if the worker is currently paused.
237
+ */
238
+ isPaused(): boolean;
239
+ /**
240
+ * Manually trigger a rate limit pause for the given duration.
241
+ * Subsequent jobs will wait until the pause expires.
242
+ */
243
+ rateLimit(ms: number): Promise<void>;
244
+ /**
245
+ * Pause the worker. If force=false (default), waits for active jobs to finish.
246
+ */
247
+ pause(force?: boolean): Promise<void>;
248
+ /**
249
+ * Resume the worker after a pause.
250
+ */
251
+ resume(): Promise<void>;
252
+ /**
253
+ * Check whether the queue has been fully drained.
254
+ * Worker: stream + scheduled must both be empty.
255
+ * BroadcastWorker: scheduled only (stream entries are retained for fan-out).
256
+ */
257
+ protected isDrainComplete(): Promise<boolean>;
258
+ /**
259
+ * Process all remaining jobs in the queue, then stop gracefully.
260
+ * Keeps polling until isDrainComplete() returns true, then closes the worker.
261
+ */
262
+ drain(): Promise<void>;
263
+ /**
264
+ * Close the worker. If force=false (default), waits for active jobs to finish.
265
+ * Idempotent: safe to call multiple times.
266
+ */
267
+ close(force?: boolean): Promise<void>;
268
+ protected waitForActiveJobs(): Promise<void>;
269
+ /** True when the error is the rate-limit sentinel (pairs with RateLimitError below) - TODO confirm the matching rule in base-worker.js. */
+ static isRateLimitError(error: Error): boolean;
270
+ /** Sentinel error class used with isRateLimitError(); the anonymous class shape below appears to be compiler-emitted output. */
+ static RateLimitError: {
271
+ new (): {
272
+ name: string;
273
+ message: string;
274
+ stack?: string;
275
+ cause?: unknown;
276
+ };
277
+ captureStackTrace(targetObject: object, constructorOpt?: Function): void;
278
+ prepareStackTrace(err: Error, stackTraces: NodeJS.CallSite[]): any;
279
+ stackTraceLimit: number;
280
+ };
281
+ }
282
+ //# sourceMappingURL=base-worker.d.ts.map