redis-stream-queue 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redis_stream_queue/__init__.py +27 -0
- redis_stream_queue/client.py +306 -0
- redis_stream_queue/consumer.py +218 -0
- redis_stream_queue/exceptions.py +14 -0
- redis_stream_queue/group.py +143 -0
- redis_stream_queue/message.py +88 -0
- redis_stream_queue/producer.py +84 -0
- redis_stream_queue/py.typed +0 -0
- redis_stream_queue/retry.py +92 -0
- redis_stream_queue/serializers.py +45 -0
- redis_stream_queue-0.1.0.dist-info/METADATA +695 -0
- redis_stream_queue-0.1.0.dist-info/RECORD +14 -0
- redis_stream_queue-0.1.0.dist-info/WHEEL +4 -0
- redis_stream_queue-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: redis-stream-queue
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: General-purpose async Redis Streams consumer group library with DLQ, crash recovery, and monitoring
|
|
5
|
+
Author: Min An
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: async,consumer-group,dlq,queue,redis,streams,worker
|
|
9
|
+
Classifier: Framework :: AsyncIO
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: redis>=5.0.0
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: fakeredis>=2.26; extra == 'dev'
|
|
21
|
+
Requires-Dist: hatchling; extra == 'dev'
|
|
22
|
+
Requires-Dist: msgpack>=1.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0; extra == 'dev'
|
|
26
|
+
Provides-Extra: msgpack
|
|
27
|
+
Requires-Dist: msgpack>=1.0; extra == 'msgpack'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# redis-stream-queue
|
|
31
|
+
|
|
32
|
+
Async Python library for Redis Streams consumer groups with built-in crash recovery, DLQ, and monitoring.
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
- **Producer**: push messages from any number of pods — XADD is atomic, no coordination needed
|
|
37
|
+
- **Consumer**: callback-based read → ACK loop; partial ACK supported
|
|
38
|
+
- **Crash recovery**: an XAUTOCLAIM cursor loop sweeps the full PEL on every iteration; NOGROUP auto-recovery re-creates the group if the stream or group is deleted externally
|
|
39
|
+
- **Dead-letter queue**: decode errors and poison pills (exceeding `max_deliveries`) routed to DLQ handler
|
|
40
|
+
- **Consumer metrics**: `consumer.metrics()` — `tps_in` / `tps_out` / `tps_total` (60s sliding window), avg TPS, read/acked/DLQ/error counters
|
|
41
|
+
- **Producer metrics**: `producer.metrics()` — push TPS, total pushed, avg TPS, uptime
|
|
42
|
+
- **Process-wide aggregation**: `StreamConsumer.all_metrics()` / `StreamProducer.all_metrics()` — collect from all live instances in this process via weakref registry; zero Redis overhead
|
|
43
|
+
- **Stream monitoring**: lag, PEL size, per-consumer idle time, health checks via `ConsumerGroup`
|
|
44
|
+
- **Pluggable serializers**: JSON (default), msgpack, pickle — or bring your own
|
|
45
|
+
- **Redis Cluster**: `from_cluster()` and `from_url()` factory methods
|
|
46
|
+
- **Multi-pod safe**: unique worker names auto-generated per pod (`{group}_{hostname}_{rand4}`)
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Requirements
|
|
51
|
+
|
|
52
|
+
- Python ≥ 3.11
|
|
53
|
+
- Redis ≥ 6.2 (XAUTOCLAIM support)
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install redis-stream-queue
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
With msgpack support:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install redis-stream-queue[msgpack]
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
### 1. Start Redis
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
docker run -p 6379:6379 redis
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 2. Producer
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import asyncio
|
|
83
|
+
from redis_stream_queue import StreamClient, StreamProducer
|
|
84
|
+
|
|
85
|
+
async def main():
|
|
86
|
+
client = StreamClient(host="localhost")
|
|
87
|
+
producer = StreamProducer(client=client, stream="orders", group="order_workers")
|
|
88
|
+
|
|
89
|
+
await producer.ensure_group() # idempotent — safe to call on every startup
|
|
90
|
+
msg_id = await producer.push({"order_id": 1})
|
|
91
|
+
print(f"pushed: {msg_id}")
|
|
92
|
+
await client.close()
|
|
93
|
+
|
|
94
|
+
asyncio.run(main())
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### 3. Consumer
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import asyncio
|
|
101
|
+
from redis_stream_queue import StreamClient, StreamConsumer, ConsumerConfig
|
|
102
|
+
|
|
103
|
+
async def handle(messages):
|
|
104
|
+
for msg in messages:
|
|
105
|
+
print(f"processing: {msg.data}")
|
|
106
|
+
return [m.id for m in messages]  # return the IDs to ACK; any ID left out stays in the PEL for retry
|
|
107
|
+
|
|
108
|
+
async def on_dlq(msg, reason):
|
|
109
|
+
print(f"DLQ [{reason}]: {msg.data}")
|
|
110
|
+
|
|
111
|
+
async def main():
|
|
112
|
+
client = StreamClient(host="localhost")
|
|
113
|
+
config = ConsumerConfig(
|
|
114
|
+
group="order_workers",
|
|
115
|
+
dlq_stream="orders_dlq",
|
|
116
|
+
batch_size=100,
|
|
117
|
+
block_ms=5_000,
|
|
118
|
+
max_deliveries=3,
|
|
119
|
+
)
|
|
120
|
+
consumer = StreamConsumer(
|
|
121
|
+
client=client,
|
|
122
|
+
stream="orders",
|
|
123
|
+
config=config,
|
|
124
|
+
handler=handle,
|
|
125
|
+
dlq_handler=on_dlq,
|
|
126
|
+
)
|
|
127
|
+
await consumer.run() # infinite loop; Ctrl-C / CancelledError to stop
|
|
128
|
+
|
|
129
|
+
asyncio.run(main())
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
> **Handler contract**: return a list of IDs to ACK. Return `[]` to ACK nothing (messages stay in PEL for retry). Never return `None` — that triggers a warning and no ACK.
|
|
133
|
+
|
|
134
|
+
### 4. Consumer Throughput Metrics
|
|
135
|
+
|
|
136
|
+
`consumer.metrics()` is non-blocking and makes no Redis calls — safe to poll from any monitoring loop or health endpoint.
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
import asyncio
|
|
140
|
+
from redis_stream_queue import StreamClient, StreamConsumer, ConsumerConfig
|
|
141
|
+
|
|
142
|
+
async def monitor(consumer):
|
|
143
|
+
while True:
|
|
144
|
+
m = consumer.metrics()
|
|
145
|
+
print(
|
|
146
|
+
f"in={m.tps_in:.1f} msg/s out={m.tps_out:.1f} msg/s total={m.tps_total:.1f} msg/s "
|
|
147
|
+
f"avg={m.avg_tps:.1f} msg/s "
|
|
148
|
+
f"read={m.total_read} acked={m.total_acked} "
|
|
149
|
+
f"dlq={m.total_dlq} errors={m.total_errors} "
|
|
150
|
+
f"uptime={m.uptime_secs:.0f}s"
|
|
151
|
+
)
|
|
152
|
+
await asyncio.sleep(5)
|
|
153
|
+
|
|
154
|
+
async def main():
|
|
155
|
+
client = StreamClient(host="localhost")
|
|
156
|
+
config = ConsumerConfig(group="order_workers")
|
|
157
|
+
consumer = StreamConsumer(client=client, stream="orders", config=config, handler=handle)
|
|
158
|
+
|
|
159
|
+
await asyncio.gather(consumer.run(), monitor(consumer))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
| Field | Type | Description |
|
|
163
|
+
|---|---|---|
|
|
164
|
+
| `tps_in` | `float` | Reads/sec — XREADGROUP + XAUTOCLAIM, sliding 60s window |
|
|
165
|
+
| `tps_out` | `float` | Acked/sec — sliding 60s window |
|
|
166
|
+
| `tps_total` | `float` | `tps_in + tps_out` |
|
|
167
|
+
| `avg_tps` | `float` | `total_acked / uptime_secs` since first message |
|
|
168
|
+
| `total_read` | `int` | Messages pulled from stream (new + reclaimed via XAUTOCLAIM) |
|
|
169
|
+
| `total_acked` | `int` | Successfully processed and ACKed by handler |
|
|
170
|
+
| `total_dlq` | `int` | Routed to DLQ (`decode_error` + `max_deliveries` combined) |
|
|
171
|
+
| `total_errors` | `int` | Handler exceptions — message stays in PEL for retry |
|
|
172
|
+
| `uptime_secs` | `float` | Seconds since first message was processed |
|
|
173
|
+
|
|
174
|
+
### 5. Producer Throughput Metrics
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
m = producer.metrics()
|
|
178
|
+
print(f"push={m.tps:.1f} msg/s avg={m.avg_tps:.1f} msg/s total={m.total_pushed} uptime={m.uptime_secs:.0f}s")
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
| Field | Type | Description |
|
|
182
|
+
|---|---|---|
|
|
183
|
+
| `total_pushed` | `int` | Messages pushed since instance creation |
|
|
184
|
+
| `tps` | `float` | Pushed/sec — sliding 60s window |
|
|
185
|
+
| `avg_tps` | `float` | `total_pushed / uptime_secs` since first push |
|
|
186
|
+
| `uptime_secs` | `float` | Seconds since first push |
|
|
187
|
+
|
|
188
|
+
### 6. Process-Wide Metrics (Multiple Instances)
|
|
189
|
+
|
|
190
|
+
Each `StreamConsumer` and `StreamProducer` auto-registers in a process-level weakref registry on creation. Dead instances are evicted automatically by GC — no manual cleanup needed.
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
# Multiple consumers in same asyncio event loop
|
|
194
|
+
c1 = StreamConsumer(client=client, stream="orders", config=cfg_a, handler=handle_a)
|
|
195
|
+
c2 = StreamConsumer(client=client, stream="payments", config=cfg_b, handler=handle_b)
|
|
196
|
+
c3 = StreamConsumer(client=client, stream="events", config=cfg_c, handler=handle_c)
|
|
197
|
+
|
|
198
|
+
await asyncio.gather(c1.run(), c2.run(), c3.run())
|
|
199
|
+
|
|
200
|
+
# From a monitoring task running concurrently:
|
|
201
|
+
for m in StreamConsumer.all_metrics():
|
|
202
|
+
print(f"in={m.tps_in:.1f} out={m.tps_out:.1f} acked={m.total_acked}")
|
|
203
|
+
|
|
204
|
+
# Multiple producers
|
|
205
|
+
p1 = StreamProducer(client=client, stream="orders")
|
|
206
|
+
p2 = StreamProducer(client=client, stream="payments")
|
|
207
|
+
|
|
208
|
+
for m in StreamProducer.all_metrics():
|
|
209
|
+
print(f"tps={m.tps:.1f} pushed={m.total_pushed}")
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
> **Multi-pod note**: `all_metrics()` is in-process only — it sees instances in this pod, not other pods.
|
|
213
|
+
> For cross-pod aggregation, expose metrics via a health endpoint (FastAPI, Django, plain HTTP) and scrape with Prometheus or a similar tool. Each pod reports its own slice; your scraper aggregates across pods. Zero extra Redis IOPS.
|
|
214
|
+
|
|
215
|
+
### 7. Stream / Group Monitoring
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
from redis_stream_queue import StreamClient, ConsumerGroup
|
|
219
|
+
|
|
220
|
+
async def main():
|
|
221
|
+
client = StreamClient(host="localhost")
|
|
222
|
+
cg = ConsumerGroup(client, stream="orders", group="order_workers")
|
|
223
|
+
|
|
224
|
+
# Stream-level stats (requires Redis calls)
|
|
225
|
+
stats = await cg.stats(dlq_stream="orders_dlq")
|
|
226
|
+
print(f"length={stats.stream_length} lag={stats.lag} pel={stats.group_pel_size}")
|
|
227
|
+
for c in stats.consumers:
|
|
228
|
+
print(f" consumer={c.name} pending={c.pending} idle={c.idle_ms}ms")
|
|
229
|
+
|
|
230
|
+
# Health check
|
|
231
|
+
health = await cg.health_check(max_lag=1_000, max_idle_ms=60_000)
|
|
232
|
+
print(f"healthy={health['healthy']} issues={health['issues']}")
|
|
233
|
+
|
|
234
|
+
# Inspect stuck messages
|
|
235
|
+
pending = await cg.pending_details(count=50)
|
|
236
|
+
for entry in pending:
|
|
237
|
+
print(f" {entry.id} consumer={entry.consumer} deliveries={entry.delivery_count}")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Configuration Reference
|
|
243
|
+
|
|
244
|
+
### `StreamClient`
|
|
245
|
+
|
|
246
|
+
| Param | Default | Description |
|
|
247
|
+
|---|---|---|
|
|
248
|
+
| `host` | `"localhost"` | Redis host |
|
|
249
|
+
| `port` | `6379` | Redis port |
|
|
250
|
+
| `db` | `0` | Redis DB index |
|
|
251
|
+
| `username` | `None` | AUTH username |
|
|
252
|
+
| `password` | `None` | AUTH password |
|
|
253
|
+
| `prefix` | `""` | Key prefix prepended to all stream names (`{prefix}_{stream}`) |
|
|
254
|
+
| `max_connections` | `1000` | Connection pool size |
|
|
255
|
+
| `pool_timeout` | `5` | Seconds to wait for a free connection |
|
|
256
|
+
| `ssl` | `False` | Enable TLS |
|
|
257
|
+
|
|
258
|
+
**Cluster / URL variants:**
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
# Redis Cluster
|
|
262
|
+
client = StreamClient.from_cluster(
|
|
263
|
+
startup_nodes=[{"host": "node1", "port": 6379}],
|
|
264
|
+
password="secret",
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
# URL — single node
|
|
268
|
+
client = StreamClient.from_url("redis://localhost:6379/0")
|
|
269
|
+
client = StreamClient.from_url("rediss://user:pass@host:6380/0") # TLS
|
|
270
|
+
|
|
271
|
+
# URL — cluster
|
|
272
|
+
client = StreamClient.from_url("redis+cluster://node1:6379")
|
|
273
|
+
client = StreamClient.from_url("rediss+cluster://node1:6380") # TLS cluster
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
> **Cluster key rule**: stream and DLQ stream must share a hash tag to land on the same slot:
|
|
277
|
+
> ```python
|
|
278
|
+
> stream="{orders}_main", dlq_stream="{orders}_dlq"
|
|
279
|
+
> ```
|
|
280
|
+
|
|
281
|
+
### `ConsumerConfig`
|
|
282
|
+
|
|
283
|
+
| Param | Default | Description |
|
|
284
|
+
|---|---|---|
|
|
285
|
+
| `group` | required | Consumer group name |
|
|
286
|
+
| `worker_name` | auto | Unique consumer name per pod. Auto = `{group}_{hostname}_{rand4}` (group part truncated if needed to preserve hostname+suffix) |
|
|
287
|
+
| `dlq_stream` | `None` | Stream name to route poison pills and decode errors to |
|
|
288
|
+
| `dlq_group` | `None` | Consumer group for the DLQ stream |
|
|
289
|
+
| `batch_size` | `100` | Max messages per XREADGROUP / XAUTOCLAIM call |
|
|
290
|
+
| `block_ms` | `5000` | XREADGROUP block timeout (ms) — 0 = non-blocking |
|
|
291
|
+
| `min_idle_claim_ms` | `10000` | XAUTOCLAIM idle threshold (ms). Set to at least 2× max handler latency |
|
|
292
|
+
| `max_deliveries` | `3` | Delivery count before message is routed to DLQ |
|
|
293
|
+
| `max_stream_size` | `100000` | Approximate XADD MAXLEN trim |
|
|
294
|
+
| `max_claim_passes` | `None` | Max XAUTOCLAIM cursor iterations per `run_once()`. `None` = no cap (sweep the full PEL). Set to `1` for a single XAUTOCLAIM pass per `run_once()` |
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Serializers
|
|
299
|
+
|
|
300
|
+
```python
|
|
301
|
+
from redis_stream_queue import JsonSerializer, MsgpackSerializer, PickleSerializer
|
|
302
|
+
|
|
303
|
+
# Serializer must match on both producer and consumer
|
|
304
|
+
producer = StreamProducer(..., serializer=MsgpackSerializer())
|
|
305
|
+
consumer = StreamConsumer(..., serializer=MsgpackSerializer())
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
| Serializer | Extra | Notes |
|
|
309
|
+
|---|---|---|
|
|
310
|
+
| `JsonSerializer` | none | Default; human-readable, broadest compat |
|
|
311
|
+
| `MsgpackSerializer` | `[msgpack]` | Smaller wire size, faster encode/decode |
|
|
312
|
+
| `PickleSerializer` | none | Any Python type; requires same Python version on both ends; **do not use with untrusted data** |
|
|
313
|
+
|
|
314
|
+
Decode failures → `dlq_handler(msg, "decode_error")` + immediate XACK (no retry — corrupt data will always fail).
|
|
315
|
+
|
|
316
|
+
Custom serializer — implement the `Serializer` protocol:
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
from redis_stream_queue import Serializer
|
|
320
|
+
|
|
321
|
+
class CborSerializer:
|
|
322
|
+
def encode(self, data: dict) -> bytes:
|
|
323
|
+
import cbor2
|
|
324
|
+
return cbor2.dumps(data)
|
|
325
|
+
|
|
326
|
+
def decode(self, raw: bytes) -> dict:
|
|
327
|
+
import cbor2
|
|
328
|
+
return cbor2.loads(raw)
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
---
|
|
332
|
+
|
|
333
|
+
## Multi-Pod Deployment
|
|
334
|
+
|
|
335
|
+
### Producers
|
|
336
|
+
|
|
337
|
+
No coordination needed. XADD is atomic, and Redis assigns each entry a unique ID (`{ms_timestamp}-{seq}`), so all pods can write to the same stream key safely.
|
|
338
|
+
|
|
339
|
+
### Consumers
|
|
340
|
+
|
|
341
|
+
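Each pod gets a unique `worker_name`. Redis distributes messages across the consumers in the same group — each new message is delivered to exactly one pod, and is redelivered to another pod only if it stays un-ACKed and is reclaimed via XAUTOCLAIM.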
|
|
342
|
+
|
|
343
|
+
```
|
|
344
|
+
Pod A: worker_name = "order_workers_pod-a_3821" ─┐
|
|
345
|
+
Pod B: worker_name = "order_workers_pod-b_9174" ─┼─ group "order_workers"
|
|
346
|
+
Pod C: worker_name = "order_workers_pod-c_0042" ─┘
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
**Crash recovery**: if a pod crashes, its unacknowledged PEL messages go idle. Other pods reclaim them via XAUTOCLAIM after `min_idle_claim_ms`. Set this to at least 2× your maximum handler latency.
|
|
350
|
+
|
|
351
|
+
**NOGROUP recovery**: if the stream or group is deleted externally (e.g. `FLUSHALL`, `XGROUP DESTROY`), the consumer detects the `NOGROUP` error, clears its entry from the group registry, and re-creates the group on the next iteration — no manual restart required.
|
|
352
|
+
|
|
353
|
+
**Recommended settings for multi-pod:**
|
|
354
|
+
|
|
355
|
+
| Setting | Value | Reason |
|
|
356
|
+
|---|---|---|
|
|
357
|
+
| `worker_name` | auto (default) | Unique per pod |
|
|
358
|
+
| `min_idle_claim_ms` | 2× max handler latency | Avoid premature cross-pod reclaim |
|
|
359
|
+
| `block_ms` | `5000` | Balanced latency vs idle CPU |
|
|
360
|
+
| `max_deliveries` | `3–5` | Accounts for transient failures across restarts |
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## Consumer Loop Internals
|
|
365
|
+
|
|
366
|
+
Each `run_once()` call executes four steps:
|
|
367
|
+
|
|
368
|
+
```
|
|
369
|
+
1. ensure() — XGROUP CREATE mkstream; no-op if group known in class-level registry.
|
|
370
|
+
Registry entry removed on NOGROUP — re-creation runs on next iteration.
|
|
371
|
+
|
|
372
|
+
2. XREADGROUP ">" — fetch new undelivered messages
|
|
373
|
+
├─ decode error → dlq_handler(msg, "decode_error") + XACK (no retry)
|
|
374
|
+
├─ handler(msgs) → XACK returned IDs; total_acked += n; tps_out tracker updated
|
|
375
|
+
├─ tps_in updated with len(raw_messages)
|
|
376
|
+
├─ handler → None → warning logged; no XACK (treated as an explicit "ACK nothing")
|
|
377
|
+
└─ unacked IDs stay in PEL for XAUTOCLAIM recovery
|
|
378
|
+
|
|
379
|
+
3. XAUTOCLAIM cursor loop — reclaims msgs idle > min_idle_claim_ms
|
|
380
|
+
├─ follows cursor until Redis returns "0-0" (full PEL swept) or stall detected
|
|
381
|
+
├─ max_claim_passes caps iterations if set; None = unlimited (default)
|
|
382
|
+
├─ tps_in updated with len(claimed) per batch
|
|
383
|
+
└─ reclaimed msgs → same handler → XACK; tps_out updated
|
|
384
|
+
|
|
385
|
+
4. XPENDING sweep — find entries with delivery_count >= max_deliveries
|
|
386
|
+
├─ no dlq_handler → warning logged with IDs; still ACKed (message cleared)
|
|
387
|
+
├─ msg missing from stream (XDEL'd) → warning logged; still ACKed
|
|
388
|
+
└─ poison pills → dlq_handler(msg, "max_deliveries") + batched XACK
|
|
389
|
+
|
|
390
|
+
run() wraps run_once() in an infinite loop:
|
|
391
|
+
├─ CancelledError → re-raised immediately (clean shutdown)
|
|
392
|
+
├─ NOGROUP error → registry entry cleared; sleep 1s; re-enter loop
|
|
393
|
+
└─ any other error → logged; sleep 1s; re-enter loop
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
**Metrics updated per iteration:**
|
|
397
|
+
- `total_read` incremented on every XREADGROUP / XAUTOCLAIM batch
|
|
398
|
+
- `tps_in` sliding tracker records every read batch (new + reclaimed)
|
|
399
|
+
- `total_acked` incremented after each successful handler → XACK
|
|
400
|
+
- `tps_out` sliding tracker records every ack batch
|
|
401
|
+
- `total_dlq` incremented for decode errors + poison pill ACKs
|
|
402
|
+
- `total_errors` incremented for handler exceptions
|
|
403
|
+
|
|
404
|
+
---
|
|
405
|
+
|
|
406
|
+
## Sequence Diagrams
|
|
407
|
+
|
|
408
|
+
> Render with [PlantUML](https://plantuml.com/), the VS Code [PlantUML extension](https://marketplace.visualstudio.com/items?itemName=jebbs.plantuml), or the IntelliJ PlantUML plugin.
|
|
409
|
+
|
|
410
|
+
### 1. Producer: Push Message
|
|
411
|
+
|
|
412
|
+
```plantuml
|
|
413
|
+
@startuml
|
|
414
|
+
title Producer — Push Message
|
|
415
|
+
|
|
416
|
+
participant "App" as App
|
|
417
|
+
participant "StreamProducer" as P
|
|
418
|
+
participant "StreamClient" as C
|
|
419
|
+
database "Redis Stream" as R
|
|
420
|
+
|
|
421
|
+
App -> P : push(data: dict)
|
|
422
|
+
P -> P : serializer.encode(data) → bytes
|
|
423
|
+
P -> C : push(stream, encoded_bytes, max_len)
|
|
424
|
+
C -> R : XADD {stream} MAXLEN ~ {max_len}\n * data {bytes}
|
|
425
|
+
R --> C : message_id (e.g. "1700000000000-0")
|
|
426
|
+
C --> P : message_id
|
|
427
|
+
P -> P : total_pushed += 1\ntps_tracker.record(1)
|
|
428
|
+
P --> App : message_id
|
|
429
|
+
@enduml
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
### 2. Consumer: Normal Message Processing
|
|
433
|
+
|
|
434
|
+
```plantuml
|
|
435
|
+
@startuml
|
|
436
|
+
title Consumer — Normal Message Processing (run_once step 2)
|
|
437
|
+
|
|
438
|
+
participant "StreamConsumer\nrun_once()" as Consumer
|
|
439
|
+
participant "StreamClient" as C
|
|
440
|
+
participant "handler()" as H
|
|
441
|
+
database "Redis PEL" as PEL
|
|
442
|
+
|
|
443
|
+
Consumer -> C : read(stream, group, worker_name,\n count=batch_size, block_ms)
|
|
444
|
+
C -> PEL : XREADGROUP GROUP {group} {worker}\n COUNT {n} BLOCK {ms} STREAMS {stream} >
|
|
445
|
+
PEL --> C : [(id1, fields), (id2, fields), ...]
|
|
446
|
+
note right of PEL : Messages enter PEL\n(pending until ACKed)
|
|
447
|
+
C --> Consumer : [StreamMessage, ...]
|
|
448
|
+
|
|
449
|
+
Consumer -> Consumer : tps_in.record(n)\ntotal_read += n
|
|
450
|
+
Consumer -> Consumer : serializer.decode(raw) per message
|
|
451
|
+
Consumer -> H : handler(decoded_messages)
|
|
452
|
+
H --> Consumer : [acked_ids]
|
|
453
|
+
|
|
454
|
+
Consumer -> C : ack(stream, group, *acked_ids)
|
|
455
|
+
C -> PEL : XACK {stream} {group} {id1} {id2} ...
|
|
456
|
+
note right of PEL : Messages removed from PEL
|
|
457
|
+
Consumer -> Consumer : total_acked += len(acked_ids)\ntps_out.record(n)
|
|
458
|
+
|
|
459
|
+
note over Consumer : Unacked IDs stay in PEL\nfor crash recovery
|
|
460
|
+
@enduml
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
### 3. Crash Recovery: XAUTOCLAIM Cursor Loop
|
|
464
|
+
|
|
465
|
+
```plantuml
|
|
466
|
+
@startuml
|
|
467
|
+
title Crash Recovery — XAUTOCLAIM Cursor Loop (run_once step 3)
|
|
468
|
+
|
|
469
|
+
participant "Pod A" as A
|
|
470
|
+
participant "Pod B\nrun_once()" as B
|
|
471
|
+
database "Redis PEL" as PEL
|
|
472
|
+
|
|
473
|
+
== Pod A: reads, starts processing, then crashes ==
|
|
474
|
+
A -> PEL : XREADGROUP → msg_001 assigned to Pod A
|
|
475
|
+
note over A : Pod A crashes.\nmsg_001 never ACKed.\nSits in PEL, idle growing.
|
|
476
|
+
|
|
477
|
+
== Pod B: autoclaim cursor loop sweeps full PEL ==
|
|
478
|
+
loop cursor != "0-0" and no stall
|
|
479
|
+
B -> PEL : XAUTOCLAIM group={group} consumer={B}\n min_idle_time={ms} count={n} start_id={cursor}
|
|
480
|
+
PEL --> B : (next_cursor, [reclaimed_msgs])
|
|
481
|
+
B -> B : tps_in.record(n)\n_process_batch(reclaimed_msgs)
|
|
482
|
+
B -> B : cursor = next_cursor
|
|
483
|
+
end
|
|
484
|
+
note right of PEL : Loop exits when cursor="0-0"\n(full PEL swept) or stall detected.\nmax_claim_passes caps iterations if set.
|
|
485
|
+
|
|
486
|
+
B -> PEL : XACK {stream} {group} msg_001
|
|
487
|
+
note right of PEL : msg_001 cleared from PEL
|
|
488
|
+
@enduml
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
### 4. Decode Error → Immediate DLQ
|
|
492
|
+
|
|
493
|
+
```plantuml
|
|
494
|
+
@startuml
|
|
495
|
+
title Decode Error — Immediate DLQ (run_once step 2)
|
|
496
|
+
|
|
497
|
+
participant "_process_batch()" as Batch
|
|
498
|
+
participant "RetryHandler" as Retry
|
|
499
|
+
participant "dlq_handler()" as DLQ
|
|
500
|
+
database "Redis PEL" as PEL
|
|
501
|
+
|
|
502
|
+
Batch -> Batch : serializer.decode(raw) — raises Exception
|
|
503
|
+
note right of Batch : Corrupt or wrong format.\nCannot retry — will always fail.
|
|
504
|
+
|
|
505
|
+
Batch -> Retry : send_to_dlq(msg, "decode_error")
|
|
506
|
+
Retry -> DLQ : dlq_handler(msg, "decode_error")
|
|
507
|
+
DLQ --> Retry : (done; dlq handler errors swallowed)
|
|
508
|
+
|
|
509
|
+
Batch -> PEL : XACK bad_msg_id
|
|
510
|
+
Batch -> Batch : total_dlq += 1
|
|
511
|
+
note right of PEL : Removed immediately.\nWill NOT re-enter consumer\nloop for more retries.
|
|
512
|
+
@enduml
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
### 5. Poison Pill → DLQ After Max Deliveries
|
|
516
|
+
|
|
517
|
+
```plantuml
|
|
518
|
+
@startuml
|
|
519
|
+
title Poison Pill — DLQ After max_deliveries (run_once step 4)
|
|
520
|
+
|
|
521
|
+
participant "RetryHandler\nhandle_poison_pills()" as Retry
|
|
522
|
+
participant "dlq_handler()" as DLQ
|
|
523
|
+
database "Redis PEL" as PEL
|
|
524
|
+
database "Redis Stream" as Stream
|
|
525
|
+
|
|
526
|
+
note over PEL : msg_999 redelivered\ndelivery_count >= max_deliveries
|
|
527
|
+
|
|
528
|
+
Retry -> PEL : XPENDING_RANGE {stream} {group}\n - + count={batch_size}
|
|
529
|
+
PEL --> Retry : [PendingEntry(id=msg_999, delivery_count=5)]
|
|
530
|
+
|
|
531
|
+
Retry -> Retry : filter: delivery_count >= max_deliveries
|
|
532
|
+
|
|
533
|
+
alt message exists in stream
|
|
534
|
+
Retry -> Stream : XRANGE {stream} msg_999 msg_999 COUNT 1
|
|
535
|
+
Stream --> Retry : raw bytes
|
|
536
|
+
Retry -> Retry : try_decode(raw)
|
|
537
|
+
Retry -> DLQ : dlq_handler(decoded_msg, "max_deliveries")
|
|
538
|
+
else message deleted from stream (XDEL)
|
|
539
|
+
Retry -> Retry : log warning "missing from stream"\n(no DLQ call — nothing to forward)
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
Retry -> PEL : XACK {stream} {group} *all_poison_ids (batched)
|
|
543
|
+
note right of PEL : Poison pills cleared.\nNever retried again.
|
|
544
|
+
@enduml
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
### 6. NOGROUP Auto-Recovery
|
|
548
|
+
|
|
549
|
+
```plantuml
|
|
550
|
+
@startuml
|
|
551
|
+
title NOGROUP Auto-Recovery
|
|
552
|
+
|
|
553
|
+
participant "run() loop" as Loop
|
|
554
|
+
participant "ConsumerGroup" as CG
|
|
555
|
+
database "Redis" as R
|
|
556
|
+
participant "run_once()" as Once
|
|
557
|
+
|
|
558
|
+
Loop -> Once : await run_once()
|
|
559
|
+
Once -> CG : ensure() [key in _known → skipped]
|
|
560
|
+
Once -> R : XREADGROUP ...
|
|
561
|
+
R --> Once : ResponseError: NOGROUP ...
|
|
562
|
+
Once --> Loop : raises Exception("NOGROUP ...")
|
|
563
|
+
|
|
564
|
+
Loop -> Loop : "NOGROUP" in str(e) → True
|
|
565
|
+
Loop -> CG : reset() [removes key from _known]
|
|
566
|
+
Loop -> Loop : sleep(1)
|
|
567
|
+
|
|
568
|
+
Loop -> Once : await run_once() [next iteration]
|
|
569
|
+
Once -> CG : ensure() [key not in _known → runs]
|
|
570
|
+
CG -> R : XGROUP CREATE {stream} {group} MKSTREAM
|
|
571
|
+
R --> CG : OK
|
|
572
|
+
CG -> CG : _known.add(key)
|
|
573
|
+
Once -> R : XREADGROUP ... [normal processing resumes]
|
|
574
|
+
@enduml
|
|
575
|
+
```
|
|
576
|
+
|
|
577
|
+
### 7. Full run_once() Lifecycle with Metrics
|
|
578
|
+
|
|
579
|
+
```plantuml
|
|
580
|
+
@startuml
|
|
581
|
+
title Full run_once() Lifecycle
|
|
582
|
+
|
|
583
|
+
participant "run_once()" as Loop
|
|
584
|
+
participant "ConsumerGroup\nensure()" as CG
|
|
585
|
+
participant "StreamClient" as C
|
|
586
|
+
participant "RetryHandler" as Retry
|
|
587
|
+
database "Redis" as R
|
|
588
|
+
|
|
589
|
+
Loop -> CG : ensure(dlq_stream, dlq_group)
|
|
590
|
+
note right of CG : No-op if key in _known.\nRuns XGROUP CREATE on first call\nor after NOGROUP reset.
|
|
591
|
+
|
|
592
|
+
group Step 2: New messages
|
|
593
|
+
Loop -> C : read(stream, group, worker, batch_size, block_ms)
|
|
594
|
+
C -> R : XREADGROUP GROUP {group} {worker} COUNT {n} BLOCK {ms} STREAMS {stream} >
|
|
595
|
+
R --> Loop : [StreamMessage, ...]
|
|
596
|
+
Loop -> Loop : total_read += n\ntps_in.record(n)\n_process_batch(messages)
|
|
597
|
+
note right of Loop : ACKed IDs → total_acked, tps_out.record(n)
|
|
598
|
+
end
|
|
599
|
+
|
|
600
|
+
group Step 3: Orphan recovery (cursor loop)
|
|
601
|
+
loop cursor != "0-0"
|
|
602
|
+
Loop -> C : autoclaim(stream, group, worker, min_idle_ms, batch_size, cursor)
|
|
603
|
+
C -> R : XAUTOCLAIM min_idle={ms} count={n} start_id={cursor}
|
|
604
|
+
R --> Loop : (next_cursor, [reclaimed StreamMessage])
|
|
605
|
+
Loop -> Loop : total_read += n\ntps_in.record(n)\n_process_batch(claimed)
|
|
606
|
+
Loop -> Loop : cursor = next_cursor
|
|
607
|
+
end
|
|
608
|
+
end
|
|
609
|
+
|
|
610
|
+
group Step 4: Poison-pill sweep
|
|
611
|
+
Loop -> Retry : handle_poison_pills() → int
|
|
612
|
+
Retry -> R : XPENDING_RANGE → filter delivery_count >= max_deliveries
|
|
613
|
+
Retry -> R : XRANGE per poison ID (fetch raw bytes)
|
|
614
|
+
Retry -> Retry : dlq_handler(msg, "max_deliveries") per pill
|
|
615
|
+
Retry -> R : XACK {stream} {group} *poison_ids (batched)
|
|
616
|
+
Retry --> Loop : count of pills processed
|
|
617
|
+
Loop -> Loop : total_dlq += count
|
|
618
|
+
end
|
|
619
|
+
@enduml
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
---
|
|
623
|
+
|
|
624
|
+
## Running the Example
|
|
625
|
+
|
|
626
|
+
```bash
|
|
627
|
+
# terminal 1
|
|
628
|
+
docker run -p 6379:6379 redis
|
|
629
|
+
|
|
630
|
+
# terminal 2
|
|
631
|
+
python examples/basic_worker.py
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
---
|
|
635
|
+
|
|
636
|
+
## Development
|
|
637
|
+
|
|
638
|
+
### Setup
|
|
639
|
+
|
|
640
|
+
```bash
|
|
641
|
+
git clone <repo>
|
|
642
|
+
cd redis-stream-queue
|
|
643
|
+
|
|
644
|
+
python3 -m venv .venv
|
|
645
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
646
|
+
|
|
647
|
+
pip install -e ".[dev]" # editable install + test deps
|
|
648
|
+
```
|
|
649
|
+
|
|
650
|
+
### Running Tests
|
|
651
|
+
|
|
652
|
+
Tests use [fakeredis](https://github.com/cunla/fakeredis-py) — no real Redis needed.
|
|
653
|
+
|
|
654
|
+
```bash
|
|
655
|
+
# all tests
|
|
656
|
+
pytest tests/
|
|
657
|
+
|
|
658
|
+
# single file
|
|
659
|
+
pytest tests/test_consumer.py
|
|
660
|
+
|
|
661
|
+
# single test with verbose output
|
|
662
|
+
pytest tests/test_consumer.py::test_poison_pill_goes_to_dlq -v -s
|
|
663
|
+
|
|
664
|
+
# with coverage
|
|
665
|
+
pip install pytest-cov
|
|
666
|
+
pytest tests/ --cov=src/redis_stream_queue --cov-report=term-missing
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
### Project Structure
|
|
670
|
+
|
|
671
|
+
```
|
|
672
|
+
src/
|
|
673
|
+
└── redis_stream_queue/
|
|
674
|
+
├── __init__.py # public exports
|
|
675
|
+
├── client.py # StreamClient — connection pool, all X* commands
|
|
676
|
+
├── producer.py # StreamProducer — push + ProducerMetrics + all_metrics()
|
|
677
|
+
├── consumer.py # StreamConsumer — main loop + all_metrics()
|
|
678
|
+
├── group.py # ConsumerConfig + ConsumerGroup (class-level registry, stats, health)
|
|
679
|
+
├── message.py # StreamMessage, PendingEntry, StreamStats, ConsumerInfo,
|
|
680
|
+
│ # ConsumerMetrics, ProducerMetrics, _TpsTracker
|
|
681
|
+
├── retry.py # RetryHandler — poison-pill detection + DLQ routing
|
|
682
|
+
├── serializers.py # Json / Msgpack / Pickle
|
|
683
|
+
└── exceptions.py
|
|
684
|
+
tests/
|
|
685
|
+
conftest.py # shared fixtures: fake_redis, make_client, registry resets
|
|
686
|
+
test_consumer.py # consumer loop, metrics, all_metrics, NOGROUP recovery
|
|
687
|
+
test_group.py # stats, health check, pending details
|
|
688
|
+
test_producer.py # push, ensure_group, metrics, all_metrics
|
|
689
|
+
examples/
|
|
690
|
+
basic_worker.py
|
|
691
|
+
```
|
|
692
|
+
|
|
693
|
+
### Known Limitations
|
|
694
|
+
|
|
695
|
+
- **`fetch_by_ids` N+1**: the poison-pill fetch issues one XRANGE call per poison-pill ID. This is fine when a sweep finds only a handful of poison pills (< 10); very large poison batches would benefit from pipelining the XRANGE calls.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
redis_stream_queue/__init__.py,sha256=szzYBCuAMKCK3niLKsflRFEhGU6ZPE4U6-cSiAcMvZM,774
|
|
2
|
+
redis_stream_queue/client.py,sha256=Zv-v3RmT_ezD32sCcGGBdkdODMQYi-hvbKwgmdyMCfk,11808
|
|
3
|
+
redis_stream_queue/consumer.py,sha256=1AmeDUG_d9x5R_ASKNP8_CbeGszAqGs6FmXGjjls-DU,8121
|
|
4
|
+
redis_stream_queue/exceptions.py,sha256=QgQ3DFcIfmBw5pDoL_Ih517dmHt3j3qAHqWdyDuDXNw,197
|
|
5
|
+
redis_stream_queue/group.py,sha256=657IRT8UhoCqxWLeKv-ymDSb6svWOlx3wgXlC30gmbY,5060
|
|
6
|
+
redis_stream_queue/message.py,sha256=fipvc8jKUvxuiE3Jcn1diwMVhJueezlz-VkyWuML_fE,2604
|
|
7
|
+
redis_stream_queue/producer.py,sha256=tWRHXUwtncueDFNEFmzhbJiWFS3X8H3rqu7D58bR_tE,3095
|
|
8
|
+
redis_stream_queue/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
redis_stream_queue/retry.py,sha256=71Zw4RM1qgQZfr-e-zXByKF8JP6xb7kSmp2ig_SQWEw,3324
|
|
10
|
+
redis_stream_queue/serializers.py,sha256=aSrZHBofn3NEbkQjl4dJ2lSafXA--Lm8_4N_We-Pkuw,1152
|
|
11
|
+
redis_stream_queue-0.1.0.dist-info/METADATA,sha256=DhVAxSaOMMDWTEJ9UD07M8RL6Ok5NKNMeIYHFKuT3w8,24430
|
|
12
|
+
redis_stream_queue-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
13
|
+
redis_stream_queue-0.1.0.dist-info/licenses/LICENSE,sha256=4UsagfrwA3etEqP2g5DTSljyPbwRjJkjm2usEng95iU,1068
|
|
14
|
+
redis_stream_queue-0.1.0.dist-info/RECORD,,
|