redis-message-queue 6.0.1__tar.gz → 7.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/PKG-INFO +161 -14
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/README.md +159 -12
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/pyproject.toml +2 -2
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/__init__.py +2 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_abstract_redis_gateway.py +7 -6
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_config.py +109 -8
- redis_message_queue-7.0.0/redis_message_queue/_event.py +66 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_exceptions.py +4 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_queue_key_manager.py +4 -1
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_redis_cluster.py +8 -2
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_redis_gateway.py +30 -6
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/asyncio/__init__.py +2 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/asyncio/_abstract_redis_gateway.py +7 -6
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/asyncio/_redis_gateway.py +30 -6
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/asyncio/redis_message_queue.py +86 -37
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/interrupt_handler/_implementation.py +16 -3
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/redis_message_queue.py +62 -31
- redis_message_queue-6.0.1/redis_message_queue/_event.py +0 -39
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/LICENSE +0 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_callable_utils.py +0 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/_stored_message.py +0 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/interrupt_handler/__init__.py +0 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/interrupt_handler/_interface.py +0 -0
- {redis_message_queue-6.0.1 → redis_message_queue-7.0.0}/redis_message_queue/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: redis-message-queue
|
|
3
|
-
Version: 6.0.1
|
|
3
|
+
Version: 7.0.0
|
|
4
4
|
Summary: Python message queuing with Redis and message deduplication
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.14
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries
|
|
19
19
|
Classifier: Topic :: System :: Distributed Computing
|
|
20
|
-
Requires-Dist: redis (>=5.0.0)
|
|
20
|
+
Requires-Dist: redis (>=5.0.0,<8.0.0)
|
|
21
21
|
Requires-Dist: tenacity (>=8.1.0)
|
|
22
22
|
Project-URL: Homepage, https://github.com/Elijas/redis-message-queue
|
|
23
23
|
Project-URL: Issues, https://github.com/Elijas/redis-message-queue/issues
|
|
@@ -26,7 +26,7 @@ Description-Content-Type: text/markdown
|
|
|
26
26
|
|
|
27
27
|
# redis-message-queue
|
|
28
28
|
|
|
29
|
-
[](https://pypi.org/project/redis-message-queue)
|
|
30
30
|
[](https://pypistats.org/packages/redis-message-queue)
|
|
31
31
|
[](LICENSE)
|
|
32
32
|
[](https://github.com/Elijas/redis-message-queue/issues)
|
|
@@ -260,6 +260,22 @@ consumer whose claim request Redis executes next. There is no round-robin,
|
|
|
260
260
|
equal-share, or starvation-freedom guarantee; faster consumers can receive more
|
|
261
261
|
than 1/N of messages.
|
|
262
262
|
|
|
263
|
+
### If you need stronger ordering or fairness guarantees
|
|
264
|
+
|
|
265
|
+
- **Strict queue-wide processing order** — use a single consumer per queue.
|
|
266
|
+
Multiple consumers will interleave handler completions.
|
|
267
|
+
- **Per-key processing order** — partition by key into multiple queues
|
|
268
|
+
(`queue_<hash(key) % N>`), and consume each partition with a single consumer.
|
|
269
|
+
- **Equal-share / round-robin fairness across consumers** — choose a different
|
|
270
|
+
scheduler. This queue does not guarantee that any individual consumer makes
|
|
271
|
+
forward progress at any specific rate.
|
|
272
|
+
- **Cross-batch ordering after reclaim** — accept that reclaimed messages will
|
|
273
|
+
reappear after newer un-reclaimed messages have been consumed. If your handler
|
|
274
|
+
must observe original publish order, persist that order in the payload (for
|
|
275
|
+
example, a sequence number set by the producer). For clock-related operator
|
|
276
|
+
detail behind reclaim behavior, see
|
|
277
|
+
[production readiness R11](docs/production-readiness.md#r11-redis-clock-dependencies).
|
|
278
|
+
|
|
263
279
|
### Dead-letter queue
|
|
264
280
|
|
|
265
281
|
```python
|
|
@@ -310,14 +326,14 @@ There are three distinct shutdown shapes; pick the one that matches your runtime
|
|
|
310
326
|
|---|---|---|---|
|
|
311
327
|
| **Flag-based soft drain** (`GracefulInterruptHandler`) | First SIGINT/SIGTERM flips a flag | Runs to completion | Drained on the next claim call, not on signal arrival |
|
|
312
328
|
| **Async task cancellation** (`asyncio.CancelledError`) | Framework cancels the worker task (Uvicorn/K8s SIGTERM in many setups) | **Hard abort** — message stays in `processing`; with VT it is reclaimed at deadline expiry, without VT it is orphaned | Not drained |
|
|
313
|
-
| **Explicit drain** (`drain()` / `aclose()`) | You call the method | Caller's responsibility to let it finish (drain does **not** cancel) | Drained synchronously via the gateway recovery path |
|
|
329
|
+
| **Explicit drain** (`drain()` / `aclose()`) | You call the method | Caller's responsibility to let it finish (drain does **not** cancel) | Drained synchronously via the gateway recovery path; new publishes are refused |
|
|
314
330
|
|
|
315
331
|
Use `drain()` / `aclose()` to bridge K8s `preStop` / SIGTERM grace windows without
|
|
316
332
|
relying on signal interception:
|
|
317
333
|
|
|
318
334
|
```python
|
|
319
335
|
# sync — in your SIGTERM handler or preStop hook
|
|
320
|
-
queue.drain(timeout=25) # refuses new claims, recovers pending claim IDs
|
|
336
|
+
queue.drain(timeout=25) # refuses new publishes/claims, recovers pending claim IDs
|
|
321
337
|
worker_thread.join() # wait for in-flight process_message to finish
|
|
322
338
|
|
|
323
339
|
# async — same shape
|
|
@@ -326,12 +342,19 @@ await worker_task # task observes ``_draining`` and exits its loop
|
|
|
326
342
|
```
|
|
327
343
|
|
|
328
344
|
`drain()` / `aclose()` set a queue-local flag so subsequent `process_message()`
|
|
329
|
-
calls yield `None` immediately
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
345
|
+
calls yield `None` immediately and subsequent `publish()` calls raise
|
|
346
|
+
`QueueDrainedError("queue is drained")`. Drain also gates the publish path:
|
|
347
|
+
if a publish is already inside the queue instance's publish path, drain waits
|
|
348
|
+
for that publish to finish before it returns; publishes that arrive after the
|
|
349
|
+
drained flag is set are rejected. The drained state is local to that Python
|
|
350
|
+
queue object and is not written to Redis, so constructing a fresh
|
|
351
|
+
`RedisMessageQueue(...)` over the same keys remains usable.
|
|
352
|
+
|
|
353
|
+
Drain does not cancel in-flight handlers — the caller must arrange handler
|
|
354
|
+
exit through normal thread/task coordination. Returns `True` if all in-memory
|
|
355
|
+
pending claim IDs were recovered within the timeout; `False` if the deadline
|
|
356
|
+
fired or transient Redis errors left claim IDs pending (call again to retry).
|
|
357
|
+
`timeout=0` reports current state without attempting recovery.
|
|
335
358
|
|
|
336
359
|
> **Heartbeat caveat (best-effort stop):** when `heartbeat_interval_seconds` is
|
|
337
360
|
> set, the heartbeat sidecar's `stop()` is bounded but not strictly quiescent —
|
|
@@ -472,6 +495,32 @@ Notes:
|
|
|
472
495
|
message. Do not call `fork()` from inside active message handlers unless the
|
|
473
496
|
child exits without using the inherited queue/client.
|
|
474
497
|
|
|
498
|
+
#### Forking after constructing GracefulInterruptHandler
|
|
499
|
+
|
|
500
|
+
If your application constructed `GracefulInterruptHandler` in the parent process
|
|
501
|
+
before `os.fork()` (for example, via module import in a pre-fork app server),
|
|
502
|
+
forked children cannot construct a fresh handler for the same signal because the
|
|
503
|
+
inherited signal table still routes to the parent-process handler.
|
|
504
|
+
|
|
505
|
+
In each child process, call `parent_handler.reset()` before constructing a fresh
|
|
506
|
+
handler:
|
|
507
|
+
|
|
508
|
+
```python
|
|
509
|
+
def worker_main():
|
|
510
|
+
# Inherited handler from parent - reset it.
|
|
511
|
+
if shared.interrupt_handler is not None:
|
|
512
|
+
shared.interrupt_handler.reset()
|
|
513
|
+
|
|
514
|
+
# Now safe to construct a fresh handler for this child.
|
|
515
|
+
interrupt = GracefulInterruptHandler()
|
|
516
|
+
queue = RedisMessageQueue("jobs", client=redis.Redis(), interrupt=interrupt)
|
|
517
|
+
...
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
Alternatively, defer all construction (handler and queue) to inside
|
|
521
|
+
`worker_main()` and run your app server without app preloading (for gunicorn, omit `--preload`). That
|
|
522
|
+
avoids the parent-construct hazard entirely.
|
|
523
|
+
|
|
475
524
|
### Redis memory sizing for deduplication and replay metadata
|
|
476
525
|
|
|
477
526
|
When deduplication is enabled, each distinct dedup key creates one Redis string
|
|
@@ -530,6 +579,7 @@ Package logs remain diagnostic; use `on_event` rather than log parsing for
|
|
|
530
579
|
metrics.
|
|
531
580
|
|
|
532
581
|
```python
|
|
582
|
+
from opentelemetry import trace
|
|
533
583
|
from prometheus_client import Counter
|
|
534
584
|
from redis_message_queue import QueueEvent, RedisMessageQueue
|
|
535
585
|
|
|
@@ -543,17 +593,78 @@ def observe(event: QueueEvent) -> None:
|
|
|
543
593
|
events_total.labels(
|
|
544
594
|
event.queue, event.operation, event.outcome, event.exception_type or ""
|
|
545
595
|
).inc()
|
|
596
|
+
if event.error is not None:
|
|
597
|
+
trace.get_current_span().record_exception(event.error)
|
|
546
598
|
|
|
547
599
|
queue = RedisMessageQueue("jobs", client=client, on_event=observe)
|
|
548
600
|
```
|
|
549
601
|
|
|
602
|
+
#### Event dispatch context
|
|
603
|
+
|
|
604
|
+
Callbacks fire inline:
|
|
605
|
+
|
|
606
|
+
- **Sync queue:** the callback runs in the caller's thread. It sees
|
|
607
|
+
contextvars, the OpenTelemetry current span, and structlog contextvars bound
|
|
608
|
+
by the caller.
|
|
609
|
+
- **Async queue:** the callback is awaited in the current asyncio task. It has
|
|
610
|
+
the same contextvars, span, and structlog visibility.
|
|
611
|
+
- **Sync heartbeat:** heartbeat events fire from a separate
|
|
612
|
+
`threading.Thread`. That thread does not inherit caller contextvars or the
|
|
613
|
+
caller's OpenTelemetry current span. Use `event.message_id` and
|
|
614
|
+
`event.lease_token_hash` for correlation.
|
|
615
|
+
- **Async heartbeat:** heartbeat events fire from an asyncio task. The task
|
|
616
|
+
copies the context present when the heartbeat was started, so contextvars and
|
|
617
|
+
OpenTelemetry spans bound at handler entry are visible.
|
|
618
|
+
|
|
619
|
+
#### Event timing vs. Redis commit
|
|
620
|
+
|
|
621
|
+
Most events are post-commit, emitted after the Redis command or Lua script
|
|
622
|
+
returned: `publish/success`, `publish_dedup_hit`, `claim/success`,
|
|
623
|
+
`claim_empty`, `claim_reclaim`, `ack`, `nack`, `completed`, `dlq`,
|
|
624
|
+
`lease_renew`, `trim_failed`, and `stale_lease_*`.
|
|
625
|
+
|
|
626
|
+
Exceptions to the post-commit rule (pre-commit and mid-flight events):
|
|
627
|
+
|
|
628
|
+
- `failed/failure` fires after the handler raises but before failed-queue
|
|
629
|
+
cleanup completes. Use `nack` for cleanup-commit metrics; use `failed` for
|
|
630
|
+
handler-exception attribution.
|
|
631
|
+
- `retry_attempt/failure` and `retry_exhausted` fire on the claim-loop retry
|
|
632
|
+
path. The first Redis attempt may or may not have committed.
|
|
633
|
+
- `publish/failure`, `claim/failure`, and `cleanup_failed/failure` follow
|
|
634
|
+
exceptions. Under an ambiguous lost response, Redis may have committed
|
|
635
|
+
despite the exception. Treat them as "operation did not succeed from the
|
|
636
|
+
caller's perspective", not "Redis did not commit".
|
|
637
|
+
|
|
638
|
+
#### Intentionally silent paths
|
|
639
|
+
|
|
640
|
+
The following operations have no `on_event` surface by design:
|
|
641
|
+
|
|
642
|
+
- **B1 Cluster `pcall` cleanup failure:** three lease-aware Lua scripts wrap a
|
|
643
|
+
data-derived `DEL` in `redis.pcall(...)` and ignore the result. This
|
|
644
|
+
preserves queue safety on Cluster `CROSSSLOT` rejection but cannot be
|
|
645
|
+
observed through `on_event`. Operators watching key-TTL behavior or Redis
|
|
646
|
+
slow logs can detect orphans.
|
|
647
|
+
- **VT claim-store OOM compensation:** if the visibility-timeout Lua script
|
|
648
|
+
cannot store the claim result, it removes the message from processing, pushes
|
|
649
|
+
it back to pending, and returns `false`. Python translates that into
|
|
650
|
+
`claim_empty/skipped`, the same shape as an empty poll. This is intentional
|
|
651
|
+
fail-safe behavior; the message is not lost.
|
|
652
|
+
- **`drain()` / `close()` / `aclose()` lifecycle:** explicit shutdown
|
|
653
|
+
operations do not emit lifecycle events. Pending-claim-drain recovery work
|
|
654
|
+
counts as `claim_reclaim` events when reached.
|
|
655
|
+
- **Non-claim-loop retry attempts:** tenacity retries in deduplicated publish,
|
|
656
|
+
ack/remove, move-to-completed/failed, and lease renewal collapse into the
|
|
657
|
+
terminal operation's failure event. There is no per-attempt event for those
|
|
658
|
+
paths.
|
|
659
|
+
|
|
550
660
|
The public exception hierarchy is rooted at `RedisMessageQueueError`.
|
|
551
661
|
Invalid configuration values or combinations raise `ConfigurationError` (also a
|
|
552
662
|
`ValueError`), custom gateway contract violations raise `GatewayContractError`
|
|
553
663
|
(also a `TypeError`), and Lua `redis.error_reply(...)` failures raise
|
|
554
664
|
`LuaScriptError` (also a redis-py `ResponseError`). Publish overload raises
|
|
555
|
-
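`QueueBackpressureError`. `CleanupFailedError` and `RetryBudgetExhaustedError`
|
|
556
|
-
are reserved categories for cleanup and retry surfaces.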
|
665
|
+
`QueueBackpressureError`; publish after explicit drain raises
|
|
666
|
+
`QueueDrainedError`. `CleanupFailedError` and `RetryBudgetExhaustedError` are
|
|
667
|
+
reserved categories for cleanup and retry surfaces.
|
|
557
668
|
|
|
558
669
|
## Known limitations
|
|
559
670
|
|
|
@@ -564,13 +675,46 @@ are reserved categories for cleanup and retry surfaces.
|
|
|
564
675
|
- **Cluster detection uses `isinstance(client, RedisCluster)`.** Wrapped or instrumented cluster clients that delegate without inheriting will bypass hash-tag validation. Custom gateways should set `is_redis_cluster = True` explicitly.
|
|
565
676
|
- **Redis Cluster requires hash tags.** The built-in queue uses multiple Redis keys per operation. Wrap the queue name in hash tags (for example `{myqueue}`) so every generated key lands in the same slot. When you pass a Redis Cluster client to the built-in queue/gateway path, incompatible names are rejected early.
|
|
566
677
|
- **Non-ASCII payloads use ~2x storage.** The default `ensure_ascii=True` in JSON serialization encodes non-ASCII characters as `\uXXXX` escape sequences. This is a deliberate compatibility choice.
|
|
567
|
-
- **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH` by default, or a single non-idempotent Lua enqueue when `max_pending_length` is set: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice.
|
|
678
|
+
- **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` or `redis.asyncio.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH` by default, or a single non-idempotent Lua enqueue when `max_pending_length` is set: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. redis-py 6.0+ changed the default standalone `Redis()` / `redis.asyncio.Redis()` retry policy from `None` (no retry) to a 3-attempt `ExponentialWithJitterBackoff`; pass `retry=None` explicitly if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk. More broadly, any non-idempotent enqueue path is vulnerable if the connection drops after server execution but before the client receives the response; all other built-in operations (deduplicated publish, lease-scoped ack/move, lease renewal) use replay markers and are safe under client-level `Retry`.
|
|
679
|
+
|
|
680
|
+
```python
|
|
681
|
+
import redis
|
|
682
|
+
from redis_message_queue import RedisMessageQueue
|
|
683
|
+
|
|
684
|
+
# Strict at-most-once for non-dedup messages: disable redis-py's
|
|
685
|
+
# default 3-retry policy explicitly.
|
|
686
|
+
client = redis.Redis(retry=None)
|
|
687
|
+
queue = RedisMessageQueue("jobs", client=client)
|
|
688
|
+
```
|
|
689
|
+
|
|
690
|
+
```python
|
|
691
|
+
import redis.asyncio as redis
|
|
692
|
+
from redis_message_queue.asyncio import RedisMessageQueue
|
|
693
|
+
|
|
694
|
+
# Strict at-most-once for non-dedup messages: disable redis-py's
|
|
695
|
+
# default 3-retry policy explicitly.
|
|
696
|
+
client = redis.Redis(retry=None)
|
|
697
|
+
queue = RedisMessageQueue("jobs", client=client)
|
|
698
|
+
```
|
|
568
699
|
- **Redis Cluster default retry can stack with this library's retry budget.** In redis-py 6.0+, `RedisCluster()` constructs a default `ExponentialWithJitterBackoff` retry below this library's `retry_budget_seconds`. If you need a single retry surface, pass `retry=Retry(NoBackoff(), 0)` to the cluster client or reduce `retry_budget_seconds` to account for the lower-level retry window.
|
|
569
700
|
|
|
570
701
|
For a full analysis, see [docs/production-readiness.md](docs/production-readiness.md).
|
|
571
702
|
|
|
572
703
|
## Upgrading
|
|
573
704
|
|
|
705
|
+
### v6 to v7 migration
|
|
706
|
+
|
|
707
|
+
v7.0.0 changes explicit drain shutdown semantics. After `queue.drain()` /
|
|
708
|
+
`queue.close()` (sync) or `await queue.drain()` / `await queue.aclose()`
|
|
709
|
+
(async), the same queue instance rejects `publish()` with
|
|
710
|
+
`QueueDrainedError("queue is drained")`.
|
|
711
|
+
|
|
712
|
+
This state is queue-local and process-local; it is not stored in Redis. If a
|
|
713
|
+
producer must continue publishing after a worker has drained, use a separate
|
|
714
|
+
`RedisMessageQueue(...)` instance for that producer lifecycle. During
|
|
715
|
+
shutdown, catch `QueueDrainedError` only at boundaries where late publishes are
|
|
716
|
+
expected and safe to drop or reschedule.
|
|
717
|
+
|
|
574
718
|
### Configuration changes on live queues
|
|
575
719
|
|
|
576
720
|
> **Warning:** These changes are destructive on live queues. Drain the queue completely before applying them.
|
|
@@ -596,6 +740,9 @@ v6.0.0 is a non-breaking-defaults release that adds new public APIs. v5 code con
|
|
|
596
740
|
- `max_pending_length=N` caps pending-list depth; with `pending_overload_policy="raise"` (default) producers see `QueueBackpressureError` when the cap is hit; `"block"` waits up to `pending_overload_block_timeout_seconds`; `"drop_oldest"` evicts silently, so use it only when data loss is acceptable.
|
|
597
741
|
- `queue.drain(timeout=...)` (sync) and `await queue.aclose(timeout=...)` (async) are explicit graceful-shutdown hooks. They refuse new claims and recover pending claim IDs but do not cancel in-flight handlers; join or await your worker separately.
|
|
598
742
|
- `on_event=callback` receives a `QueueEvent` dataclass for every publish/claim/ack/reclaim/dedup/cleanup lifecycle event. Use it for metrics, tracing, and structured logging. See [`examples/production/observability.py`](examples/production/observability.py) for the adapter pattern.
|
|
743
|
+
- See [`examples/production/backpressure.py`](examples/production/backpressure.py) and [`examples/production/graceful_shutdown.py`](examples/production/graceful_shutdown.py) for sync production patterns, with async siblings under [`examples/production/asyncio/`](examples/production/asyncio/).
|
|
744
|
+
|
|
745
|
+
> When using a pre-fork app server (gunicorn `--preload`, uvicorn workers that import the app at master startup), call `make_queue()` from your worker startup hook - NOT at module import. See [Fork safety](#fork-safety-and-pre-fork-servers) for why.
|
|
599
746
|
|
|
600
747
|
**New constructor rejections:**
|
|
601
748
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# redis-message-queue
|
|
2
2
|
|
|
3
|
-
[](https://pypi.org/project/redis-message-queue)
|
|
4
4
|
[](https://pypistats.org/packages/redis-message-queue)
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://github.com/Elijas/redis-message-queue/issues)
|
|
@@ -234,6 +234,22 @@ consumer whose claim request Redis executes next. There is no round-robin,
|
|
|
234
234
|
equal-share, or starvation-freedom guarantee; faster consumers can receive more
|
|
235
235
|
than 1/N of messages.
|
|
236
236
|
|
|
237
|
+
### If you need stronger ordering or fairness guarantees
|
|
238
|
+
|
|
239
|
+
- **Strict queue-wide processing order** — use a single consumer per queue.
|
|
240
|
+
Multiple consumers will interleave handler completions.
|
|
241
|
+
- **Per-key processing order** — partition by key into multiple queues
|
|
242
|
+
(`queue_<hash(key) % N>`), and consume each partition with a single consumer.
|
|
243
|
+
- **Equal-share / round-robin fairness across consumers** — choose a different
|
|
244
|
+
scheduler. This queue does not guarantee that any individual consumer makes
|
|
245
|
+
forward progress at any specific rate.
|
|
246
|
+
- **Cross-batch ordering after reclaim** — accept that reclaimed messages will
|
|
247
|
+
reappear after newer un-reclaimed messages have been consumed. If your handler
|
|
248
|
+
must observe original publish order, persist that order in the payload (for
|
|
249
|
+
example, a sequence number set by the producer). For clock-related operator
|
|
250
|
+
detail behind reclaim behavior, see
|
|
251
|
+
[production readiness R11](docs/production-readiness.md#r11-redis-clock-dependencies).
|
|
252
|
+
|
|
237
253
|
### Dead-letter queue
|
|
238
254
|
|
|
239
255
|
```python
|
|
@@ -284,14 +300,14 @@ There are three distinct shutdown shapes; pick the one that matches your runtime
|
|
|
284
300
|
|---|---|---|---|
|
|
285
301
|
| **Flag-based soft drain** (`GracefulInterruptHandler`) | First SIGINT/SIGTERM flips a flag | Runs to completion | Drained on the next claim call, not on signal arrival |
|
|
286
302
|
| **Async task cancellation** (`asyncio.CancelledError`) | Framework cancels the worker task (Uvicorn/K8s SIGTERM in many setups) | **Hard abort** — message stays in `processing`; with VT it is reclaimed at deadline expiry, without VT it is orphaned | Not drained |
|
|
287
|
-
| **Explicit drain** (`drain()` / `aclose()`) | You call the method | Caller's responsibility to let it finish (drain does **not** cancel) | Drained synchronously via the gateway recovery path |
|
|
303
|
+
| **Explicit drain** (`drain()` / `aclose()`) | You call the method | Caller's responsibility to let it finish (drain does **not** cancel) | Drained synchronously via the gateway recovery path; new publishes are refused |
|
|
288
304
|
|
|
289
305
|
Use `drain()` / `aclose()` to bridge K8s `preStop` / SIGTERM grace windows without
|
|
290
306
|
relying on signal interception:
|
|
291
307
|
|
|
292
308
|
```python
|
|
293
309
|
# sync — in your SIGTERM handler or preStop hook
|
|
294
|
-
queue.drain(timeout=25) # refuses new claims, recovers pending claim IDs
|
|
310
|
+
queue.drain(timeout=25) # refuses new publishes/claims, recovers pending claim IDs
|
|
295
311
|
worker_thread.join() # wait for in-flight process_message to finish
|
|
296
312
|
|
|
297
313
|
# async — same shape
|
|
@@ -300,12 +316,19 @@ await worker_task # task observes ``_draining`` and exits its loop
|
|
|
300
316
|
```
|
|
301
317
|
|
|
302
318
|
`drain()` / `aclose()` set a queue-local flag so subsequent `process_message()`
|
|
303
|
-
calls yield `None` immediately
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
319
|
+
calls yield `None` immediately and subsequent `publish()` calls raise
|
|
320
|
+
`QueueDrainedError("queue is drained")`. Drain also gates the publish path:
|
|
321
|
+
if a publish is already inside the queue instance's publish path, drain waits
|
|
322
|
+
for that publish to finish before it returns; publishes that arrive after the
|
|
323
|
+
drained flag is set are rejected. The drained state is local to that Python
|
|
324
|
+
queue object and is not written to Redis, so constructing a fresh
|
|
325
|
+
`RedisMessageQueue(...)` over the same keys remains usable.
|
|
326
|
+
|
|
327
|
+
Drain does not cancel in-flight handlers — the caller must arrange handler
|
|
328
|
+
exit through normal thread/task coordination. Returns `True` if all in-memory
|
|
329
|
+
pending claim IDs were recovered within the timeout; `False` if the deadline
|
|
330
|
+
fired or transient Redis errors left claim IDs pending (call again to retry).
|
|
331
|
+
`timeout=0` reports current state without attempting recovery.
|
|
309
332
|
|
|
310
333
|
> **Heartbeat caveat (best-effort stop):** when `heartbeat_interval_seconds` is
|
|
311
334
|
> set, the heartbeat sidecar's `stop()` is bounded but not strictly quiescent —
|
|
@@ -446,6 +469,32 @@ Notes:
|
|
|
446
469
|
message. Do not call `fork()` from inside active message handlers unless the
|
|
447
470
|
child exits without using the inherited queue/client.
|
|
448
471
|
|
|
472
|
+
#### Forking after constructing GracefulInterruptHandler
|
|
473
|
+
|
|
474
|
+
If your application constructed `GracefulInterruptHandler` in the parent process
|
|
475
|
+
before `os.fork()` (for example, via module import in a pre-fork app server),
|
|
476
|
+
forked children cannot construct a fresh handler for the same signal because the
|
|
477
|
+
inherited signal table still routes to the parent-process handler.
|
|
478
|
+
|
|
479
|
+
In each child process, call `parent_handler.reset()` before constructing a fresh
|
|
480
|
+
handler:
|
|
481
|
+
|
|
482
|
+
```python
|
|
483
|
+
def worker_main():
|
|
484
|
+
# Inherited handler from parent - reset it.
|
|
485
|
+
if shared.interrupt_handler is not None:
|
|
486
|
+
shared.interrupt_handler.reset()
|
|
487
|
+
|
|
488
|
+
# Now safe to construct a fresh handler for this child.
|
|
489
|
+
interrupt = GracefulInterruptHandler()
|
|
490
|
+
queue = RedisMessageQueue("jobs", client=redis.Redis(), interrupt=interrupt)
|
|
491
|
+
...
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
Alternatively, defer all construction (handler and queue) to inside
|
|
495
|
+
`worker_main()` and run your app server without app preloading (for gunicorn, omit `--preload`). That
|
|
496
|
+
avoids the parent-construct hazard entirely.
|
|
497
|
+
|
|
449
498
|
### Redis memory sizing for deduplication and replay metadata
|
|
450
499
|
|
|
451
500
|
When deduplication is enabled, each distinct dedup key creates one Redis string
|
|
@@ -504,6 +553,7 @@ Package logs remain diagnostic; use `on_event` rather than log parsing for
|
|
|
504
553
|
metrics.
|
|
505
554
|
|
|
506
555
|
```python
|
|
556
|
+
from opentelemetry import trace
|
|
507
557
|
from prometheus_client import Counter
|
|
508
558
|
from redis_message_queue import QueueEvent, RedisMessageQueue
|
|
509
559
|
|
|
@@ -517,17 +567,78 @@ def observe(event: QueueEvent) -> None:
|
|
|
517
567
|
events_total.labels(
|
|
518
568
|
event.queue, event.operation, event.outcome, event.exception_type or ""
|
|
519
569
|
).inc()
|
|
570
|
+
if event.error is not None:
|
|
571
|
+
trace.get_current_span().record_exception(event.error)
|
|
520
572
|
|
|
521
573
|
queue = RedisMessageQueue("jobs", client=client, on_event=observe)
|
|
522
574
|
```
|
|
523
575
|
|
|
576
|
+
#### Event dispatch context
|
|
577
|
+
|
|
578
|
+
Callbacks fire inline:
|
|
579
|
+
|
|
580
|
+
- **Sync queue:** the callback runs in the caller's thread. It sees
|
|
581
|
+
contextvars, the OpenTelemetry current span, and structlog contextvars bound
|
|
582
|
+
by the caller.
|
|
583
|
+
- **Async queue:** the callback is awaited in the current asyncio task. It has
|
|
584
|
+
the same contextvars, span, and structlog visibility.
|
|
585
|
+
- **Sync heartbeat:** heartbeat events fire from a separate
|
|
586
|
+
`threading.Thread`. That thread does not inherit caller contextvars or the
|
|
587
|
+
caller's OpenTelemetry current span. Use `event.message_id` and
|
|
588
|
+
`event.lease_token_hash` for correlation.
|
|
589
|
+
- **Async heartbeat:** heartbeat events fire from an asyncio task. The task
|
|
590
|
+
copies the context present when the heartbeat was started, so contextvars and
|
|
591
|
+
OpenTelemetry spans bound at handler entry are visible.
|
|
592
|
+
|
|
593
|
+
#### Event timing vs. Redis commit
|
|
594
|
+
|
|
595
|
+
Most events are post-commit, emitted after the Redis command or Lua script
|
|
596
|
+
returned: `publish/success`, `publish_dedup_hit`, `claim/success`,
|
|
597
|
+
`claim_empty`, `claim_reclaim`, `ack`, `nack`, `completed`, `dlq`,
|
|
598
|
+
`lease_renew`, `trim_failed`, and `stale_lease_*`.
|
|
599
|
+
|
|
600
|
+
Exceptions to the post-commit rule (pre-commit and mid-flight events):
|
|
601
|
+
|
|
602
|
+
- `failed/failure` fires after the handler raises but before failed-queue
|
|
603
|
+
cleanup completes. Use `nack` for cleanup-commit metrics; use `failed` for
|
|
604
|
+
handler-exception attribution.
|
|
605
|
+
- `retry_attempt/failure` and `retry_exhausted` fire on the claim-loop retry
|
|
606
|
+
path. The first Redis attempt may or may not have committed.
|
|
607
|
+
- `publish/failure`, `claim/failure`, and `cleanup_failed/failure` follow
|
|
608
|
+
exceptions. Under an ambiguous lost response, Redis may have committed
|
|
609
|
+
despite the exception. Treat them as "operation did not succeed from the
|
|
610
|
+
caller's perspective", not "Redis did not commit".
|
|
611
|
+
|
|
612
|
+
#### Intentionally silent paths
|
|
613
|
+
|
|
614
|
+
The following operations have no `on_event` surface by design:
|
|
615
|
+
|
|
616
|
+
- **B1 Cluster `pcall` cleanup failure:** three lease-aware Lua scripts wrap a
|
|
617
|
+
data-derived `DEL` in `redis.pcall(...)` and ignore the result. This
|
|
618
|
+
preserves queue safety on Cluster `CROSSSLOT` rejection but cannot be
|
|
619
|
+
observed through `on_event`. Operators watching key-TTL behavior or Redis
|
|
620
|
+
slow logs can detect orphans.
|
|
621
|
+
- **VT claim-store OOM compensation:** if the visibility-timeout Lua script
|
|
622
|
+
cannot store the claim result, it removes the message from processing, pushes
|
|
623
|
+
it back to pending, and returns `false`. Python translates that into
|
|
624
|
+
`claim_empty/skipped`, the same shape as an empty poll. This is intentional
|
|
625
|
+
fail-safe behavior; the message is not lost.
|
|
626
|
+
- **`drain()` / `close()` / `aclose()` lifecycle:** explicit shutdown
|
|
627
|
+
operations do not emit lifecycle events. Pending-claim-drain recovery work
|
|
628
|
+
counts as `claim_reclaim` events when reached.
|
|
629
|
+
- **Non-claim-loop retry attempts:** tenacity retries in deduplicated publish,
|
|
630
|
+
ack/remove, move-to-completed/failed, and lease renewal collapse into the
|
|
631
|
+
terminal operation's failure event. There is no per-attempt event for those
|
|
632
|
+
paths.
|
|
633
|
+
|
|
524
634
|
The public exception hierarchy is rooted at `RedisMessageQueueError`.
|
|
525
635
|
Invalid configuration values or combinations raise `ConfigurationError` (also a
|
|
526
636
|
`ValueError`), custom gateway contract violations raise `GatewayContractError`
|
|
527
637
|
(also a `TypeError`), and Lua `redis.error_reply(...)` failures raise
|
|
528
638
|
`LuaScriptError` (also a redis-py `ResponseError`). Publish overload raises
|
|
529
|
-
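`QueueBackpressureError`. `CleanupFailedError` and `RetryBudgetExhaustedError`
|
|
530
|
-
are reserved categories for cleanup and retry surfaces.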
|
639
|
+
`QueueBackpressureError`; publish after explicit drain raises
|
|
640
|
+
`QueueDrainedError`. `CleanupFailedError` and `RetryBudgetExhaustedError` are
|
|
641
|
+
reserved categories for cleanup and retry surfaces.
|
|
531
642
|
|
|
532
643
|
## Known limitations
|
|
533
644
|
|
|
@@ -538,13 +649,46 @@ are reserved categories for cleanup and retry surfaces.
|
|
|
538
649
|
- **Cluster detection uses `isinstance(client, RedisCluster)`.** Wrapped or instrumented cluster clients that delegate without inheriting will bypass hash-tag validation. Custom gateways should set `is_redis_cluster = True` explicitly.
|
|
539
650
|
- **Redis Cluster requires hash tags.** The built-in queue uses multiple Redis keys per operation. Wrap the queue name in hash tags (for example `{myqueue}`) so every generated key lands in the same slot. When you pass a Redis Cluster client to the built-in queue/gateway path, incompatible names are rejected early.
|
|
540
651
|
- **Non-ASCII payloads use ~2x storage.** The default `ensure_ascii=True` in JSON serialization encodes non-ASCII characters as `\uXXXX` escape sequences. This is a deliberate compatibility choice.
|
|
541
|
-
- **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH` by default, or a single non-idempotent Lua enqueue when `max_pending_length` is set: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice.
|
|
652
|
+
- **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` or `redis.asyncio.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH` by default, or a single non-idempotent Lua enqueue when `max_pending_length` is set: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. redis-py 6.0+ changed the default standalone `Redis()` / `redis.asyncio.Redis()` retry policy from `None` (no retry) to a 3-retry `ExponentialWithJitterBackoff`; pass `retry=None` explicitly if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk. More broadly, any non-idempotent enqueue path is vulnerable if the connection drops after server execution but before the client receives the response; all other built-in operations (deduplicated publish, lease-scoped ack/move, lease renewal) use replay markers and are safe under client-level `Retry`.
|
|
653
|
+
|
|
654
|
+
```python
|
|
655
|
+
import redis
|
|
656
|
+
from redis_message_queue import RedisMessageQueue
|
|
657
|
+
|
|
658
|
+
# Strict at-most-once for non-dedup messages: disable redis-py's
|
|
659
|
+
# default 3-retry policy explicitly.
|
|
660
|
+
client = redis.Redis(retry=None)
|
|
661
|
+
queue = RedisMessageQueue("jobs", client=client)
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
```python
|
|
665
|
+
import redis.asyncio as redis
|
|
666
|
+
from redis_message_queue.asyncio import RedisMessageQueue
|
|
667
|
+
|
|
668
|
+
# Strict at-most-once for non-dedup messages: disable redis-py's
|
|
669
|
+
# default 3-retry policy explicitly.
|
|
670
|
+
client = redis.Redis(retry=None)
|
|
671
|
+
queue = RedisMessageQueue("jobs", client=client)
|
|
672
|
+
```
|
|
542
673
|
- **Redis Cluster default retry can stack with this library's retry budget.** In redis-py 6.0+, `RedisCluster()` constructs a default `ExponentialWithJitterBackoff` retry below this library's `retry_budget_seconds`. If you need a single retry surface, pass `retry=Retry(NoBackoff(), 0)` to the cluster client or reduce `retry_budget_seconds` to account for the lower-level retry window.
|
|
543
674
|
|
|
544
675
|
For a full analysis, see [docs/production-readiness.md](docs/production-readiness.md).
|
|
545
676
|
|
|
546
677
|
## Upgrading
|
|
547
678
|
|
|
679
|
+
### v6 to v7 migration
|
|
680
|
+
|
|
681
|
+
v7.0.0 changes explicit drain shutdown semantics. After `queue.drain()` /
|
|
682
|
+
`queue.close()` (sync) or `await queue.drain()` / `await queue.aclose()`
|
|
683
|
+
(async), the same queue instance rejects `publish()` with
|
|
684
|
+
`QueueDrainedError("queue is drained")`.
|
|
685
|
+
|
|
686
|
+
This state is queue-local and process-local; it is not stored in Redis. If a
|
|
687
|
+
producer must continue publishing after a worker has drained, use a separate
|
|
688
|
+
`RedisMessageQueue(...)` instance for that producer lifecycle. During
|
|
689
|
+
shutdown, catch `QueueDrainedError` only at boundaries where late publishes are
|
|
690
|
+
expected and safe to drop or reschedule.
|
|
691
|
+
|
|
548
692
|
### Configuration changes on live queues
|
|
549
693
|
|
|
550
694
|
> **Warning:** These changes are destructive on live queues. Drain the queue completely before applying them.
|
|
@@ -570,6 +714,9 @@ v6.0.0 is a non-breaking-defaults release that adds new public APIs. v5 code con
|
|
|
570
714
|
- `max_pending_length=N` caps pending-list depth; with `pending_overload_policy="raise"` (default) producers see `QueueBackpressureError` when the cap is hit; `"block"` waits up to `pending_overload_block_timeout_seconds`; `"drop_oldest"` evicts silently, so use it only when data loss is acceptable.
|
|
571
715
|
- `queue.drain(timeout=...)` (sync) and `await queue.aclose(timeout=...)` (async) are explicit graceful-shutdown hooks. They refuse new claims and recover pending claim IDs but do not cancel in-flight handlers; join or await your worker separately.
|
|
572
716
|
- `on_event=callback` receives a `QueueEvent` dataclass for every publish/claim/ack/reclaim/dedup/cleanup lifecycle event. Use it for metrics, tracing, and structured logging. See [`examples/production/observability.py`](examples/production/observability.py) for the adapter pattern.
|
|
717
|
+
- See [`examples/production/backpressure.py`](examples/production/backpressure.py) and [`examples/production/graceful_shutdown.py`](examples/production/graceful_shutdown.py) for sync production patterns, with async siblings under [`examples/production/asyncio/`](examples/production/asyncio/).
|
|
718
|
+
|
|
719
|
+
> When using a pre-fork app server (gunicorn `--preload`, uvicorn workers that import the app at master startup), call `make_queue()` from your worker startup hook — not at module import. See [Fork safety](#fork-safety-and-pre-fork-servers) for why.
|
|
573
720
|
|
|
574
721
|
**New constructor rejections:**
|
|
575
722
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "redis-message-queue"
|
|
3
|
-
version = "6.0.1"
|
|
3
|
+
version = "7.0.0"
|
|
4
4
|
description = "Python message queuing with Redis and message deduplication"
|
|
5
5
|
authors = ["Elijas <4084885+Elijas@users.noreply.github.com>"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -24,7 +24,7 @@ Issues = "https://github.com/Elijas/redis-message-queue/issues"
|
|
|
24
24
|
|
|
25
25
|
[tool.poetry.dependencies]
|
|
26
26
|
python = "^3.12"
|
|
27
|
-
redis = ">=5.0.0"
|
|
27
|
+
redis = ">=5.0.0,<8.0.0"
|
|
28
28
|
tenacity = ">=8.1.0"
|
|
29
29
|
|
|
30
30
|
[tool.poetry.group.test.dependencies]
|
|
@@ -6,6 +6,7 @@ from redis_message_queue._exceptions import (
|
|
|
6
6
|
GatewayContractError,
|
|
7
7
|
LuaScriptError,
|
|
8
8
|
QueueBackpressureError,
|
|
9
|
+
QueueDrainedError,
|
|
9
10
|
RedisMessageQueueError,
|
|
10
11
|
RetryBudgetExhaustedError,
|
|
11
12
|
)
|
|
@@ -33,6 +34,7 @@ __all__ = [
|
|
|
33
34
|
"GatewayContractError",
|
|
34
35
|
"LuaScriptError",
|
|
35
36
|
"QueueBackpressureError",
|
|
37
|
+
"QueueDrainedError",
|
|
36
38
|
"CleanupFailedError",
|
|
37
39
|
"RetryBudgetExhaustedError",
|
|
38
40
|
]
|
|
@@ -83,12 +83,13 @@ class AbstractRedisGateway(ABC):
|
|
|
83
83
|
command can silently duplicate the message. The caller can still
|
|
84
84
|
retry (accepting duplicates).
|
|
85
85
|
|
|
86
|
-
Note:
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
86
|
+
Note on retries: redis-py 6.0+ changed the default standalone
|
|
87
|
+
``Redis()`` / ``redis.asyncio.Redis()`` retry policy from ``None`` (no
|
|
88
|
+
retry) to a 3-attempt ``ExponentialWithJitterBackoff``. If you need
|
|
89
|
+
strict at-most-once for non-deduplicated publishes, pass ``retry=None``
|
|
90
|
+
explicitly when constructing the redis-py client. This library does
|
|
91
|
+
not configure the redis-py client retry; it only controls its own
|
|
92
|
+
retry budget on top of the client.
|
|
92
93
|
"""
|
|
93
94
|
|
|
94
95
|
@abstractmethod
|