redis-message-queue 8.0.1__tar.gz → 8.0.3__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
Files changed (23)
  1. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/PKG-INFO +100 -4
  2. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/README.md +99 -3
  3. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/pyproject.toml +1 -1
  4. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/__init__.py +2 -0
  5. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_config.py +82 -25
  6. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_event.py +4 -0
  7. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_exceptions.py +4 -0
  8. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_queue_key_manager.py +8 -0
  9. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_redis_cluster.py +25 -0
  10. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_redis_gateway.py +65 -7
  11. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_stored_message.py +49 -30
  12. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/asyncio/__init__.py +2 -0
  13. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/asyncio/_redis_gateway.py +65 -7
  14. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/asyncio/redis_message_queue.py +193 -88
  15. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/interrupt_handler/_implementation.py +6 -0
  16. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/redis_message_queue.py +185 -91
  17. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/LICENSE +0 -0
  18. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_abstract_redis_gateway.py +0 -0
  19. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/_callable_utils.py +0 -0
  20. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/asyncio/_abstract_redis_gateway.py +0 -0
  21. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/interrupt_handler/__init__.py +0 -0
  22. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/interrupt_handler/_interface.py +0 -0
  23. {redis_message_queue-8.0.1 → redis_message_queue-8.0.3}/redis_message_queue/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: redis-message-queue
3
- Version: 8.0.1
3
+ Version: 8.0.3
4
4
  Summary: Python message queuing with Redis and message deduplication
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -68,6 +68,13 @@ with queue.process_message() as message:
68
68
  `RedisMessageQueue` itself is not a context manager. Use
69
69
  `with queue.process_message() as message:` for each message.
70
70
 
71
+ > **Important:** In the sync API, work inside `process_message()` must be
72
+ > synchronous. If your handler is `async def`, returns a coroutine, or returns
73
+ > any other awaitable, use `redis_message_queue.asyncio.RedisMessageQueue`.
74
+ > The sync context manager does not inspect the handler's return value; an
75
+ > unawaited coroutine can be dropped while the message is acked. An ergonomic
76
+ > callback API that detects this is planned for v8.1.
77
+
71
78
  ### Async quickstart
72
79
 
73
80
  ```python
@@ -119,6 +126,12 @@ All features are optional and can be enabled or disabled as needed.
119
126
 
120
127
  See [Crash recovery with visibility timeout](#crash-recovery-with-visibility-timeout) for details and tradeoffs.
121
128
 
129
+ > **Important:** Handler exceptions are terminal. This library is a payload
130
+ > queue, not a task framework: raising inside `process_message()` does not
131
+ > requeue the message. With `enable_failed_queue=False`, the message is removed
132
+ > from `processing`; with `enable_failed_queue=True`, it is moved to the failed
133
+ > list.
134
+
122
135
  ## Configuration
123
136
 
124
137
  ### Deduplication
@@ -159,6 +172,13 @@ Avoid fallback patterns such as `lambda msg: msg.get("order_id", "")`.
159
172
  Missing fields should fail loudly instead of collapsing unrelated messages into
160
173
  one deduplication key.
161
174
 
175
+ Deduplication markers and publish retry-safety markers are Redis TTL keys. A
176
+ large forward step in the Redis server expiration clock during an in-call retry
177
+ window can expire those markers before the Python-side monotonic retry budget
178
+ elapses, allowing a duplicate publish. This is an extreme anomaly, mainly
179
+ relevant under cluster-wide NTP step corrections while a producer is retrying
180
+ after an ambiguous Redis write.
181
+
162
182
  ### Success and failure tracking
163
183
 
164
184
  ```python
@@ -234,6 +254,11 @@ queue = RedisMessageQueue(
234
254
  )
235
255
  ```
236
256
 
257
+ > **Important:** `visibility_timeout_seconds` is a lease, not a handler runtime
258
+ > cap. rmq never interrupts a long-running handler. If the lease expires while
259
+ > the handler continues, another consumer can reclaim and process the same
260
+ > message concurrently.
261
+
237
262
  This enables lease-based redelivery for messages left in `processing` by a crashed worker and renews the lease while a healthy long-running handler is still working.
238
263
  Tradeoffs:
239
264
  - delivery becomes at-least-once after lease expiry
@@ -258,6 +283,13 @@ The callback is **advisory** — it may fire briefly after a successful `process
258
283
 
259
284
  Without a visibility timeout, messages already moved to `processing` remain there indefinitely after a consumer crash and are not redelivered, even if the crash happened before your handler started running.
260
285
 
286
+ Visibility deadlines use Redis server time (`TIME`), not Python process time.
287
+ A forward step in the Redis server clock can make a live lease appear expired
288
+ and allow premature redelivery while the original consumer is still processing;
289
+ a backward step can delay reclaim of truly abandoned messages. Treat NTP step
290
+ corrections on Redis hosts as a deployment risk. Prefer time-synchronization
291
+ discipline that slews corrections rather than stepping the Redis clock.
292
+
261
293
  ### Ordering and multi-consumer fairness
262
294
 
263
295
  The built-in queue is a shared-pull Redis list. Successful publishes push to the
@@ -354,6 +386,11 @@ while not interrupt.is_interrupted():
354
386
  > `ValueError`. A repeated owned signal falls back to the default behavior
355
387
  > (for example, a second Ctrl+C raises `KeyboardInterrupt`). If you need multiple
356
388
  > shutdown hooks, use a single handler and fan out in your own code.
389
+ >
390
+ > Process-global signal ownership cannot be safely chained with task-worker
391
+ > CLIs such as Celery, RQ, or Dramatiq. Run sibling workers in separate
392
+ > processes, or install one top-level signal owner that calls `queue.drain()`
393
+ > / `queue.aclose()` or sets an application stop event.
357
394
 
358
395
  There are three distinct shutdown shapes; pick the one that matches your runtime:
359
396
 
@@ -383,7 +420,10 @@ if a publish is already inside the queue instance's publish path, drain waits
383
420
  for that publish to finish before it returns; publishes that arrive after the
384
421
  drained flag is set are rejected. The drained state is local to that Python
385
422
  queue object and is not written to Redis, so constructing a fresh
386
- `RedisMessageQueue(...)` over the same keys remains usable.
423
+ `RedisMessageQueue(...)` over the same keys remains usable. A separate process
424
+ or separate queue instance against the same Redis keys is not marked drained by
425
+ this call. For multi-process graceful shutdown, each process must drain its own
426
+ queue instances.
387
427
 
388
428
  Drain does not cancel in-flight handlers — the caller must arrange handler
389
429
  exit through normal thread/task coordination. Returns `True` if all in-memory
@@ -391,6 +431,24 @@ pending claim IDs were recovered within the timeout; `False` if the deadline
391
431
  fired or transient Redis errors left claim IDs pending (call again to retry).
392
432
  `timeout=0` reports current state without attempting recovery.
393
433
 
434
+ #### Abandoned in-flight messages
435
+
436
+ Abandoned in-flight messages are recovered lazily. Async tasks cancelled
437
+ without `aclose()`, or sync processes killed mid-handler, can leave the message
438
+ and its processing/lease metadata in Redis until a later consumer claim path
439
+ triggers visibility-timeout reclaim. With visibility timeouts enabled, this is
440
+ the designed at-least-once recovery path: the message is delayed by the lease,
441
+ not lost. With `visibility_timeout_seconds=None`, there is no automatic reclaim
442
+ path. For low-visibility-timeout workloads, prefer an explicit `drain()` /
443
+ `aclose()` during shutdown so local pending claim IDs are recovered before
444
+ process exit.
445
+
446
+ `drain()` / `aclose()` timeouts are measured with Python monotonic clocks, but
447
+ any lease deadlines they recover were created from Redis server time. The same
448
+ Redis-clock step caveats from
449
+ [Crash recovery with visibility timeout](#crash-recovery-with-visibility-timeout)
450
+ apply to when abandoned work becomes reclaimable.
451
+
394
452
  > **Heartbeat caveat (best-effort stop):** when `heartbeat_interval_seconds` is
395
453
  > set, the heartbeat sidecar's `stop()` is bounded but not strictly quiescent —
396
454
  > a slow renewal in flight when `process_message` exits may still write to
@@ -495,6 +553,42 @@ await client.aclose()
495
553
  For the sync Redis client, call `client.close()` during application shutdown when
496
554
  you own the client lifecycle.
497
555
 
556
+ ## Migrating from RQ / Celery / Dramatiq / taskiq
557
+
558
+ redis-message-queue is a payload queue, not a task framework. It has no task
559
+ registry, job object, result backend, scheduler, workflow canvas, callback
560
+ graph, or handler-level retry policy. Producers publish a `str` or `dict`
561
+ payload, and consumers decide what that payload means.
562
+
563
+ The most important semantic differences from sibling task libraries are:
564
+
565
+ - Handler exceptions are terminal. Raising inside `process_message()` removes
566
+ the message from `processing`, or moves it to the failed list when
567
+ `enable_failed_queue=True`; it does not requeue or retry the message.
568
+ - `visibility_timeout_seconds` is a crash/stall recovery lease, not a runtime
569
+ limit. Slow handlers are not interrupted; after the lease expires another
570
+ consumer can process the same payload concurrently.
571
+ - `on_event` is telemetry only. Callback exceptions are logged and emitted as
572
+ `RuntimeWarning`, but they do not affect ack/nack, failed-queue movement, or
573
+ any other message outcome. Do not use `on_event` for sagas, follow-up writes,
574
+ billing callbacks, or other correctness-critical work.
575
+ - Dict payloads are JSON data, not Python call arguments. JSON does not
576
+ preserve every Python type: tuples become lists, and sets or custom objects
577
+ raise unless you encode them into JSON-native values first.
578
+ - Process-global signal ownership cannot be safely chained with Celery, RQ, or
579
+ Dramatiq CLI workers. Prefer one top-level owner that calls `queue.drain()`
580
+ or sets an application stop event, and run sibling workers in separate
581
+ processes.
582
+
583
+ When migrating on the same Redis deployment, prefer separate Redis DBs or hard
584
+ namespaces. Do not point a Celery, RQ, Dramatiq, or taskiq worker at an rmq
585
+ pending key. A sibling worker can pop the rmq stored message, fail its own
586
+ decoder, and leave the rmq queue without that message. Also avoid custom
587
+ `key_separator` values that synthesize another library's key namespace, such as
588
+ using `":queue:"` with a queue name that overlaps RQ keys. rmq has no fixed
589
+ library prefix; generated keys share the Redis DB namespace with every other
590
+ Redis user.
591
+
498
592
  ## Production notes
499
593
 
500
594
  ### Fork safety and pre-fork servers
@@ -610,8 +704,10 @@ Events cover publish, dedup hits, claim/empty polls, reclaim, ack/nack,
610
704
  completed/failed cleanup, DLQ moves, heartbeat renewal, stale leases, cleanup
611
705
  and trim failures, and retry attempts. Callback exceptions are logged and
612
706
  reported with `RuntimeWarning`, but never propagate into queue operations.
613
- Package logs remain diagnostic; use `on_event` rather than log parsing for
614
- metrics.
707
+ `on_event` is telemetry only: use it for metrics, tracing, and logging, not for
708
+ sagas, follow-up writes, billing callbacks, or other correctness-critical
709
+ work. Package logs remain diagnostic; use `on_event` rather than log parsing
710
+ for metrics.
615
711
 
616
712
  ```python
617
713
  from opentelemetry import trace
@@ -42,6 +42,13 @@ with queue.process_message() as message:
42
42
  `RedisMessageQueue` itself is not a context manager. Use
43
43
  `with queue.process_message() as message:` for each message.
44
44
 
45
+ > **Important:** In the sync API, work inside `process_message()` must be
46
+ > synchronous. If your handler is `async def`, returns a coroutine, or returns
47
+ > any other awaitable, use `redis_message_queue.asyncio.RedisMessageQueue`.
48
+ > The sync context manager does not inspect the handler's return value; an
49
+ > unawaited coroutine can be dropped while the message is acked. An ergonomic
50
+ > callback API that detects this is planned for v8.1.
51
+
45
52
  ### Async quickstart
46
53
 
47
54
  ```python
@@ -93,6 +100,12 @@ All features are optional and can be enabled or disabled as needed.
93
100
 
94
101
  See [Crash recovery with visibility timeout](#crash-recovery-with-visibility-timeout) for details and tradeoffs.
95
102
 
103
+ > **Important:** Handler exceptions are terminal. This library is a payload
104
+ > queue, not a task framework: raising inside `process_message()` does not
105
+ > requeue the message. With `enable_failed_queue=False`, the message is removed
106
+ > from `processing`; with `enable_failed_queue=True`, it is moved to the failed
107
+ > list.
108
+
96
109
  ## Configuration
97
110
 
98
111
  ### Deduplication
@@ -133,6 +146,13 @@ Avoid fallback patterns such as `lambda msg: msg.get("order_id", "")`.
133
146
  Missing fields should fail loudly instead of collapsing unrelated messages into
134
147
  one deduplication key.
135
148
 
149
+ Deduplication markers and publish retry-safety markers are Redis TTL keys. A
150
+ large forward step in the Redis server expiration clock during an in-call retry
151
+ window can expire those markers before the Python-side monotonic retry budget
152
+ elapses, allowing a duplicate publish. This is an extreme anomaly, mainly
153
+ relevant under cluster-wide NTP step corrections while a producer is retrying
154
+ after an ambiguous Redis write.
155
+
136
156
  ### Success and failure tracking
137
157
 
138
158
  ```python
@@ -208,6 +228,11 @@ queue = RedisMessageQueue(
208
228
  )
209
229
  ```
210
230
 
231
+ > **Important:** `visibility_timeout_seconds` is a lease, not a handler runtime
232
+ > cap. rmq never interrupts a long-running handler. If the lease expires while
233
+ > the handler continues, another consumer can reclaim and process the same
234
+ > message concurrently.
235
+
211
236
  This enables lease-based redelivery for messages left in `processing` by a crashed worker and renews the lease while a healthy long-running handler is still working.
212
237
  Tradeoffs:
213
238
  - delivery becomes at-least-once after lease expiry
@@ -232,6 +257,13 @@ The callback is **advisory** — it may fire briefly after a successful `process
232
257
 
233
258
  Without a visibility timeout, messages already moved to `processing` remain there indefinitely after a consumer crash and are not redelivered, even if the crash happened before your handler started running.
234
259
 
260
+ Visibility deadlines use Redis server time (`TIME`), not Python process time.
261
+ A forward step in the Redis server clock can make a live lease appear expired
262
+ and allow premature redelivery while the original consumer is still processing;
263
+ a backward step can delay reclaim of truly abandoned messages. Treat NTP step
264
+ corrections on Redis hosts as a deployment risk. Prefer time-synchronization
265
+ discipline that slews corrections rather than stepping the Redis clock.
266
+
235
267
  ### Ordering and multi-consumer fairness
236
268
 
237
269
  The built-in queue is a shared-pull Redis list. Successful publishes push to the
@@ -328,6 +360,11 @@ while not interrupt.is_interrupted():
328
360
  > `ValueError`. A repeated owned signal falls back to the default behavior
329
361
  > (for example, a second Ctrl+C raises `KeyboardInterrupt`). If you need multiple
330
362
  > shutdown hooks, use a single handler and fan out in your own code.
363
+ >
364
+ > Process-global signal ownership cannot be safely chained with task-worker
365
+ > CLIs such as Celery, RQ, or Dramatiq. Run sibling workers in separate
366
+ > processes, or install one top-level signal owner that calls `queue.drain()`
367
+ > / `queue.aclose()` or sets an application stop event.
331
368
 
332
369
  There are three distinct shutdown shapes; pick the one that matches your runtime:
333
370
 
@@ -357,7 +394,10 @@ if a publish is already inside the queue instance's publish path, drain waits
357
394
  for that publish to finish before it returns; publishes that arrive after the
358
395
  drained flag is set are rejected. The drained state is local to that Python
359
396
  queue object and is not written to Redis, so constructing a fresh
360
- `RedisMessageQueue(...)` over the same keys remains usable.
397
+ `RedisMessageQueue(...)` over the same keys remains usable. A separate process
398
+ or separate queue instance against the same Redis keys is not marked drained by
399
+ this call. For multi-process graceful shutdown, each process must drain its own
400
+ queue instances.
361
401
 
362
402
  Drain does not cancel in-flight handlers — the caller must arrange handler
363
403
  exit through normal thread/task coordination. Returns `True` if all in-memory
@@ -365,6 +405,24 @@ pending claim IDs were recovered within the timeout; `False` if the deadline
365
405
  fired or transient Redis errors left claim IDs pending (call again to retry).
366
406
  `timeout=0` reports current state without attempting recovery.
367
407
 
408
+ #### Abandoned in-flight messages
409
+
410
+ Abandoned in-flight messages are recovered lazily. Async tasks cancelled
411
+ without `aclose()`, or sync processes killed mid-handler, can leave the message
412
+ and its processing/lease metadata in Redis until a later consumer claim path
413
+ triggers visibility-timeout reclaim. With visibility timeouts enabled, this is
414
+ the designed at-least-once recovery path: the message is delayed by the lease,
415
+ not lost. With `visibility_timeout_seconds=None`, there is no automatic reclaim
416
+ path. For low-visibility-timeout workloads, prefer an explicit `drain()` /
417
+ `aclose()` during shutdown so local pending claim IDs are recovered before
418
+ process exit.
419
+
420
+ `drain()` / `aclose()` timeouts are measured with Python monotonic clocks, but
421
+ any lease deadlines they recover were created from Redis server time. The same
422
+ Redis-clock step caveats from
423
+ [Crash recovery with visibility timeout](#crash-recovery-with-visibility-timeout)
424
+ apply to when abandoned work becomes reclaimable.
425
+
368
426
  > **Heartbeat caveat (best-effort stop):** when `heartbeat_interval_seconds` is
369
427
  > set, the heartbeat sidecar's `stop()` is bounded but not strictly quiescent —
370
428
  > a slow renewal in flight when `process_message` exits may still write to
@@ -469,6 +527,42 @@ await client.aclose()
469
527
  For the sync Redis client, call `client.close()` during application shutdown when
470
528
  you own the client lifecycle.
471
529
 
530
+ ## Migrating from RQ / Celery / Dramatiq / taskiq
531
+
532
+ redis-message-queue is a payload queue, not a task framework. It has no task
533
+ registry, job object, result backend, scheduler, workflow canvas, callback
534
+ graph, or handler-level retry policy. Producers publish a `str` or `dict`
535
+ payload, and consumers decide what that payload means.
536
+
537
+ The most important semantic differences from sibling task libraries are:
538
+
539
+ - Handler exceptions are terminal. Raising inside `process_message()` removes
540
+ the message from `processing`, or moves it to the failed list when
541
+ `enable_failed_queue=True`; it does not requeue or retry the message.
542
+ - `visibility_timeout_seconds` is a crash/stall recovery lease, not a runtime
543
+ limit. Slow handlers are not interrupted; after the lease expires another
544
+ consumer can process the same payload concurrently.
545
+ - `on_event` is telemetry only. Callback exceptions are logged and emitted as
546
+ `RuntimeWarning`, but they do not affect ack/nack, failed-queue movement, or
547
+ any other message outcome. Do not use `on_event` for sagas, follow-up writes,
548
+ billing callbacks, or other correctness-critical work.
549
+ - Dict payloads are JSON data, not Python call arguments. JSON does not
550
+ preserve every Python type: tuples become lists, and sets or custom objects
551
+ raise unless you encode them into JSON-native values first.
552
+ - Process-global signal ownership cannot be safely chained with Celery, RQ, or
553
+ Dramatiq CLI workers. Prefer one top-level owner that calls `queue.drain()`
554
+ or sets an application stop event, and run sibling workers in separate
555
+ processes.
556
+
557
+ When migrating on the same Redis deployment, prefer separate Redis DBs or hard
558
+ namespaces. Do not point a Celery, RQ, Dramatiq, or taskiq worker at an rmq
559
+ pending key. A sibling worker can pop the rmq stored message, fail its own
560
+ decoder, and leave the rmq queue without that message. Also avoid custom
561
+ `key_separator` values that synthesize another library's key namespace, such as
562
+ using `":queue:"` with a queue name that overlaps RQ keys. rmq has no fixed
563
+ library prefix; generated keys share the Redis DB namespace with every other
564
+ Redis user.
565
+
472
566
  ## Production notes
473
567
 
474
568
  ### Fork safety and pre-fork servers
@@ -584,8 +678,10 @@ Events cover publish, dedup hits, claim/empty polls, reclaim, ack/nack,
584
678
  completed/failed cleanup, DLQ moves, heartbeat renewal, stale leases, cleanup
585
679
  and trim failures, and retry attempts. Callback exceptions are logged and
586
680
  reported with `RuntimeWarning`, but never propagate into queue operations.
587
- Package logs remain diagnostic; use `on_event` rather than log parsing for
588
- metrics.
681
+ `on_event` is telemetry only: use it for metrics, tracing, and logging, not for
682
+ sagas, follow-up writes, billing callbacks, or other correctness-critical
683
+ work. Package logs remain diagnostic; use `on_event` rather than log parsing
684
+ for metrics.
589
685
 
590
686
  ```python
591
687
  from opentelemetry import trace
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "redis-message-queue"
3
- version = "8.0.1"
3
+ version = "8.0.3"
4
4
  description = "Python message queuing with Redis and message deduplication"
5
5
  authors = ["Elijas <4084885+Elijas@users.noreply.github.com>"]
6
6
  readme = "README.md"
@@ -5,6 +5,7 @@ from redis_message_queue._exceptions import (
5
5
  ConfigurationError,
6
6
  GatewayContractError,
7
7
  LuaScriptError,
8
+ MalformedStoredMessageError,
8
9
  QueueBackpressureError,
9
10
  QueueDrainedError,
10
11
  RedisMessageQueueError,
@@ -33,6 +34,7 @@ __all__ = [
33
34
  "ConfigurationError",
34
35
  "GatewayContractError",
35
36
  "LuaScriptError",
37
+ "MalformedStoredMessageError",
36
38
  "QueueBackpressureError",
37
39
  "QueueDrainedError",
38
40
  "CleanupFailedError",
@@ -50,6 +50,9 @@ def is_redis_retryable_exception(exception):
50
50
  ),
51
51
  )
52
52
 
53
+ if isinstance(exception, redis.exceptions.ClusterError) and "TTL exhausted" in str(exception):
54
+ return True
55
+
53
56
  # 2. Explicit retryable exceptions (BusyLoadingError is a ConnectionError
54
57
  # subclass, so it is already handled by branch 1 above)
55
58
  return isinstance(
@@ -739,6 +742,26 @@ local function redis_message_queue_decode_claim(cached_claim)
739
742
  return nil
740
743
  end
741
744
 
745
+ local function redis_message_queue_decode_envelope(stored)
746
+ local prefix = string.char(30) .. 'RMQ1:'
747
+ if type(stored) ~= 'string' or string.sub(stored, 1, string.len(prefix)) ~= prefix then
748
+ return nil
749
+ end
750
+ local ok, envelope = pcall(cjson.decode, string.sub(stored, string.len(prefix) + 1))
751
+ if ok and type(envelope) == 'table' and type(envelope['id']) == 'string' then
752
+ return envelope
753
+ end
754
+ return nil
755
+ end
756
+
757
+ local function redis_message_queue_message_id(stored)
758
+ local envelope = redis_message_queue_decode_envelope(stored)
759
+ if envelope then
760
+ return envelope['id']
761
+ end
762
+ return ''
763
+ end
764
+
742
765
  local time = redis.call('TIME')
743
766
  local now_ms = tonumber(time[1]) * 1000 + math.floor(tonumber(time[2]) / 1000)
744
767
 
@@ -779,6 +802,7 @@ end
779
802
  -- With a single consumer polling at default interval, 1000 expired leases drain in ~2.5s.
780
803
  local expired = redis.call('ZRANGEBYSCORE', KEYS[3], '-inf', now_ms, 'LIMIT', 0, 100)
781
804
  local to_requeue = {}
805
+ local reclaimed_events = {}
782
806
  for i = #expired, 1, -1 do
783
807
  local expired_lease_token = redis.call('HGET', KEYS[4], expired[i])
784
808
  redis.call('ZREM', KEYS[3], expired[i])
@@ -800,13 +824,14 @@ for i = #expired, 1, -1 do
800
824
  end
801
825
  if redis.call('LREM', KEYS[2], 1, expired[i]) == 1 then
802
826
  table.insert(to_requeue, expired[i])
827
+ local delivery_count = redis.call('HGET', KEYS[6], expired[i])
828
+ table.insert(reclaimed_events, {redis_message_queue_message_id(expired[i]), tostring(delivery_count or '0')})
803
829
  end
804
830
  end
805
831
  if #to_requeue > 0 then
806
832
  redis.call('RPUSH', KEYS[1], unpack(to_requeue))
807
833
  end
808
- local reclaimed_count = #to_requeue
809
- local dead_lettered_count = 0
834
+ local dead_lettered_events = {}
810
835
 
811
836
  local function store_claim_and_return(stored)
812
837
  -- pcall guards against OOM mid-write: compensate by returning message to pending
@@ -819,7 +844,7 @@ local function store_claim_and_return(stored)
819
844
  redis.call('HSET', KEYS[9], lease_token, KEYS[8])
820
845
  redis.call('HSET', KEYS[10], ARGV[4], claim_payload)
821
846
  redis.call('HSET', KEYS[11], lease_token, ARGV[4])
822
- return {stored, lease_token, tostring(reclaimed_count), tostring(dead_lettered_count)}
847
+ return {stored, lease_token, reclaimed_events, dead_lettered_events}
823
848
  end)
824
849
  if not ok then
825
850
  redis.call('LREM', KEYS[2], 1, stored)
@@ -835,36 +860,28 @@ while claim_attempts < 100 do
835
860
 
836
861
  local stored = redis.call('LMOVE', KEYS[1], KEYS[2], 'RIGHT', 'LEFT')
837
862
  if not stored then
838
- return {'', '', tostring(reclaimed_count), tostring(dead_lettered_count)}
863
+ return {'', '', reclaimed_events, dead_lettered_events}
839
864
  end
840
865
 
841
- if max_delivery_count > 0 then
842
- local count = redis.call('HINCRBY', KEYS[6], stored, 1)
843
- if count > max_delivery_count then
844
- redis.call('LREM', KEYS[2], 1, stored)
845
- redis.call('HDEL', KEYS[6], stored)
846
- -- Strip envelope to store raw payload in DLQ, consistent with completed/failed queues.
847
- -- The per-delivery UUID in the envelope is lost; see README dead-letter notes.
848
- local dead_letter_value = stored
849
- local prefix = string.char(30) .. 'RMQ1:'
850
- if string.sub(stored, 1, string.len(prefix)) == prefix then
851
- local ok, envelope = pcall(cjson.decode, string.sub(stored, string.len(prefix) + 1))
852
- if ok and type(envelope) == 'table' and type(envelope['id']) == 'string'
853
- and type(envelope['payload']) == 'string' then
854
- dead_letter_value = envelope['payload']
855
- end
856
- end
857
- redis.call('LPUSH', KEYS[7], dead_letter_value)
858
- dead_lettered_count = dead_lettered_count + 1
859
- else
860
- return store_claim_and_return(stored)
866
+ local count = redis.call('HINCRBY', KEYS[6], stored, 1)
867
+ if max_delivery_count > 0 and count > max_delivery_count then
868
+ redis.call('LREM', KEYS[2], 1, stored)
869
+ redis.call('HDEL', KEYS[6], stored)
870
+ -- Strip envelope to store raw payload in DLQ, consistent with completed/failed queues.
871
+ -- The per-delivery UUID in the envelope is lost; see README dead-letter notes.
872
+ local dead_letter_value = stored
873
+ local envelope = redis_message_queue_decode_envelope(stored)
874
+ if envelope and type(envelope['payload']) == 'string' then
875
+ dead_letter_value = envelope['payload']
861
876
  end
877
+ redis.call('LPUSH', KEYS[7], dead_letter_value)
878
+ table.insert(dead_lettered_events, {redis_message_queue_message_id(stored), tostring(count)})
862
879
  else
863
880
  return store_claim_and_return(stored)
864
881
  end
865
882
  end
866
883
 
867
- return {'', '', tostring(reclaimed_count), tostring(dead_lettered_count)}
884
+ return {'', '', reclaimed_events, dead_lettered_events}
868
885
  """
869
886
  )
870
887
 
@@ -1037,6 +1054,46 @@ return removed
1037
1054
  """
1038
1055
  )
1039
1056
 
1057
+ CLEANUP_DRAINED_LEASE_TOKEN_COUNTER_LUA_SCRIPT = (
1058
+ _LUA_KEY_TYPE_GUARD
1059
+ + """
1060
+ local err = redis_message_queue_require_type(KEYS[1], 'list')
1061
+ if err then
1062
+ return err
1063
+ end
1064
+
1065
+ local err = redis_message_queue_require_type(KEYS[2], 'zset')
1066
+ if err then
1067
+ return err
1068
+ end
1069
+
1070
+ local err = redis_message_queue_require_type(KEYS[3], 'hash')
1071
+ if err then
1072
+ return err
1073
+ end
1074
+
1075
+ local err = redis_message_queue_require_type(KEYS[4], 'hash')
1076
+ if err then
1077
+ return err
1078
+ end
1079
+
1080
+ local err = redis_message_queue_require_type(KEYS[5], 'string')
1081
+ if err then
1082
+ return err
1083
+ end
1084
+
1085
+ if redis.call('LLEN', KEYS[1]) == 0
1086
+ and redis.call('ZCARD', KEYS[2]) == 0
1087
+ and redis.call('HLEN', KEYS[3]) == 0
1088
+ and redis.call('HLEN', KEYS[4]) == 0 then
1089
+ redis.call('DEL', KEYS[5])
1090
+ return 1
1091
+ end
1092
+
1093
+ return 0
1094
+ """
1095
+ )
1096
+
1040
1097
  RENEW_MESSAGE_LEASE_LUA_SCRIPT = (
1041
1098
  _LUA_KEY_TYPE_GUARD
1042
1099
  + """
@@ -52,6 +52,10 @@ class QueueEvent:
52
52
  """a diagnostic hash of the lease token when visibility timeout is enabled"""
53
53
  destination_queue: str | None = None
54
54
  """the queue a message was moved to, when applicable"""
55
+ delivery_count: int | None = None
56
+ """the number of delivery attempts recorded for this message, when applicable"""
57
+ max_delivery_count: int | None = None
58
+ """the configured delivery-attempt threshold, when applicable"""
55
59
  exception_type: str | None = None
56
60
  """
57
61
  type name of the raised exception for metrics labels (e.g., 'TimeoutError');
@@ -21,6 +21,10 @@ class CleanupFailedError(RedisMessageQueueError):
21
21
  """Cleanup after handler completion failed."""
22
22
 
23
23
 
24
+ class MalformedStoredMessageError(RedisMessageQueueError):
25
+ """Stored value starts with the RMQ envelope prefix but is not a valid envelope."""
26
+
27
+
24
28
  class QueueBackpressureError(RedisMessageQueueError):
25
29
  """Publish rejected because the pending queue is at its configured limit."""
26
30
 
@@ -17,6 +17,14 @@ def validate_callable_deduplication_key(dedup_key: object, message: str | dict)
17
17
 
18
18
 
19
19
  class QueueKeyManager:
20
+ """Build Redis keys for one rmq queue namespace.
21
+
22
+ ``key_separator`` is part of every generated key and rmq has no fixed
23
+ library prefix. Do not choose a separator that overlaps another Redis task
24
+ library's namespace, such as ``":queue:"`` with RQ-style keys; user-chosen
25
+ separators interact with every Redis user on the same DB.
26
+ """
27
+
20
28
  # Logs message existence to prevent duplication.
21
29
  # Messages are marked for the duration of their lifecycle.
22
30
  _MESSAGE_DEDUPLICATION_LOG = "deduplication"
@@ -1,4 +1,5 @@
1
1
  import re
2
+ from collections.abc import Mapping
2
3
 
3
4
  from redis.crc import key_slot
4
5
 
@@ -6,12 +7,36 @@ from redis_message_queue._exceptions import ConfigurationError
6
7
  from redis_message_queue._queue_key_manager import QueueKeyManager
7
8
 
8
9
  _HASH_TAG_PATTERN = re.compile(r"\{([^{}]+)\}")
10
+ PLAIN_REDIS_CLUSTER_CLIENT_MESSAGE = (
11
+ "The provided Redis client is a plain {client_type} connected to a Redis Cluster node "
12
+ "('INFO cluster' reports cluster_enabled=1). Use redis.RedisCluster or "
13
+ "redis.asyncio.RedisCluster instead, and use a hash-tagged queue name such as '{{myqueue}}' "
14
+ "so all queue keys share one Redis Cluster slot."
15
+ )
9
16
 
10
17
 
11
18
  def _redis_cluster_key_slot(key: str) -> int:
12
19
  return key_slot(key.encode("utf-8"))
13
20
 
14
21
 
22
+ def redis_info_reports_cluster_enabled(info: object) -> bool:
23
+ if not isinstance(info, Mapping):
24
+ return False
25
+
26
+ value = info.get("cluster_enabled")
27
+ if value is None:
28
+ value = info.get(b"cluster_enabled")
29
+ if isinstance(value, bytes):
30
+ value = value.decode("utf-8", errors="replace")
31
+ if isinstance(value, str):
32
+ return value.strip() == "1"
33
+ return value == 1
34
+
35
+
36
+ def plain_redis_cluster_client_error(client_type: str) -> ConfigurationError:
37
+ return ConfigurationError(PLAIN_REDIS_CLUSTER_CLIENT_MESSAGE.format(client_type=client_type))
38
+
39
+
15
40
  def validate_queue_keys_for_redis_cluster(
16
41
  key_manager: QueueKeyManager,
17
42
  *,