redis-message-queue 2.1.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/PKG-INFO +24 -16
  2. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/README.md +23 -15
  3. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/pyproject.toml +1 -1
  4. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_abstract_redis_gateway.py +8 -4
  5. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_config.py +53 -3
  6. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_redis_gateway.py +43 -33
  7. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/asyncio/_abstract_redis_gateway.py +8 -4
  8. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/asyncio/_redis_gateway.py +43 -33
  9. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/asyncio/redis_message_queue.py +35 -16
  10. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/interrupt_handler/_implementation.py +7 -2
  11. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/redis_message_queue.py +29 -16
  12. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/LICENSE +0 -0
  13. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/__init__.py +0 -0
  14. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_callable_utils.py +0 -0
  15. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_queue_key_manager.py +0 -0
  16. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_redis_cluster.py +0 -0
  17. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/_stored_message.py +0 -0
  18. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/asyncio/__init__.py +0 -0
  19. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/interrupt_handler/__init__.py +0 -0
  20. {redis_message_queue-2.1.0 → redis_message_queue-3.1.0}/redis_message_queue/interrupt_handler/_interface.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: redis-message-queue
3
- Version: 2.1.0
3
+ Version: 3.1.0
4
4
  Summary: Python message queuing with Redis and message deduplication
5
5
  License-File: LICENSE
6
6
  Author: Elijas
@@ -16,7 +16,7 @@ Description-Content-Type: text/markdown
16
16
 
17
17
  # redis-message-queue
18
18
 
19
- [![PyPI Version](https://img.shields.io/badge/v2.1.0-version?color=43cd0f&style=flat&label=pypi)](https://pypi.org/project/redis-message-queue)
19
+ [![PyPI Version](https://img.shields.io/badge/v3.1.0-version?color=43cd0f&style=flat&label=pypi)](https://pypi.org/project/redis-message-queue)
20
20
  [![PyPI Downloads](https://img.shields.io/pypi/dm/redis-message-queue?color=43cd0f&style=flat&label=downloads)](https://pypistats.org/packages/redis-message-queue)
21
21
  [![License: MIT](https://img.shields.io/badge/License-MIT-43cd0f.svg?style=flat&label=license)](LICENSE)
22
22
  [![Maintained: yes](https://img.shields.io/badge/yes-43cd0f.svg?style=flat&label=maintained)](https://github.com/Elijas/redis-message-queue/issues)
@@ -27,7 +27,7 @@ Description-Content-Type: text/markdown
27
27
  **Lightweight Python message queuing with Redis and built-in publish-side deduplication.** Deduplicate publishes within a TTL window, with optional crash recovery — across any number of producers and consumers.
28
28
 
29
29
  ```bash
30
- pip install "redis-message-queue>=2.0.0,<3.0.0"
30
+ pip install "redis-message-queue>=3.0.0,<4.0.0"
31
31
  ```
32
32
 
33
33
  Requires Redis server >= 6.2.
@@ -219,10 +219,12 @@ while not interrupt.is_interrupted():
219
219
  ```python
220
220
  from redis_message_queue._redis_gateway import RedisGateway
221
221
 
222
- # Custom retry logic, dedup TTL, or wait interval
222
+ # Tune retry budget, dedup TTL, or wait interval
223
223
  gateway = RedisGateway(
224
224
  redis_client=client,
225
- retry_strategy=my_custom_retry,
225
+ retry_budget_seconds=120, # total retry window (set 0 to disable retry)
226
+ retry_max_delay_seconds=5.0, # cap on per-attempt backoff
227
+ retry_initial_delay_seconds=0.01, # first backoff
226
228
  message_deduplication_log_ttl_seconds=3600,
227
229
  message_wait_interval_seconds=10,
228
230
  message_visibility_timeout_seconds=300,
@@ -230,6 +232,21 @@ gateway = RedisGateway(
230
232
  queue = RedisMessageQueue("q", gateway=gateway)
231
233
  ```
232
234
 
235
+ The retry knobs configure an internal `tenacity` strategy: exponential
236
+ backoff with jitter, retry on transient Redis errors only, capped at
237
+ `retry_budget_seconds`. The budget is wall-clock time from the first attempt (including attempt duration), not inter-attempt delay; a single attempt that takes longer than the budget results in zero retries. Setting `retry_budget_seconds=0` disables retry
238
+ entirely (single attempt; exceptions propagate). The library uses
239
+ `retry_budget_seconds` to size the operation-result cache TTL automatically,
240
+ so the previous footgun of an over-long retry budget out-living the cache
241
+ and producing misleading "cleanup was a no-op" warnings is now structurally
242
+ impossible.
243
+
244
+ To plug in a different retry library (`backoff`, `asyncstdlib.retry`, or your
245
+ own logic) or fundamentally different semantics, subclass
246
+ `AbstractRedisGateway` from `redis_message_queue._abstract_redis_gateway`
247
+ (or `redis_message_queue.asyncio._abstract_redis_gateway`) and override the
248
+ operation methods directly.
249
+
233
250
  If your custom gateway uses visibility timeouts, it must expose a public
234
251
  `message_visibility_timeout_seconds` value and return `ClaimedMessage` from
235
252
  `wait_for_message_and_move()`. The queue now fails closed if a lease-capable
@@ -240,16 +257,6 @@ the queue cannot detect that lease semantics are in play and will treat the
240
257
  gateway as a non-lease gateway. In that misconfigured state, lease-token safety
241
258
  checks and heartbeat validation are bypassed.
242
259
 
243
- A custom `retry_strategy` MUST have a total retry budget no longer than
244
- `max(message_visibility_timeout_seconds, 300)` seconds. That value is the TTL
245
- of the built-in gateway's ambiguous-success cache: if a retry arrives after the
246
- cache has expired, the gateway re-runs the Lua script and — because the message
247
- was already acked on the first attempt — sees `LREM=0` and returns `False`. This
248
- surfaces as a misleading "cleanup was a no-op" warning from `process_message`;
249
- no data is lost or double-processed, but a `max_completed_length` /
250
- `max_failed_length` bound may be skipped on that call. The default
251
- `tenacity.stop_after_delay(120)` is safely within the 300 s floor.
252
-
253
260
  When using a custom gateway with dead-letter queue support, configure `max_delivery_count`
254
261
  and `dead_letter_queue` directly on the gateway — do **not** pass `max_delivery_count` to
255
262
  `RedisMessageQueue`:
@@ -292,8 +299,9 @@ await client.aclose()
292
299
  - **Timed waits use polling claim loops.** To make claims recoverable after ambiguous connection drops, `wait_for_message_and_move()` uses idempotent Lua claim polling instead of raw blocking list-move commands. This adds a small polling cadence during timed waits.
293
300
  - **Redis Lua is atomic, not rollback-transactional.** The built-in scripts now preflight queue key types and fail closed on `WRONGTYPE` before mutating queue state, but Redis does not undo earlier writes if a later script command fails for another reason (for example `OOM` under severe memory pressure).
294
301
  - **Batch reclaim limit of 100.** The visibility-timeout reclaim Lua script processes at most 100 expired messages per consumer poll. Under extreme backlog this may delay recovery, but prevents any single poll from blocking Redis.
302
+ - **Claim-attempt loop limit of 100 per poll.** The VT claim Lua script attempts at most 100 LMOVE+delivery-count checks per invocation. Under pathological conditions (>100 consecutive poison messages in pending), a single poll returns no message even though non-poison messages exist deeper in the queue. Subsequent polls drain the poison batch 100 at a time.
295
303
  - **Redis Cluster requires hash tags.** The built-in queue uses multiple Redis keys per operation. Wrap the queue name in hash tags (for example `{myqueue}`) so every generated key lands in the same slot. When you pass a Redis Cluster client to the built-in queue/gateway path, incompatible names are rejected early.
296
- - **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH`: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. Leave `retry=None` (the default) if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk.
304
+ - **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH`: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. Leave `retry=None` (the default) if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk. More broadly, any non-idempotent `LPUSH` path is vulnerable if the connection drops after server execution but before the client receives the response; all other built-in operations (deduplicated publish, lease-scoped ack/move, lease renewal) use replay markers and are safe under client-level `Retry`.
297
305
 
298
306
  For a full analysis, see [docs/production-readiness.md](docs/production-readiness.md).
299
307
 
@@ -1,6 +1,6 @@
1
1
  # redis-message-queue
2
2
 
3
- [![PyPI Version](https://img.shields.io/badge/v2.1.0-version?color=43cd0f&style=flat&label=pypi)](https://pypi.org/project/redis-message-queue)
3
+ [![PyPI Version](https://img.shields.io/badge/v3.1.0-version?color=43cd0f&style=flat&label=pypi)](https://pypi.org/project/redis-message-queue)
4
4
  [![PyPI Downloads](https://img.shields.io/pypi/dm/redis-message-queue?color=43cd0f&style=flat&label=downloads)](https://pypistats.org/packages/redis-message-queue)
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-43cd0f.svg?style=flat&label=license)](LICENSE)
6
6
  [![Maintained: yes](https://img.shields.io/badge/yes-43cd0f.svg?style=flat&label=maintained)](https://github.com/Elijas/redis-message-queue/issues)
@@ -11,7 +11,7 @@
11
11
  **Lightweight Python message queuing with Redis and built-in publish-side deduplication.** Deduplicate publishes within a TTL window, with optional crash recovery — across any number of producers and consumers.
12
12
 
13
13
  ```bash
14
- pip install "redis-message-queue>=2.0.0,<3.0.0"
14
+ pip install "redis-message-queue>=3.0.0,<4.0.0"
15
15
  ```
16
16
 
17
17
  Requires Redis server >= 6.2.
@@ -203,10 +203,12 @@ while not interrupt.is_interrupted():
203
203
  ```python
204
204
  from redis_message_queue._redis_gateway import RedisGateway
205
205
 
206
- # Custom retry logic, dedup TTL, or wait interval
206
+ # Tune retry budget, dedup TTL, or wait interval
207
207
  gateway = RedisGateway(
208
208
  redis_client=client,
209
- retry_strategy=my_custom_retry,
209
+ retry_budget_seconds=120, # total retry window (set 0 to disable retry)
210
+ retry_max_delay_seconds=5.0, # cap on per-attempt backoff
211
+ retry_initial_delay_seconds=0.01, # first backoff
210
212
  message_deduplication_log_ttl_seconds=3600,
211
213
  message_wait_interval_seconds=10,
212
214
  message_visibility_timeout_seconds=300,
@@ -214,6 +216,21 @@ gateway = RedisGateway(
214
216
  queue = RedisMessageQueue("q", gateway=gateway)
215
217
  ```
216
218
 
219
+ The retry knobs configure an internal `tenacity` strategy: exponential
220
+ backoff with jitter, retry on transient Redis errors only, capped at
221
+ `retry_budget_seconds`. The budget is wall-clock time from the first attempt (including attempt duration), not inter-attempt delay; a single attempt that takes longer than the budget results in zero retries. Setting `retry_budget_seconds=0` disables retry
222
+ entirely (single attempt; exceptions propagate). The library uses
223
+ `retry_budget_seconds` to size the operation-result cache TTL automatically,
224
+ so the previous footgun of an over-long retry budget out-living the cache
225
+ and producing misleading "cleanup was a no-op" warnings is now structurally
226
+ impossible.
227
+
228
+ To plug in a different retry library (`backoff`, `asyncstdlib.retry`, or your
229
+ own logic) or fundamentally different semantics, subclass
230
+ `AbstractRedisGateway` from `redis_message_queue._abstract_redis_gateway`
231
+ (or `redis_message_queue.asyncio._abstract_redis_gateway`) and override the
232
+ operation methods directly.
233
+
217
234
  If your custom gateway uses visibility timeouts, it must expose a public
218
235
  `message_visibility_timeout_seconds` value and return `ClaimedMessage` from
219
236
  `wait_for_message_and_move()`. The queue now fails closed if a lease-capable
@@ -224,16 +241,6 @@ the queue cannot detect that lease semantics are in play and will treat the
224
241
  gateway as a non-lease gateway. In that misconfigured state, lease-token safety
225
242
  checks and heartbeat validation are bypassed.
226
243
 
227
- A custom `retry_strategy` MUST have a total retry budget no longer than
228
- `max(message_visibility_timeout_seconds, 300)` seconds. That value is the TTL
229
- of the built-in gateway's ambiguous-success cache: if a retry arrives after the
230
- cache has expired, the gateway re-runs the Lua script and — because the message
231
- was already acked on the first attempt — sees `LREM=0` and returns `False`. This
232
- surfaces as a misleading "cleanup was a no-op" warning from `process_message`;
233
- no data is lost or double-processed, but a `max_completed_length` /
234
- `max_failed_length` bound may be skipped on that call. The default
235
- `tenacity.stop_after_delay(120)` is safely within the 300 s floor.
236
-
237
244
  When using a custom gateway with dead-letter queue support, configure `max_delivery_count`
238
245
  and `dead_letter_queue` directly on the gateway — do **not** pass `max_delivery_count` to
239
246
  `RedisMessageQueue`:
@@ -276,8 +283,9 @@ await client.aclose()
276
283
  - **Timed waits use polling claim loops.** To make claims recoverable after ambiguous connection drops, `wait_for_message_and_move()` uses idempotent Lua claim polling instead of raw blocking list-move commands. This adds a small polling cadence during timed waits.
277
284
  - **Redis Lua is atomic, not rollback-transactional.** The built-in scripts now preflight queue key types and fail closed on `WRONGTYPE` before mutating queue state, but Redis does not undo earlier writes if a later script command fails for another reason (for example `OOM` under severe memory pressure).
278
285
  - **Batch reclaim limit of 100.** The visibility-timeout reclaim Lua script processes at most 100 expired messages per consumer poll. Under extreme backlog this may delay recovery, but prevents any single poll from blocking Redis.
286
+ - **Claim-attempt loop limit of 100 per poll.** The VT claim Lua script attempts at most 100 LMOVE+delivery-count checks per invocation. Under pathological conditions (>100 consecutive poison messages in pending), a single poll returns no message even though non-poison messages exist deeper in the queue. Subsequent polls drain the poison batch 100 at a time.
279
287
  - **Redis Cluster requires hash tags.** The built-in queue uses multiple Redis keys per operation. Wrap the queue name in hash tags (for example `{myqueue}`) so every generated key lands in the same slot. When you pass a Redis Cluster client to the built-in queue/gateway path, incompatible names are rejected early.
280
- - **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH`: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. Leave `retry=None` (the default) if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk.
288
+ - **Client-side `Retry` can duplicate non-deduplicated publishes.** If you construct your `redis.Redis` client with `retry=Retry(...)`, redis-py retries `ConnectionError` / `TimeoutError` at the connection layer — *below* this library. Idempotent operations (deduplicated `publish()`, lease-scoped cleanup) are safe because their Lua scripts replay the original result. `add_message()` (used by `publish()` when `deduplication=False`) is a bare `LPUSH`: this library deliberately does not retry it, but a client-level `Retry` will, and if the server executed the command before the response was lost the message is enqueued twice. Leave `retry=None` (the default) if you need strict at-most-once semantics for non-deduplicated publishes, or accept the duplication risk. More broadly, any non-idempotent `LPUSH` path is vulnerable if the connection drops after server execution but before the client receives the response; all other built-in operations (deduplicated publish, lease-scoped ack/move, lease renewal) use replay markers and are safe under client-level `Retry`.
281
289
 
282
290
  For a full analysis, see [docs/production-readiness.md](docs/production-readiness.md).
283
291
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "redis-message-queue"
3
- version = "2.1.0"
3
+ version = "3.1.0"
4
4
  description = "Python message queuing with Redis and message deduplication"
5
5
  authors = ["Elijas <4084885+Elijas@users.noreply.github.com>"]
6
6
  readme = "README.md"
@@ -11,13 +11,17 @@ class AbstractRedisGateway(ABC):
11
11
  gateways MUST uphold the same behavioral contracts documented on each method
12
12
  to avoid phantom heartbeats, undetected lease conflicts, or silent data loss.
13
13
 
14
- Gateways that support visibility timeouts (lease-based claiming) should expose
14
+ Gateways that support visibility timeouts (lease-based claiming) MUST expose
15
15
  a ``message_visibility_timeout_seconds`` property (int or None). This is not
16
16
  abstract because it is configuration rather than protocol, but it is required
17
17
  when the queue is configured with ``heartbeat_interval_seconds``.
18
- Lease-capable custom gateways should always expose this property; otherwise
19
- the queue cannot enforce lease-specific fail-closed checks and will treat the
20
- gateway as a non-lease implementation.
18
+ Lease-capable custom gateways MUST expose this property; omitting it
19
+ silently disables heartbeat validation and lease-token safety checks,
20
+ causing the queue to treat the gateway as a non-lease implementation.
21
+
22
+ Gateways that wrap a Redis Cluster client should expose an
23
+ ``is_redis_cluster`` property returning ``True`` so the queue can apply
24
+ hash-tag validation at construction time.
21
25
 
22
26
  Concurrency
23
27
  -----------
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import math
2
3
  import typing
3
4
 
4
5
  import redis
@@ -19,6 +20,10 @@ from redis_message_queue.interrupt_handler._interface import (
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
23
+ DEFAULT_RETRY_BUDGET_SECONDS = 120
24
+ DEFAULT_RETRY_MAX_DELAY_SECONDS = 5.0
25
+ DEFAULT_RETRY_INITIAL_DELAY_SECONDS = 0.01
26
+
22
27
 
23
28
  def is_redis_retryable_exception(exception):
24
29
  # 1. Handle ConnectionError hierarchy (retryable except credentials/config issues)
@@ -62,10 +67,27 @@ class interruptable_retry(retry_base):
62
67
  return self._parent_instance.__call__(retry_state)
63
68
 
64
69
 
65
- def get_default_redis_connection_retry_strategy(*, interrupt: BaseGracefulInterruptHandler | None = None):
70
+ def _noop_retry(func):
71
+ return func
72
+
73
+
74
+ def build_retry_strategy(
75
+ *,
76
+ retry_budget_seconds: int,
77
+ retry_max_delay_seconds: float,
78
+ retry_initial_delay_seconds: float,
79
+ interrupt: BaseGracefulInterruptHandler | None = None,
80
+ ):
81
+ if retry_budget_seconds == 0:
82
+ return _noop_retry
66
83
  return retry(
67
- stop=stop_after_delay(120),
68
- wait=wait_exponential_jitter(initial=0.01, exp_base=2, max=5, jitter=0.1),
84
+ stop=stop_after_delay(retry_budget_seconds),
85
+ wait=wait_exponential_jitter(
86
+ initial=retry_initial_delay_seconds,
87
+ exp_base=2,
88
+ max=retry_max_delay_seconds,
89
+ jitter=0.1,
90
+ ),
69
91
  retry=interruptable_retry(
70
92
  interrupt=interrupt,
71
93
  get_parent_retry=lambda: retry_if_exception(is_redis_retryable_exception),
@@ -82,6 +104,10 @@ def validate_gateway_parameters(
82
104
  message_deduplication_log_ttl_seconds: int,
83
105
  message_wait_interval_seconds: int,
84
106
  message_visibility_timeout_seconds: int | None = None,
107
+ *,
108
+ retry_budget_seconds: int,
109
+ retry_max_delay_seconds: float,
110
+ retry_initial_delay_seconds: float,
85
111
  ) -> None:
86
112
  if not isinstance(message_deduplication_log_ttl_seconds, int) or isinstance(
87
113
  message_deduplication_log_ttl_seconds, bool
@@ -114,6 +140,30 @@ def validate_gateway_parameters(
114
140
  f"got {message_visibility_timeout_seconds}"
115
141
  )
116
142
 
143
+ if not isinstance(retry_budget_seconds, int) or isinstance(retry_budget_seconds, bool):
144
+ raise TypeError(f"'retry_budget_seconds' must be an int, got {type(retry_budget_seconds).__name__}")
145
+ if retry_budget_seconds < 0:
146
+ raise ValueError(f"'retry_budget_seconds' must be non-negative, got {retry_budget_seconds}")
147
+
148
+ if isinstance(retry_max_delay_seconds, bool) or not isinstance(retry_max_delay_seconds, (int, float)):
149
+ raise TypeError(f"'retry_max_delay_seconds' must be a number, got {type(retry_max_delay_seconds).__name__}")
150
+ if not math.isfinite(retry_max_delay_seconds) or retry_max_delay_seconds <= 0:
151
+ raise ValueError(f"'retry_max_delay_seconds' must be a finite positive number, got {retry_max_delay_seconds}")
152
+
153
+ if isinstance(retry_initial_delay_seconds, bool) or not isinstance(retry_initial_delay_seconds, (int, float)):
154
+ raise TypeError(
155
+ f"'retry_initial_delay_seconds' must be a number, got {type(retry_initial_delay_seconds).__name__}"
156
+ )
157
+ if not math.isfinite(retry_initial_delay_seconds) or retry_initial_delay_seconds <= 0:
158
+ raise ValueError(
159
+ f"'retry_initial_delay_seconds' must be a finite positive number, got {retry_initial_delay_seconds}"
160
+ )
161
+ if retry_initial_delay_seconds > retry_max_delay_seconds:
162
+ raise ValueError(
163
+ "'retry_initial_delay_seconds' must be <= 'retry_max_delay_seconds', "
164
+ f"got {retry_initial_delay_seconds} > {retry_max_delay_seconds}"
165
+ )
166
+
117
167
 
118
168
  def validate_dead_letter_parameters(
119
169
  max_delivery_count: int | None,
@@ -9,19 +9,21 @@ import redis
9
9
  import redis.asyncio
10
10
 
11
11
  from redis_message_queue._abstract_redis_gateway import AbstractRedisGateway
12
- from redis_message_queue._callable_utils import is_async_callable
13
12
  from redis_message_queue._config import (
14
13
  CLAIM_MESSAGE_LUA_SCRIPT,
15
14
  CLAIM_MESSAGE_WITH_VISIBILITY_TIMEOUT_LUA_SCRIPT,
16
15
  DEFAULT_MESSAGE_DEDUPLICATION_LOG_TTL,
17
16
  DEFAULT_MESSAGE_WAIT_INTERVAL_SECONDS,
17
+ DEFAULT_RETRY_BUDGET_SECONDS,
18
+ DEFAULT_RETRY_INITIAL_DELAY_SECONDS,
19
+ DEFAULT_RETRY_MAX_DELAY_SECONDS,
18
20
  MOVE_MESSAGE_LUA_SCRIPT,
19
21
  MOVE_MESSAGE_WITH_LEASE_TOKEN_LUA_SCRIPT,
20
22
  PUBLISH_MESSAGE_LUA_SCRIPT,
21
23
  REMOVE_MESSAGE_LUA_SCRIPT,
22
24
  REMOVE_MESSAGE_WITH_LEASE_TOKEN_LUA_SCRIPT,
23
25
  RENEW_MESSAGE_LEASE_LUA_SCRIPT,
24
- get_default_redis_connection_retry_strategy,
26
+ build_retry_strategy,
25
27
  is_redis_retryable_exception,
26
28
  validate_dead_letter_parameters,
27
29
  validate_gateway_parameters,
@@ -54,11 +56,28 @@ _VISIBILITY_TIMEOUT_POLL_INTERVAL_SECONDS = 0.25
54
56
 
55
57
 
56
58
  class RedisGateway(AbstractRedisGateway):
59
+ """Sync Redis gateway with built-in tenacity-based retry on transient errors.
60
+
61
+ The retry knobs (``retry_budget_seconds``, ``retry_max_delay_seconds``,
62
+ ``retry_initial_delay_seconds``) configure the internal tenacity strategy.
63
+ Setting ``retry_budget_seconds=0`` disables retry entirely (single attempt;
64
+ exceptions propagate). The library uses ``retry_budget_seconds`` to size the
65
+ operation-result cache TTL so that a successfully-acked operation cannot
66
+ appear "not removed" to a retry that arrives after the budget elapses.
67
+
68
+ Power-user escape hatch: to plug in a different retry library
69
+ (``backoff``, ``asyncstdlib.retry``, custom exponential backoff, etc.) or
70
+ fundamentally different retry semantics, subclass
71
+ :class:`AbstractRedisGateway` and override the operation methods directly.
72
+ """
73
+
57
74
  def __init__(
58
75
  self,
59
76
  *,
60
77
  redis_client: redis.Redis,
61
- retry_strategy: Optional[Callable] = None,
78
+ retry_budget_seconds: int = DEFAULT_RETRY_BUDGET_SECONDS,
79
+ retry_max_delay_seconds: float = DEFAULT_RETRY_MAX_DELAY_SECONDS,
80
+ retry_initial_delay_seconds: float = DEFAULT_RETRY_INITIAL_DELAY_SECONDS,
62
81
  message_deduplication_log_ttl_seconds: Optional[int] = None,
63
82
  message_wait_interval_seconds: Optional[int] = None,
64
83
  message_visibility_timeout_seconds: Optional[int] = None,
@@ -78,21 +97,9 @@ class RedisGateway(AbstractRedisGateway):
78
97
  "Pass the underlying redis.Redis instance instead."
79
98
  )
80
99
  self._redis_client = redis_client
81
- if retry_strategy is not None and not callable(retry_strategy):
82
- raise TypeError(f"'retry_strategy' must be callable, got {type(retry_strategy).__name__}")
83
- if retry_strategy is not None and is_async_callable(retry_strategy):
84
- raise TypeError(
85
- "'retry_strategy' is an async callable; "
86
- "use the async RedisGateway from redis_message_queue.asyncio instead"
87
- )
88
100
  if interrupt is not None and not isinstance(interrupt, BaseGracefulInterruptHandler):
89
101
  raise TypeError(f"'interrupt' must be a BaseGracefulInterruptHandler, got {type(interrupt).__name__}")
90
102
  self._interrupt = interrupt
91
- self._retry_strategy = (
92
- get_default_redis_connection_retry_strategy(interrupt=interrupt)
93
- if retry_strategy is None
94
- else retry_strategy
95
- )
96
103
  self._message_deduplication_log_ttl_seconds = (
97
104
  DEFAULT_MESSAGE_DEDUPLICATION_LOG_TTL
98
105
  if message_deduplication_log_ttl_seconds is None
@@ -108,12 +115,22 @@ class RedisGateway(AbstractRedisGateway):
108
115
  self._message_deduplication_log_ttl_seconds,
109
116
  self._message_wait_interval_seconds,
110
117
  self._message_visibility_timeout_seconds,
118
+ retry_budget_seconds=retry_budget_seconds,
119
+ retry_max_delay_seconds=retry_max_delay_seconds,
120
+ retry_initial_delay_seconds=retry_initial_delay_seconds,
111
121
  )
112
122
  validate_dead_letter_parameters(
113
123
  max_delivery_count,
114
124
  dead_letter_queue,
115
125
  self._message_visibility_timeout_seconds,
116
126
  )
127
+ self._retry_budget_seconds = retry_budget_seconds
128
+ self._retry_strategy = build_retry_strategy(
129
+ retry_budget_seconds=retry_budget_seconds,
130
+ retry_max_delay_seconds=retry_max_delay_seconds,
131
+ retry_initial_delay_seconds=retry_initial_delay_seconds,
132
+ interrupt=interrupt,
133
+ )
117
134
  self._max_delivery_count = max_delivery_count
118
135
  self._dead_letter_queue = dead_letter_queue
119
136
  self._pending_claim_ids: dict[str, list[str]] = {}
@@ -572,23 +589,20 @@ class RedisGateway(AbstractRedisGateway):
572
589
  return f"{processing_queue}{_OPERATION_RESULT_SUFFIX}:{lease_token}:{operation_id}"
573
590
 
574
591
  def _publish_operation_result_ttl_ms(self) -> str:
575
- return str(max(self._message_deduplication_log_ttl_seconds, 3600) * 1000)
592
+ return str(max(self._message_deduplication_log_ttl_seconds, 3600, self._retry_budget_seconds + 180) * 1000)
576
593
 
577
594
  def _operation_result_ttl_ms(self) -> str:
578
- # Floor is 300s so the cached result outlives tenacity's
579
- # stop_after_delay(120) retry budget with margin. Equal deadlines
580
- # produce a boundary race where a retry arriving past 120s finds the
581
- # cache just expired and wrongly returns 0.
595
+ # Floor is derived from the configured retry budget so the cached
596
+ # operation result outlives the retry window with a 180s margin. Equal
597
+ # deadlines produce a boundary race where a retry arriving past the
598
+ # budget finds the cache just expired and re-runs the Lua, which then
599
+ # observes LREM=0 for an already-acked message and returns False.
582
600
  #
583
- # This is ALSO an upper bound on any caller-supplied ``retry_strategy``:
584
- # a custom retry budget longer than max(visibility_timeout, 300) can
585
- # step past this TTL and re-run the Lua with a stale cache, causing an
586
- # already-acked move/remove to report False. Documented in README under
587
- # the custom gateway section.
588
- ttl_seconds = self._message_visibility_timeout_seconds
589
- if ttl_seconds is None:
590
- ttl_seconds = 120
591
- return str(max(ttl_seconds, 300) * 1000)
601
+ # Sized internally from ``retry_budget_seconds`` (which the library now
602
+ # owns), so the relationship is a structural invariant rather than a
603
+ # caller-supplied constraint.
604
+ vt_seconds = self._message_visibility_timeout_seconds or 0
605
+ return str(max(vt_seconds, self._retry_budget_seconds + 180) * 1000)
592
606
 
593
607
  def _lease_operation_result_ttl_ms(self) -> str:
594
608
  return self._operation_result_ttl_ms()
@@ -671,8 +685,6 @@ class RedisGateway(AbstractRedisGateway):
671
685
  claim_result_key = self._claim_result_key(processing_queue, claim_id)
672
686
  cached_claim = self._redis_client.get(claim_result_key)
673
687
  if cached_claim is None:
674
- if self._is_interrupted():
675
- return None
676
688
  cached_claim = self._redis_client.hget(self._claim_result_ids_key(processing_queue), claim_id)
677
689
  if cached_claim is None:
678
690
  return None
@@ -687,8 +699,6 @@ class RedisGateway(AbstractRedisGateway):
687
699
  claim_result_key = self._claim_result_key(processing_queue, claim_id)
688
700
  cached_claim = self._redis_client.get(claim_result_key)
689
701
  if cached_claim is None:
690
- if self._is_interrupted():
691
- return None
692
702
  cached_claim = self._redis_client.hget(self._claim_result_ids_key(processing_queue), claim_id)
693
703
  if cached_claim is None:
694
704
  return None
@@ -12,13 +12,17 @@ class AbstractRedisGateway(ABC):
12
12
  documented on each method to avoid phantom heartbeats, undetected lease conflicts,
13
13
  or silent data loss.
14
14
 
15
- Gateways that support visibility timeouts (lease-based claiming) should expose
15
+ Gateways that support visibility timeouts (lease-based claiming) MUST expose
16
16
  a ``message_visibility_timeout_seconds`` property (int or None). This is not
17
17
  abstract because it is configuration rather than protocol, but it is required
18
18
  when the queue is configured with ``heartbeat_interval_seconds``.
19
- Lease-capable custom gateways should always expose this property; otherwise
20
- the queue cannot enforce lease-specific fail-closed checks and will treat the
21
- gateway as a non-lease implementation.
19
+ Lease-capable custom gateways MUST expose this property; omitting it
20
+ silently disables heartbeat validation and lease-token safety checks,
21
+ causing the queue to treat the gateway as a non-lease implementation.
22
+
23
+ Gateways that wrap a Redis Cluster client should expose an
24
+ ``is_redis_cluster`` property returning ``True`` so the queue can apply
25
+ hash-tag validation at construction time.
22
26
 
23
27
  Concurrency
24
28
  -----------
@@ -8,19 +8,21 @@ from typing import Awaitable, Callable, Optional, TypeVar
8
8
  import redis
9
9
  import redis.asyncio
10
10
 
11
- from redis_message_queue._callable_utils import is_async_callable
12
11
  from redis_message_queue._config import (
13
12
  CLAIM_MESSAGE_LUA_SCRIPT,
14
13
  CLAIM_MESSAGE_WITH_VISIBILITY_TIMEOUT_LUA_SCRIPT,
15
14
  DEFAULT_MESSAGE_DEDUPLICATION_LOG_TTL,
16
15
  DEFAULT_MESSAGE_WAIT_INTERVAL_SECONDS,
16
+ DEFAULT_RETRY_BUDGET_SECONDS,
17
+ DEFAULT_RETRY_INITIAL_DELAY_SECONDS,
18
+ DEFAULT_RETRY_MAX_DELAY_SECONDS,
17
19
  MOVE_MESSAGE_LUA_SCRIPT,
18
20
  MOVE_MESSAGE_WITH_LEASE_TOKEN_LUA_SCRIPT,
19
21
  PUBLISH_MESSAGE_LUA_SCRIPT,
20
22
  REMOVE_MESSAGE_LUA_SCRIPT,
21
23
  REMOVE_MESSAGE_WITH_LEASE_TOKEN_LUA_SCRIPT,
22
24
  RENEW_MESSAGE_LEASE_LUA_SCRIPT,
23
- get_default_redis_connection_retry_strategy,
25
+ build_retry_strategy,
24
26
  is_redis_retryable_exception,
25
27
  validate_dead_letter_parameters,
26
28
  validate_gateway_parameters,
@@ -54,11 +56,28 @@ _VISIBILITY_TIMEOUT_POLL_INTERVAL_SECONDS = 0.25
54
56
 
55
57
 
56
58
  class RedisGateway(AbstractRedisGateway):
59
+ """Async Redis gateway with built-in tenacity-based retry on transient errors.
60
+
61
+ The retry knobs (``retry_budget_seconds``, ``retry_max_delay_seconds``,
62
+ ``retry_initial_delay_seconds``) configure the internal tenacity strategy.
63
+ Setting ``retry_budget_seconds=0`` disables retry entirely (single attempt;
64
+ exceptions propagate). The library uses ``retry_budget_seconds`` to size the
65
+ operation-result cache TTL so that a successfully-acked operation cannot
66
+ appear "not removed" to a retry that arrives after the budget elapses.
67
+
68
+ Power-user escape hatch: to plug in a different retry library
69
+ (``backoff``, ``asyncstdlib.retry``, custom exponential backoff, etc.) or
70
+ fundamentally different retry semantics, subclass
71
+ :class:`AbstractRedisGateway` and override the operation methods directly.
72
+ """
73
+
57
74
  def __init__(
58
75
  self,
59
76
  *,
60
77
  redis_client: redis.asyncio.Redis,
61
- retry_strategy: Optional[Callable] = None,
78
+ retry_budget_seconds: int = DEFAULT_RETRY_BUDGET_SECONDS,
79
+ retry_max_delay_seconds: float = DEFAULT_RETRY_MAX_DELAY_SECONDS,
80
+ retry_initial_delay_seconds: float = DEFAULT_RETRY_INITIAL_DELAY_SECONDS,
62
81
  message_deduplication_log_ttl_seconds: Optional[int] = None,
63
82
  message_wait_interval_seconds: Optional[int] = None,
64
83
  message_visibility_timeout_seconds: Optional[int] = None,
@@ -78,21 +97,9 @@ class RedisGateway(AbstractRedisGateway):
78
97
  "Pass the underlying redis.asyncio.Redis instance instead."
79
98
  )
80
99
  self._redis_client = redis_client
81
- if retry_strategy is not None and not callable(retry_strategy):
82
- raise TypeError(f"'retry_strategy' must be callable, got {type(retry_strategy).__name__}")
83
- if retry_strategy is not None and is_async_callable(retry_strategy):
84
- raise TypeError(
85
- "'retry_strategy' must not be an async callable. "
86
- "Provide a synchronous callable decorator (e.g., tenacity.retry(...))"
87
- )
88
100
  if interrupt is not None and not isinstance(interrupt, BaseGracefulInterruptHandler):
89
101
  raise TypeError(f"'interrupt' must be a BaseGracefulInterruptHandler, got {type(interrupt).__name__}")
90
102
  self._interrupt = interrupt
91
- self._retry_strategy = (
92
- get_default_redis_connection_retry_strategy(interrupt=interrupt)
93
- if retry_strategy is None
94
- else retry_strategy
95
- )
96
103
  self._message_deduplication_log_ttl_seconds = (
97
104
  DEFAULT_MESSAGE_DEDUPLICATION_LOG_TTL
98
105
  if message_deduplication_log_ttl_seconds is None
@@ -108,12 +115,22 @@ class RedisGateway(AbstractRedisGateway):
108
115
  self._message_deduplication_log_ttl_seconds,
109
116
  self._message_wait_interval_seconds,
110
117
  self._message_visibility_timeout_seconds,
118
+ retry_budget_seconds=retry_budget_seconds,
119
+ retry_max_delay_seconds=retry_max_delay_seconds,
120
+ retry_initial_delay_seconds=retry_initial_delay_seconds,
111
121
  )
112
122
  validate_dead_letter_parameters(
113
123
  max_delivery_count,
114
124
  dead_letter_queue,
115
125
  self._message_visibility_timeout_seconds,
116
126
  )
127
+ self._retry_budget_seconds = retry_budget_seconds
128
+ self._retry_strategy = build_retry_strategy(
129
+ retry_budget_seconds=retry_budget_seconds,
130
+ retry_max_delay_seconds=retry_max_delay_seconds,
131
+ retry_initial_delay_seconds=retry_initial_delay_seconds,
132
+ interrupt=interrupt,
133
+ )
117
134
  self._max_delivery_count = max_delivery_count
118
135
  self._dead_letter_queue = dead_letter_queue
119
136
  self._pending_claim_ids: dict[str, list[str]] = {}
@@ -573,23 +590,20 @@ class RedisGateway(AbstractRedisGateway):
573
590
  return f"{processing_queue}{_OPERATION_RESULT_SUFFIX}:{lease_token}:{operation_id}"
574
591
 
575
592
  def _publish_operation_result_ttl_ms(self) -> str:
576
- return str(max(self._message_deduplication_log_ttl_seconds, 3600) * 1000)
593
+ return str(max(self._message_deduplication_log_ttl_seconds, 3600, self._retry_budget_seconds + 180) * 1000)
577
594
 
578
595
  def _operation_result_ttl_ms(self) -> str:
579
- # Floor is 300s so the cached result outlives tenacity's
580
- # stop_after_delay(120) retry budget with margin. Equal deadlines
581
- # produce a boundary race where a retry arriving past 120s finds the
582
- # cache just expired and wrongly returns 0.
596
+ # Floor is derived from the configured retry budget so the cached
597
+ # operation result outlives the retry window with a 180s margin. Equal
598
+ # deadlines produce a boundary race where a retry arriving past the
599
+ # budget finds the cache just expired and re-runs the Lua, which then
600
+ # observes LREM=0 for an already-acked message and returns False.
583
601
  #
584
- # This is ALSO an upper bound on any caller-supplied ``retry_strategy``:
585
- # a custom retry budget longer than max(visibility_timeout, 300) can
586
- # step past this TTL and re-run the Lua with a stale cache, causing an
587
- # already-acked move/remove to report False. Documented in README under
588
- # the custom gateway section.
589
- ttl_seconds = self._message_visibility_timeout_seconds
590
- if ttl_seconds is None:
591
- ttl_seconds = 120
592
- return str(max(ttl_seconds, 300) * 1000)
602
+ # Sized internally from ``retry_budget_seconds`` (which the library now
603
+ # owns), so the relationship is a structural invariant rather than a
604
+ # caller-supplied constraint.
605
+ vt_seconds = self._message_visibility_timeout_seconds or 0
606
+ return str(max(vt_seconds, self._retry_budget_seconds + 180) * 1000)
593
607
 
594
608
  def _lease_operation_result_ttl_ms(self) -> str:
595
609
  return self._operation_result_ttl_ms()
@@ -672,8 +686,6 @@ class RedisGateway(AbstractRedisGateway):
672
686
  claim_result_key = self._claim_result_key(processing_queue, claim_id)
673
687
  cached_claim = await self._redis_client.get(claim_result_key)
674
688
  if cached_claim is None:
675
- if self._is_interrupted():
676
- return None
677
689
  cached_claim = await self._redis_client.hget(self._claim_result_ids_key(processing_queue), claim_id)
678
690
  if cached_claim is None:
679
691
  return None
@@ -688,8 +700,6 @@ class RedisGateway(AbstractRedisGateway):
688
700
  claim_result_key = self._claim_result_key(processing_queue, claim_id)
689
701
  cached_claim = await self._redis_client.get(claim_result_key)
690
702
  if cached_claim is None:
691
- if self._is_interrupted():
692
- return None
693
703
  cached_claim = await self._redis_client.hget(self._claim_result_ids_key(processing_queue), claim_id)
694
704
  if cached_claim is None:
695
705
  return None
@@ -20,6 +20,17 @@ logger = logging.getLogger(__name__)
20
20
  _T = TypeVar("_T")
21
21
  _GATEWAY_BOUND_PENDING_QUEUE_ATTR = "_rmq_bound_pending_queue"
22
22
 
23
+ _STALE_LEASE_ACK_WARNING = (
24
+ "Message cleanup after successful processing was a no-op: "
25
+ "the lease expired and the message was likely reclaimed by another consumer. "
26
+ "This is expected at-least-once delivery behavior under visibility timeout."
27
+ )
28
+ _STALE_LEASE_NACK_WARNING = (
29
+ "Message cleanup after failed processing was a no-op: "
30
+ "the lease expired and the message was likely reclaimed by another consumer. "
31
+ "This is expected at-least-once delivery behavior under visibility timeout."
32
+ )
33
+
23
34
 
24
35
  class _TaskBaseException(Exception):
25
36
  def __init__(self, original: BaseException):
@@ -36,10 +47,16 @@ async def _run_operation_in_task(operation: Awaitable[_T]) -> _T:
36
47
  raise _TaskBaseException(exc) from None
37
48
 
38
49
 
50
+ def _consume_task_exception(task: "asyncio.Task[_T]") -> None:
51
+ if not task.cancelled():
52
+ task.exception()
53
+
54
+
39
55
  async def _await_preserving_cancellation(operation: Awaitable[_T]) -> _T:
40
56
  """Finish cleanup before propagating task cancellation."""
41
57
 
42
58
  task = asyncio.create_task(_run_operation_in_task(operation))
59
+ task.add_done_callback(_consume_task_exception)
43
60
  try:
44
61
  return await asyncio.shield(task)
45
62
  except asyncio.CancelledError:
@@ -68,6 +85,7 @@ async def _await_suppressing_external_cancellation(operation: Awaitable[_T]) ->
68
85
  """
69
86
 
70
87
  task = asyncio.create_task(_run_operation_in_task(operation))
88
+ task.add_done_callback(_consume_task_exception)
71
89
  try:
72
90
  return await asyncio.shield(task)
73
91
  except asyncio.CancelledError:
@@ -103,10 +121,10 @@ def _validate_heartbeat_interval_seconds(
103
121
  "'heartbeat_interval_seconds' requires a configured visibility timeout."
104
122
  )
105
123
  raise ValueError(require_visibility_timeout_message)
106
- if heartbeat_interval_seconds > visibility_timeout_seconds / 2:
124
+ if heartbeat_interval_seconds >= visibility_timeout_seconds / 2:
107
125
  raise ValueError(
108
- "'heartbeat_interval_seconds' must be no more than half of 'visibility_timeout_seconds' "
109
- f"({heartbeat_interval_seconds} > {visibility_timeout_seconds / 2})"
126
+ "'heartbeat_interval_seconds' must be less than half of 'visibility_timeout_seconds' "
127
+ f"({heartbeat_interval_seconds} >= {visibility_timeout_seconds / 2})"
110
128
  )
111
129
  return heartbeat_interval_seconds
112
130
 
@@ -378,7 +396,6 @@ class RedisMessageQueue:
378
396
  raise TypeError(f"'gateway' must be an AbstractRedisGateway, got {type(gateway).__name__}")
379
397
  gateway_visibility_timeout_seconds = _get_optional_gateway_visibility_timeout_seconds(gateway)
380
398
  self._requires_claimed_message = gateway_visibility_timeout_seconds is not None
381
- _bind_dead_letter_gateway_to_queue(gateway, self.key.pending)
382
399
  _validate_cluster_configuration(self.key, gateway=gateway)
383
400
  if heartbeat_interval_seconds is not None:
384
401
  gateway_visibility_timeout_seconds = _get_gateway_visibility_timeout_seconds(gateway)
@@ -395,6 +412,7 @@ class RedisMessageQueue:
395
412
  "'max_delivery_count' cannot be provided alongside 'gateway'."
396
413
  " Configure 'max_delivery_count' and 'dead_letter_queue' on the gateway directly instead."
397
414
  )
415
+ _bind_dead_letter_gateway_to_queue(gateway, self.key.pending)
398
416
  self._redis = gateway
399
417
  elif client is None:
400
418
  raise ValueError("Either 'client' or 'gateway' must be provided.")
@@ -427,12 +445,21 @@ class RedisMessageQueue:
427
445
  """Publish a message.
428
446
 
429
447
  Dict messages are serialized via ``json.dumps(message, sort_keys=True)``.
430
- Non-string dict keys are coerced to strings by ``json.dumps``, so
431
- ``{1: "x"}`` and ``{"1": "x"}`` produce the same dedup key.
448
+ All top-level dict keys must be strings; non-string keys raise
449
+ ``TypeError`` to avoid silent ``json.dumps`` coercion that would
450
+ collapse distinct keys into the same dedup key (e.g. ``{1: "x"}``
451
+ vs ``{"1": "x"}``). Only top-level keys are validated; nested
452
+ dicts follow ``json.dumps`` defaults.
432
453
  """
433
454
  if not isinstance(message, (str, dict)):
434
455
  raise TypeError(f"'message' must be a str or dict, got {type(message).__name__}")
435
456
  if isinstance(message, dict):
457
+ non_str_keys = [k for k in message if not isinstance(k, str)]
458
+ if non_str_keys:
459
+ raise TypeError(
460
+ "'message' dict keys must all be strings; "
461
+ f"got non-string keys: {non_str_keys[:3]}" + (" (and more)" if len(non_str_keys) > 3 else "")
462
+ )
436
463
  message_str = json.dumps(message, sort_keys=True)
437
464
  else:
438
465
  message_str = message
@@ -519,11 +546,7 @@ class RedisMessageQueue:
519
546
  self._remove_processed_message(stored_message, lease_token)
520
547
  )
521
548
  if lease_token is not None and not applied:
522
- logger.warning(
523
- "Message cleanup after failed processing was a no-op: "
524
- "the lease expired and the message was likely reclaimed by another consumer. "
525
- "This is expected at-least-once delivery behavior under visibility timeout."
526
- )
549
+ logger.warning(_STALE_LEASE_NACK_WARNING)
527
550
  except BaseException:
528
551
  logger.exception("Failed to clean up message from processing queue")
529
552
  raise
@@ -539,11 +562,7 @@ class RedisMessageQueue:
539
562
  self._remove_processed_message(stored_message, lease_token)
540
563
  )
541
564
  if lease_token is not None and not applied:
542
- logger.warning(
543
- "Message cleanup after successful processing was a no-op: "
544
- "the lease expired and the message was likely reclaimed by another consumer. "
545
- "This is expected at-least-once delivery behavior under visibility timeout."
546
- )
565
+ logger.warning(_STALE_LEASE_ACK_WARNING)
547
566
  finished_without_error = True
548
567
  finally:
549
568
  if lease_heartbeat is not None:
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import signal
3
+ import sys
3
4
  from typing import Iterable
4
5
 
5
6
  from redis_message_queue.interrupt_handler._interface import (
@@ -70,6 +71,7 @@ class GracefulInterruptHandler(BaseGracefulInterruptHandler):
70
71
  raise ValueError(
71
72
  f"Signal {sig.name} already has a non-default handler installed."
72
73
  " GracefulInterruptHandler refuses to replace existing handlers."
74
+ " If running inside asyncio.run(), create the handler before asyncio.run() starts."
73
75
  )
74
76
  self._interrupted = False
75
77
  self._verbose = verbose
@@ -91,6 +93,9 @@ class GracefulInterruptHandler(BaseGracefulInterruptHandler):
91
93
  return
92
94
  os.kill(os.getpid(), signum)
93
95
  return
94
- if self._verbose:
95
- print(f"Received signal: {signal.strsignal(signum)}")
96
96
  self._interrupted = True
97
+ if self._verbose:
98
+ try:
99
+ print(f"Received signal: {signal.strsignal(signum)}", file=sys.stderr)
100
+ except Exception:
101
+ pass
@@ -20,6 +20,17 @@ from redis_message_queue.interrupt_handler import BaseGracefulInterruptHandler
20
20
  logger = logging.getLogger(__name__)
21
21
  _GATEWAY_BOUND_PENDING_QUEUE_ATTR = "_rmq_bound_pending_queue"
22
22
 
23
+ _STALE_LEASE_ACK_WARNING = (
24
+ "Message cleanup after successful processing was a no-op: "
25
+ "the lease expired and the message was likely reclaimed by another consumer. "
26
+ "This is expected at-least-once delivery behavior under visibility timeout."
27
+ )
28
+ _STALE_LEASE_NACK_WARNING = (
29
+ "Message cleanup after failed processing was a no-op: "
30
+ "the lease expired and the message was likely reclaimed by another consumer. "
31
+ "This is expected at-least-once delivery behavior under visibility timeout."
32
+ )
33
+
23
34
 
24
35
  def _validate_heartbeat_interval_seconds(
25
36
  heartbeat_interval_seconds: int | float | None,
@@ -45,10 +56,10 @@ def _validate_heartbeat_interval_seconds(
45
56
  "'heartbeat_interval_seconds' requires a configured visibility timeout."
46
57
  )
47
58
  raise ValueError(require_visibility_timeout_message)
48
- if heartbeat_interval_seconds > visibility_timeout_seconds / 2:
59
+ if heartbeat_interval_seconds >= visibility_timeout_seconds / 2:
49
60
  raise ValueError(
50
- "'heartbeat_interval_seconds' must be no more than half of 'visibility_timeout_seconds' "
51
- f"({heartbeat_interval_seconds} > {visibility_timeout_seconds / 2})"
61
+ "'heartbeat_interval_seconds' must be less than half of 'visibility_timeout_seconds' "
62
+ f"({heartbeat_interval_seconds} >= {visibility_timeout_seconds / 2})"
52
63
  )
53
64
  return heartbeat_interval_seconds
54
65
 
@@ -338,7 +349,6 @@ class RedisMessageQueue:
338
349
  raise TypeError(f"'gateway' must be an AbstractRedisGateway, got {type(gateway).__name__}")
339
350
  gateway_visibility_timeout_seconds = _get_optional_gateway_visibility_timeout_seconds(gateway)
340
351
  self._requires_claimed_message = gateway_visibility_timeout_seconds is not None
341
- _bind_dead_letter_gateway_to_queue(gateway, self.key.pending)
342
352
  _validate_cluster_configuration(self.key, gateway=gateway)
343
353
  if heartbeat_interval_seconds is not None:
344
354
  gateway_visibility_timeout_seconds = _get_gateway_visibility_timeout_seconds(gateway)
@@ -355,6 +365,7 @@ class RedisMessageQueue:
355
365
  "'max_delivery_count' cannot be provided alongside 'gateway'."
356
366
  " Configure 'max_delivery_count' and 'dead_letter_queue' on the gateway directly instead."
357
367
  )
368
+ _bind_dead_letter_gateway_to_queue(gateway, self.key.pending)
358
369
  self._redis = gateway
359
370
  elif client is None:
360
371
  raise ValueError("Either 'client' or 'gateway' must be provided.")
@@ -387,12 +398,22 @@ class RedisMessageQueue:
387
398
  """Publish a message.
388
399
 
389
400
  Dict messages are serialized via ``json.dumps(message, sort_keys=True)``.
390
- Non-string dict keys are coerced to strings by ``json.dumps``, so
391
- ``{1: "x"}`` and ``{"1": "x"}`` produce the same dedup key.
401
+ All top-level dict keys must be strings; non-string keys raise
402
+ ``TypeError`` to avoid silent ``json.dumps`` coercion that would
403
+ collapse distinct keys into the same dedup key (e.g. ``{1: "x"}``
404
+ vs ``{"1": "x"}``). Only top-level keys are validated; nested
405
+ dicts follow ``json.dumps`` defaults (e.g. nested non-string keys
406
+ are silently coerced: integer keys become strings).
392
407
  """
393
408
  if not isinstance(message, (str, dict)):
394
409
  raise TypeError(f"'message' must be a str or dict, got {type(message).__name__}")
395
410
  if isinstance(message, dict):
411
+ non_str_keys = [k for k in message if not isinstance(k, str)]
412
+ if non_str_keys:
413
+ raise TypeError(
414
+ "'message' dict keys must all be strings; "
415
+ f"got non-string keys: {non_str_keys[:3]}" + (" (and more)" if len(non_str_keys) > 3 else "")
416
+ )
396
417
  message_str = json.dumps(message, sort_keys=True)
397
418
  else:
398
419
  message_str = message
@@ -483,11 +504,7 @@ class RedisMessageQueue:
483
504
  else:
484
505
  applied = self._remove_processed_message(stored_message, lease_token)
485
506
  if lease_token is not None and not applied:
486
- logger.warning(
487
- "Message cleanup after failed processing was a no-op: "
488
- "the lease expired and the message was likely reclaimed by another consumer. "
489
- "This is expected at-least-once delivery behavior under visibility timeout."
490
- )
507
+ logger.warning(_STALE_LEASE_NACK_WARNING)
491
508
  except BaseException:
492
509
  logger.exception("Failed to clean up message from processing queue")
493
510
  raise
@@ -499,11 +516,7 @@ class RedisMessageQueue:
499
516
  else:
500
517
  applied = self._remove_processed_message(stored_message, lease_token)
501
518
  if lease_token is not None and not applied:
502
- logger.warning(
503
- "Message cleanup after successful processing was a no-op: "
504
- "the lease expired and the message was likely reclaimed by another consumer. "
505
- "This is expected at-least-once delivery behavior under visibility timeout."
506
- )
519
+ logger.warning(_STALE_LEASE_ACK_WARNING)
507
520
  finally:
508
521
  if lease_heartbeat is not None:
509
522
  lease_heartbeat.stop()