redis-message-queue 8.2.9__tar.gz → 8.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/PKG-INFO +44 -13
  2. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/README.md +43 -12
  3. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/pyproject.toml +2 -2
  4. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_config.py +49 -25
  5. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_redis_gateway.py +10 -0
  6. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/asyncio/_redis_gateway.py +10 -0
  7. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/asyncio/redis_message_queue.py +65 -30
  8. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/redis_message_queue.py +60 -27
  9. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/.gitignore +0 -0
  10. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/LICENSE +0 -0
  11. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/__init__.py +0 -0
  12. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_abstract_redis_gateway.py +0 -0
  13. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_callable_utils.py +0 -0
  14. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_event.py +0 -0
  15. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_exceptions.py +0 -0
  16. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_payload_limits.py +0 -0
  17. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_queue_key_manager.py +0 -0
  18. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_redis_cluster.py +0 -0
  19. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/_stored_message.py +0 -0
  20. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/asyncio/__init__.py +0 -0
  21. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/asyncio/_abstract_redis_gateway.py +0 -0
  22. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/interrupt_handler/__init__.py +0 -0
  23. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/interrupt_handler/_event_driven.py +0 -0
  24. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/interrupt_handler/_implementation.py +0 -0
  25. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/interrupt_handler/_interface.py +0 -0
  26. {redis_message_queue-8.2.9 → redis_message_queue-8.3.0}/redis_message_queue/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: redis-message-queue
3
- Version: 8.2.9
3
+ Version: 8.3.0
4
4
  Summary: Python message queuing with Redis and message deduplication
5
5
  Project-URL: Homepage, https://github.com/Elijas/redis-message-queue
6
6
  Project-URL: Repository, https://github.com/Elijas/redis-message-queue
@@ -34,10 +34,10 @@ Description-Content-Type: text/markdown
34
34
  **Lightweight Python message queuing with Redis and built-in publish-side deduplication.** Deduplicate publishes within a TTL window, with optional crash recovery — across any number of producers and consumers.
35
35
 
36
36
  ```bash
37
- pip install "redis-message-queue>=8.2.9,<9.0.0"
37
+ pip install "redis-message-queue>=8.3.0,<9.0.0"
38
38
  ```
39
39
 
40
- Requires Redis server >= 6.2.
40
+ Requires Python >= 3.12 and Redis server >= 6.2.
41
41
 
42
42
  ## Quickstart
43
43
 
@@ -764,22 +764,35 @@ work. Package logs remain diagnostic; use `on_event` rather than log parsing
764
764
  for metrics.
765
765
 
766
766
  ```python
767
- from opentelemetry import trace
768
- from prometheus_client import Counter
769
767
  from redis_message_queue import QueueEvent, RedisMessageQueue
770
768
 
771
- events_total = Counter(
772
- "rmq_events_total",
773
- "redis-message-queue lifecycle events",
774
- ["queue", "operation", "outcome", "exception_type"],
769
+ try:
770
+ from opentelemetry import trace
771
+ except ImportError:
772
+ trace = None
773
+
774
+ try:
775
+ from prometheus_client import Counter
776
+ except ImportError:
777
+ Counter = None
778
+
779
+ events_total = (
780
+ Counter(
781
+ "rmq_events_total",
782
+ "redis-message-queue lifecycle events",
783
+ ["queue", "operation", "outcome", "exception_type"],
784
+ )
785
+ if Counter is not None
786
+ else None
775
787
  )
776
788
  SPAN_SINK_TRUSTED = False
777
789
 
778
790
  def observe(event: QueueEvent) -> None:
779
- events_total.labels(
780
- event.queue, event.operation, event.outcome, event.exception_type or ""
781
- ).inc()
782
- if event.error is not None and SPAN_SINK_TRUSTED:
791
+ if events_total is not None:
792
+ events_total.labels(
793
+ event.queue, event.operation, event.outcome, event.exception_type or ""
794
+ ).inc()
795
+ if event.error is not None and SPAN_SINK_TRUSTED and trace is not None:
783
796
  trace.get_current_span().record_exception(event.error)
784
797
 
785
798
  queue = RedisMessageQueue("jobs", client=client, on_event=observe)
@@ -830,6 +843,13 @@ Callbacks fire inline:
830
843
  copies the context present when the heartbeat was started, so contextvars and
831
844
  OpenTelemetry spans bound at handler entry are visible.
832
845
 
846
+ > **Warning:** Because callbacks fire inline and may run while an internal
847
+ > publish/drain lock is held, an `on_event` callback must not call back into the
848
+ > same queue instance's `publish()`, `drain()`, `close()` (sync), or `aclose()`
849
+ > (async). Those locks are non-reentrant, so re-entering deadlocks and wedges
850
+ > the caller permanently. Re-entering a *different* queue instance, or scheduling
851
+ > the follow-up work outside the callback, is safe.
852
+
833
853
  #### Event timing vs. Redis commit
834
854
 
835
855
  Most events are post-commit, emitted after the Redis command or Lua script
@@ -889,6 +909,17 @@ The following operations have no `on_event` surface by design:
889
909
  ack/remove, move-to-completed/failed, and lease renewal collapse into the
890
910
  terminal operation's failure event. There is no per-attempt event for those
891
911
  paths.
912
+ - **Claim cache-replay after a lost reply:** a visibility-timeout claim can
913
+ commit server-side — dead-lettering a poison message (`dlq`) or reclaiming an
914
+ expired lease (`claim_reclaim`) before claiming the next live message — and
915
+ then lose its reply (for example, a dropped connection). The claim loop
916
+ retries the same claim ID and hits the `claim_result` cache-replay, which
917
+ re-asserts the lease and returns the stored claim but does not re-run those
918
+ side effects, so their `claim_reclaim` / `dlq` event payloads are not
919
+ re-emitted. Queue state stays correct (the poison message stays
920
+ dead-lettered, the live message is claimed exactly once); only telemetry for
921
+ the lost-reply attempt is dropped. Reconcile poison-message alerting against
922
+ `LLEN {name}::dlq` rather than the `on_event` stream alone.
892
923
 
893
924
  The public exception hierarchy is rooted at `RedisMessageQueueError`. The
894
925
  current exported queue-owned exception classes are:
@@ -11,10 +11,10 @@
11
11
  **Lightweight Python message queuing with Redis and built-in publish-side deduplication.** Deduplicate publishes within a TTL window, with optional crash recovery — across any number of producers and consumers.
12
12
 
13
13
  ```bash
14
- pip install "redis-message-queue>=8.2.9,<9.0.0"
14
+ pip install "redis-message-queue>=8.3.0,<9.0.0"
15
15
  ```
16
16
 
17
- Requires Redis server >= 6.2.
17
+ Requires Python >= 3.12 and Redis server >= 6.2.
18
18
 
19
19
  ## Quickstart
20
20
 
@@ -741,22 +741,35 @@ work. Package logs remain diagnostic; use `on_event` rather than log parsing
741
741
  for metrics.
742
742
 
743
743
  ```python
744
- from opentelemetry import trace
745
- from prometheus_client import Counter
746
744
  from redis_message_queue import QueueEvent, RedisMessageQueue
747
745
 
748
- events_total = Counter(
749
- "rmq_events_total",
750
- "redis-message-queue lifecycle events",
751
- ["queue", "operation", "outcome", "exception_type"],
746
+ try:
747
+ from opentelemetry import trace
748
+ except ImportError:
749
+ trace = None
750
+
751
+ try:
752
+ from prometheus_client import Counter
753
+ except ImportError:
754
+ Counter = None
755
+
756
+ events_total = (
757
+ Counter(
758
+ "rmq_events_total",
759
+ "redis-message-queue lifecycle events",
760
+ ["queue", "operation", "outcome", "exception_type"],
761
+ )
762
+ if Counter is not None
763
+ else None
752
764
  )
753
765
  SPAN_SINK_TRUSTED = False
754
766
 
755
767
  def observe(event: QueueEvent) -> None:
756
- events_total.labels(
757
- event.queue, event.operation, event.outcome, event.exception_type or ""
758
- ).inc()
759
- if event.error is not None and SPAN_SINK_TRUSTED:
768
+ if events_total is not None:
769
+ events_total.labels(
770
+ event.queue, event.operation, event.outcome, event.exception_type or ""
771
+ ).inc()
772
+ if event.error is not None and SPAN_SINK_TRUSTED and trace is not None:
760
773
  trace.get_current_span().record_exception(event.error)
761
774
 
762
775
  queue = RedisMessageQueue("jobs", client=client, on_event=observe)
@@ -807,6 +820,13 @@ Callbacks fire inline:
807
820
  copies the context present when the heartbeat was started, so contextvars and
808
821
  OpenTelemetry spans bound at handler entry are visible.
809
822
 
823
+ > **Warning:** Because callbacks fire inline and may run while an internal
824
+ > publish/drain lock is held, an `on_event` callback must not call back into the
825
+ > same queue instance's `publish()`, `drain()`, `close()` (sync), or `aclose()`
826
+ > (async). Those locks are non-reentrant, so re-entering deadlocks and wedges
827
+ > the caller permanently. Re-entering a *different* queue instance, or scheduling
828
+ > the follow-up work outside the callback, is safe.
829
+
810
830
  #### Event timing vs. Redis commit
811
831
 
812
832
  Most events are post-commit, emitted after the Redis command or Lua script
@@ -866,6 +886,17 @@ The following operations have no `on_event` surface by design:
866
886
  ack/remove, move-to-completed/failed, and lease renewal collapse into the
867
887
  terminal operation's failure event. There is no per-attempt event for those
868
888
  paths.
889
+ - **Claim cache-replay after a lost reply:** a visibility-timeout claim can
890
+ commit server-side — dead-lettering a poison message (`dlq`) or reclaiming an
891
+ expired lease (`claim_reclaim`) before claiming the next live message — and
892
+ then lose its reply (for example, a dropped connection). The claim loop
893
+ retries the same claim ID and hits the `claim_result` cache-replay, which
894
+ re-asserts the lease and returns the stored claim but does not re-run those
895
+ side effects, so their `claim_reclaim` / `dlq` event payloads are not
896
+ re-emitted. Queue state stays correct (the poison message stays
897
+ dead-lettered, the live message is claimed exactly once); only telemetry for
898
+ the lost-reply attempt is dropped. Reconcile poison-message alerting against
899
+ `LLEN {name}::dlq` rather than the `on_event` stream alone.
869
900
 
870
901
  The public exception hierarchy is rooted at `RedisMessageQueueError`. The
871
902
  current exported queue-owned exception classes are:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "redis-message-queue"
3
- version = "8.2.9"
3
+ version = "8.3.0"
4
4
  description = "Python message queuing with Redis and message deduplication"
5
5
  authors = [{ name = "Elijas", email = "4084885+Elijas@users.noreply.github.com" }]
6
6
  readme = "README.md"
@@ -48,7 +48,7 @@ default-groups = ["dev", "test"]
48
48
  ##############################
49
49
 
50
50
  [tool.bumpversion]
51
- current_version = "8.2.9"
51
+ current_version = "8.3.0"
52
52
  parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
53
53
  serialize = ["{major}.{minor}.{patch}"]
54
54
  search = "{current_version}"
@@ -358,7 +358,13 @@ def validate_pending_backpressure_parameters(
358
358
  raise ConfigurationError(
359
359
  "drop_oldest requires max_pending_length to be set. "
360
360
  "Use a positive max_pending_length to define what can be dropped, or use "
361
- "pending_overload_policy='raise' or 'block' for unbounded queues."
361
+ "pending_overload_policy='raise' for an unbounded queue."
362
+ )
363
+ if pending_overload_policy == "block" and max_pending_length is None:
364
+ raise ConfigurationError(
365
+ "block requires max_pending_length to be set. "
366
+ "Use a positive max_pending_length to define the threshold to block on, or use "
367
+ "pending_overload_policy='raise' for an unbounded queue."
362
368
  )
363
369
  if pending_overload_policy == "drop_oldest" and (deduplication or get_deduplication_key_configured):
364
370
  raise ConfigurationError(
@@ -857,35 +863,53 @@ end
857
863
  -- Cap at 100 to bound Lua execution time (Redis blocks during scripts).
858
864
  -- With a single consumer polling at default interval, 1000 expired leases drain in ~2.5s.
859
865
  local expired = redis.call('ZRANGEBYSCORE', KEYS[3], '-inf', now_ms, 'LIMIT', 0, 100)
860
- local to_requeue = {}
861
866
  local reclaimed_events = {}
862
867
  for i = #expired, 1, -1 do
863
- local expired_lease_token = redis.call('HGET', KEYS[4], expired[i])
864
- redis.call('ZREM', KEYS[3], expired[i])
865
- redis.call('HDEL', KEYS[4], expired[i])
866
- if expired_lease_token then
867
- local claim_result_key = redis.call('HGET', KEYS[9], expired_lease_token)
868
- if claim_result_key then
869
- -- Use pcall: in Redis Cluster, claim_result_key was read from KEYS[9] (claim_result_refs)
870
- -- and is therefore not in the declared EVAL KEYS[] set. Cluster may reject the DEL;
871
- -- TTL on the claim_result string (PX visibility_timeout_seconds) bounds the orphan.
872
- redis.pcall('DEL', claim_result_key)
873
- redis.call('HDEL', KEYS[9], expired_lease_token)
868
+ local stored = expired[i]
869
+ local expired_lease_token = redis.call('HGET', KEYS[4], stored)
870
+
871
+ -- Durable-before-destructive (mirror RETURN_MESSAGE_TO_PENDING): requeue to
872
+ -- pending BEFORE removing from processing or deleting lease metadata. If this
873
+ -- write fails, the message remains in processing with its metadata intact for
874
+ -- a future reclaim attempt.
875
+ redis.call('RPUSH', KEYS[1], stored)
876
+ if redis.call('LREM', KEYS[2], 1, stored) == 1 then
877
+ redis.call('ZREM', KEYS[3], stored)
878
+ redis.call('HDEL', KEYS[4], stored)
879
+ if expired_lease_token then
880
+ local claim_result_key = redis.call('HGET', KEYS[9], expired_lease_token)
881
+ if claim_result_key then
882
+ -- Use pcall: in Redis Cluster, claim_result_key was read from KEYS[9] (claim_result_refs)
883
+ -- and is therefore not in the declared EVAL KEYS[] set. Cluster may reject the DEL;
884
+ -- TTL on the claim_result string (PX visibility_timeout_seconds) bounds the orphan.
885
+ redis.pcall('DEL', claim_result_key)
886
+ redis.call('HDEL', KEYS[9], expired_lease_token)
887
+ end
888
+ local claim_id = redis.call('HGET', KEYS[11], expired_lease_token)
889
+ if claim_id then
890
+ redis.call('HDEL', KEYS[10], claim_id)
891
+ redis.call('HDEL', KEYS[11], expired_lease_token)
892
+ end
874
893
  end
875
- local claim_id = redis.call('HGET', KEYS[11], expired_lease_token)
876
- if claim_id then
877
- redis.call('HDEL', KEYS[10], claim_id)
878
- redis.call('HDEL', KEYS[11], expired_lease_token)
894
+ local delivery_count = redis.call('HGET', KEYS[6], stored)
895
+ table.insert(reclaimed_events, {redis_message_queue_message_id(stored), tostring(delivery_count or '0')})
896
+ else
897
+ redis.call('LREM', KEYS[1], 1, stored)
898
+ redis.call('ZREM', KEYS[3], stored)
899
+ redis.call('HDEL', KEYS[4], stored)
900
+ if expired_lease_token then
901
+ local claim_result_key = redis.call('HGET', KEYS[9], expired_lease_token)
902
+ if claim_result_key then
903
+ redis.pcall('DEL', claim_result_key)
904
+ redis.call('HDEL', KEYS[9], expired_lease_token)
905
+ end
906
+ local claim_id = redis.call('HGET', KEYS[11], expired_lease_token)
907
+ if claim_id then
908
+ redis.call('HDEL', KEYS[10], claim_id)
909
+ redis.call('HDEL', KEYS[11], expired_lease_token)
910
+ end
879
911
  end
880
912
  end
881
- if redis.call('LREM', KEYS[2], 1, expired[i]) == 1 then
882
- table.insert(to_requeue, expired[i])
883
- local delivery_count = redis.call('HGET', KEYS[6], expired[i])
884
- table.insert(reclaimed_events, {redis_message_queue_message_id(expired[i]), tostring(delivery_count or '0')})
885
- end
886
- end
887
- if #to_requeue > 0 then
888
- redis.call('RPUSH', KEYS[1], unpack(to_requeue))
889
913
  end
890
914
  local dead_lettered_events = {}
891
915
  local claim_store_failed_sentinel = string.char(0) .. '__rmq_claim_store_failed__'
@@ -402,6 +402,15 @@ class RedisGateway(AbstractRedisGateway):
402
402
  def is_redis_cluster(self) -> bool:
403
403
  return isinstance(self._redis_client, redis.RedisCluster)
404
404
 
405
+ def _raise_if_drop_oldest_deduplicated_publish(self) -> None:
406
+ if self._pending_overload_policy == "drop_oldest":
407
+ raise ConfigurationError(
408
+ "'pending_overload_policy=drop_oldest' cannot be used with RedisGateway.publish_message "
409
+ "because dropped messages leave their deduplication keys in Redis, causing future publishes "
410
+ "of the same payload to be silently suppressed. Use add_message for non-deduplicated lossy "
411
+ "queues, or use 'raise' or 'block' for deduplicated publishes."
412
+ )
413
+
405
414
  def publish_message(self, queue: str, message: str, dedup_key: str) -> bool:
406
415
  if not isinstance(dedup_key, str):
407
416
  raise TypeError(f"'dedup_key' must be a str, got {type(dedup_key).__name__}")
@@ -410,6 +419,7 @@ class RedisGateway(AbstractRedisGateway):
410
419
  "'dedup_key' must be a non-empty string; "
411
420
  "an empty key would create a bare-prefix Redis marker that silently suppresses unrelated messages"
412
421
  )
422
+ self._raise_if_drop_oldest_deduplicated_publish()
413
423
  stored_message = encode_stored_message(message)
414
424
  message_id = extract_stored_message_id(stored_message)
415
425
  operation_id = uuid.uuid4().hex
@@ -381,6 +381,15 @@ class RedisGateway(AbstractRedisGateway):
381
381
  def is_redis_cluster(self) -> bool:
382
382
  return isinstance(self._redis_client, redis.asyncio.RedisCluster)
383
383
 
384
+ def _raise_if_drop_oldest_deduplicated_publish(self) -> None:
385
+ if self._pending_overload_policy == "drop_oldest":
386
+ raise ConfigurationError(
387
+ "'pending_overload_policy=drop_oldest' cannot be used with RedisGateway.publish_message "
388
+ "because dropped messages leave their deduplication keys in Redis, causing future publishes "
389
+ "of the same payload to be silently suppressed. Use add_message for non-deduplicated lossy "
390
+ "queues, or use 'raise' or 'block' for deduplicated publishes."
391
+ )
392
+
384
393
  async def publish_message(self, queue: str, message: str, dedup_key: str) -> bool:
385
394
  if not isinstance(dedup_key, str):
386
395
  raise TypeError(f"'dedup_key' must be a str, got {type(dedup_key).__name__}")
@@ -389,6 +398,7 @@ class RedisGateway(AbstractRedisGateway):
389
398
  "'dedup_key' must be a non-empty string; "
390
399
  "an empty key would create a bare-prefix Redis marker that silently suppresses unrelated messages"
391
400
  )
401
+ self._raise_if_drop_oldest_deduplicated_publish()
392
402
  stored_message = encode_stored_message(message)
393
403
  message_id = extract_stored_message_id(stored_message)
394
404
  operation_id = uuid.uuid4().hex
@@ -511,6 +511,29 @@ class _LeaseHeartbeat:
511
511
  stacklevel=1,
512
512
  )
513
513
 
514
+ async def _report_renewal_failure(self, exc: BaseException) -> None:
515
+ if self._stop_event.is_set():
516
+ return
517
+ logger.exception("Failed to renew message lease")
518
+ await self._emit(
519
+ "lease_renew_failed",
520
+ "failure",
521
+ message_id=self._message_id,
522
+ lease_token_hash=self._lease_token_hash,
523
+ exception_type=type(exc).__name__,
524
+ error=exc,
525
+ )
526
+ with warnings.catch_warnings():
527
+ warnings.simplefilter("always", RuntimeWarning)
528
+ warnings.warn(
529
+ "Failed to renew message lease "
530
+ f"({_warning_exception_name(exc)}); message will be reclaimed by another consumer "
531
+ "when the visibility timeout expires",
532
+ RuntimeWarning,
533
+ stacklevel=1,
534
+ )
535
+ await self._invoke_failure_callback()
536
+
514
537
  async def _run(self) -> None:
515
538
  try:
516
539
  while True:
@@ -531,30 +554,14 @@ class _LeaseHeartbeat:
531
554
  f"gateway.renew_message_lease() must return bool, got {type(renewed).__name__}. "
532
555
  "See AbstractRedisGateway.renew_message_lease for the full contract."
533
556
  )
534
- except asyncio.CancelledError:
535
- raise
557
+ except asyncio.CancelledError as exc:
558
+ current_task = asyncio.current_task()
559
+ if self._stop_event.is_set() or (current_task is not None and current_task.cancelling() > 0):
560
+ raise
561
+ await self._report_renewal_failure(exc)
562
+ return
536
563
  except Exception as exc:
537
- if self._stop_event.is_set():
538
- return
539
- logger.exception("Failed to renew message lease")
540
- await self._emit(
541
- "lease_renew_failed",
542
- "failure",
543
- message_id=self._message_id,
544
- lease_token_hash=self._lease_token_hash,
545
- exception_type=type(exc).__name__,
546
- error=exc,
547
- )
548
- with warnings.catch_warnings():
549
- warnings.simplefilter("always", RuntimeWarning)
550
- warnings.warn(
551
- "Failed to renew message lease "
552
- f"({_warning_exception_name(exc)}); message will be reclaimed by another consumer "
553
- "when the visibility timeout expires",
554
- RuntimeWarning,
555
- stacklevel=1,
556
- )
557
- await self._invoke_failure_callback()
564
+ await self._report_renewal_failure(exc)
558
565
  return
559
566
  if not renewed:
560
567
  await self._emit(
@@ -660,6 +667,8 @@ class RedisMessageQueue:
660
667
  ``"drop_oldest"`` evicts the oldest pending message before enqueueing
661
668
  the new one. ``"drop_oldest"`` requires ``max_pending_length`` and is
662
669
  not compatible with deduplication or ``max_delivery_count``.
670
+ ``"block"`` also requires ``max_pending_length`` (the threshold to
671
+ block on); only the default ``"raise"`` operates on an unbounded queue.
663
672
 
664
673
  ``pending_overload_block_timeout_seconds`` bounds how long ``"block"``
665
674
  waits for capacity before raising ``QueueBackpressureError``. ``0``
@@ -672,12 +681,23 @@ class RedisMessageQueue:
672
681
  ``interrupt`` accepts a ``BaseGracefulInterruptHandler``; pass
673
682
  ``GracefulInterruptHandler()`` for prompt Ctrl-C / termination handling
674
683
  in polling waits. ``on_heartbeat_failure`` is a zero-argument callable
675
- or coroutine callable invoked when lease renewal fails. ``on_event`` is
676
- telemetry only: a callable returning an awaitable and receiving
684
+ or coroutine callable invoked when lease renewal fails. It runs on the
685
+ heartbeat's background thread (sync queue) or on the event loop (async
686
+ queue); in the async queue it MUST NOT block (no ``time.sleep``,
687
+ blocking I/O, or CPU-heavy work; use ``await``), because a blocking
688
+ callback freezes the event loop, delaying lease renewal for every other
689
+ in-flight message (whose leases can then expire and be reclaimed) and
690
+ stalling ``aclose()``. Keep it quick and offload slow work yourself.
691
+ ``on_event`` is telemetry only: a callable returning an awaitable and
692
+ receiving
677
693
  best-effort QueueEvent lifecycle notifications. Callback failures are
678
694
  logged and converted to RuntimeWarning without influencing ack/nack or
679
695
  any other message outcome. Do not use it for correctness-critical
680
- callbacks or follow-up writes.
696
+ callbacks or follow-up writes. ``on_event`` is awaited inline in the
697
+ current asyncio task and may execute while an internal publish/drain
698
+ lock is held, so the callback must not call back into the same queue
699
+ instance's ``publish()``, ``drain()``, or ``aclose()``; that lock is
700
+ non-reentrant, so re-entering deadlocks the caller permanently.
681
701
  """
682
702
  self.key = QueueKeyManager(name, key_separator=key_separator)
683
703
  if not isinstance(deduplication, bool):
@@ -762,6 +782,18 @@ class RedisMessageQueue:
762
782
  deduplication=deduplication,
763
783
  get_deduplication_key=get_deduplication_key,
764
784
  )
785
+ if gateway is not None:
786
+ # Checked before the generic validator so the gateway-incompatibility error takes precedence over the drop_oldest validation errors; only non-default values are rejected.
787
+ if pending_overload_policy != "raise":
788
+ raise ConfigurationError(
789
+ "'pending_overload_policy' cannot be provided alongside 'gateway'."
790
+ " Configure publish backpressure on the gateway directly instead."
791
+ )
792
+ if pending_overload_block_timeout_seconds != DEFAULT_PENDING_OVERLOAD_BLOCK_TIMEOUT_SECONDS:
793
+ raise ConfigurationError(
794
+ "'pending_overload_block_timeout_seconds' cannot be provided alongside 'gateway'."
795
+ " Configure publish backpressure on the gateway directly instead."
796
+ )
765
797
  validate_pending_backpressure_parameters(
766
798
  max_pending_length,
767
799
  pending_overload_policy,
@@ -1179,7 +1211,7 @@ class RedisMessageQueue:
1179
1211
  "visibility timeouts are in use."
1180
1212
  )
1181
1213
  logger.warning(no_lease_token_warning)
1182
- warnings.warn(no_lease_token_warning, RuntimeWarning, stacklevel=2)
1214
+ _warn_runtime_warning(no_lease_token_warning, stacklevel=2)
1183
1215
 
1184
1216
  if lease_token is None and self._requires_claimed_message:
1185
1217
  raise GatewayContractError(
@@ -1232,11 +1264,14 @@ class RedisMessageQueue:
1232
1264
  )
1233
1265
 
1234
1266
  lease_heartbeat = self._build_lease_heartbeat(stored_message, lease_token, message_id, lease_token_hash)
1235
- if lease_heartbeat is not None:
1236
- lease_heartbeat.start()
1237
1267
  finished_without_error = False
1238
1268
  processing_started_at = time.perf_counter()
1239
1269
  try:
1270
+ # start() must be inside this try so its finally always stop()s the
1271
+ # heartbeat; an exception in the start()->yield window would
1272
+ # otherwise orphan it.
1273
+ if lease_heartbeat is not None:
1274
+ lease_heartbeat.start()
1240
1275
  yield message # type: ignore
1241
1276
  except BaseException as exc:
1242
1277
  skip_cleanup = _should_skip_message_cleanup(exc)
@@ -1497,7 +1532,7 @@ class RedisMessageQueue:
1497
1532
  timeout_seconds = None if timeout is None else float(timeout)
1498
1533
  async with self._aclose_lock:
1499
1534
  cleanup_lease_counter = getattr(self._redis, "_cleanup_drained_lease_token_counter", None)
1500
- if self._aclose_result is not None:
1535
+ if self._aclose_result is True:
1501
1536
  pending_claim_ids = self._pending_claim_ids_count()
1502
1537
  if pending_claim_ids:
1503
1538
  self._aclose_result = None
@@ -482,6 +482,29 @@ class _LeaseHeartbeat:
482
482
  stacklevel=1,
483
483
  )
484
484
 
485
+ def _report_renewal_failure(self, exc: BaseException) -> None:
486
+ if self._stop_event.is_set():
487
+ return
488
+ logger.exception("Failed to renew message lease")
489
+ self._emit(
490
+ "lease_renew_failed",
491
+ "failure",
492
+ message_id=self._message_id,
493
+ lease_token_hash=self._lease_token_hash,
494
+ exception_type=type(exc).__name__,
495
+ error=exc,
496
+ )
497
+ with warnings.catch_warnings():
498
+ warnings.simplefilter("always", RuntimeWarning)
499
+ warnings.warn(
500
+ "Failed to renew message lease "
501
+ f"({_warning_exception_name(exc)}); message will be reclaimed by another consumer "
502
+ "when the visibility timeout expires",
503
+ RuntimeWarning,
504
+ stacklevel=1,
505
+ )
506
+ self._invoke_failure_callback()
507
+
485
508
  def _run(self) -> None:
486
509
  # No explicit _is_interrupted() check here. Heartbeat lifetime is owned
487
510
  # by process_message, which sets _stop_event in its finally block on any
@@ -498,28 +521,11 @@ class _LeaseHeartbeat:
498
521
  f"gateway.renew_message_lease() must return bool, got {type(renewed).__name__}. "
499
522
  "See AbstractRedisGateway.renew_message_lease for the full contract."
500
523
  )
524
+ except asyncio.CancelledError as exc:
525
+ self._report_renewal_failure(exc)
526
+ return
501
527
  except Exception as exc:
502
- if self._stop_event.is_set():
503
- return
504
- logger.exception("Failed to renew message lease")
505
- self._emit(
506
- "lease_renew_failed",
507
- "failure",
508
- message_id=self._message_id,
509
- lease_token_hash=self._lease_token_hash,
510
- exception_type=type(exc).__name__,
511
- error=exc,
512
- )
513
- with warnings.catch_warnings():
514
- warnings.simplefilter("always", RuntimeWarning)
515
- warnings.warn(
516
- "Failed to renew message lease "
517
- f"({_warning_exception_name(exc)}); message will be reclaimed by another consumer "
518
- "when the visibility timeout expires",
519
- RuntimeWarning,
520
- stacklevel=1,
521
- )
522
- self._invoke_failure_callback()
528
+ self._report_renewal_failure(exc)
523
529
  return
524
530
  if not renewed:
525
531
  self._emit(
@@ -623,6 +629,8 @@ class RedisMessageQueue:
623
629
  ``"drop_oldest"`` evicts the oldest pending message before enqueueing
624
630
  the new one. ``"drop_oldest"`` requires ``max_pending_length`` and is
625
631
  not compatible with deduplication or ``max_delivery_count``.
632
+ ``"block"`` also requires ``max_pending_length`` (the threshold to
633
+ block on); only the default ``"raise"`` operates on an unbounded queue.
626
634
 
627
635
  ``pending_overload_block_timeout_seconds`` bounds how long ``"block"``
628
636
  waits for capacity before raising ``QueueBackpressureError``. ``0``
@@ -635,11 +643,21 @@ class RedisMessageQueue:
635
643
  ``interrupt`` accepts a ``BaseGracefulInterruptHandler``; pass
636
644
  ``GracefulInterruptHandler()`` for prompt Ctrl-C / termination handling
637
645
  in polling waits. ``on_heartbeat_failure`` is a zero-argument callable
638
- invoked when lease renewal fails. ``on_event`` is telemetry only and
639
- receives best-effort QueueEvent lifecycle notifications; callback
646
+ invoked when lease renewal fails. It runs on the heartbeat's background
647
+ thread (sync queue) or on the event loop (async queue); in the async
648
+ queue it MUST NOT block (no ``time.sleep``, blocking I/O, or CPU-heavy
649
+ work; use ``await``), because a blocking callback freezes the event
650
+ loop, delaying lease renewal for every other in-flight message (whose
651
+ leases can then expire and be reclaimed) and stalling ``aclose()``.
652
+ Keep it quick and offload slow work yourself. ``on_event`` is telemetry
653
+ only and receives best-effort QueueEvent lifecycle notifications; callback
640
654
  failures are logged and converted to RuntimeWarning without influencing
641
655
  ack/nack or any other message outcome. Do not use it for
642
- correctness-critical callbacks or follow-up writes.
656
+ correctness-critical callbacks or follow-up writes. ``on_event`` runs
657
+ inline in the caller's thread and may execute while an internal
658
+ publish/drain lock is held, so the callback must not call back into the
659
+ same queue instance's ``publish()``, ``drain()``, or ``close()``; that
660
+ lock is non-reentrant, so re-entering deadlocks the caller permanently.
643
661
  """
644
662
  self.key = QueueKeyManager(name, key_separator=key_separator)
645
663
  if not isinstance(deduplication, bool):
@@ -728,6 +746,18 @@ class RedisMessageQueue:
728
746
  deduplication=deduplication,
729
747
  get_deduplication_key=get_deduplication_key,
730
748
  )
749
+ if gateway is not None:
750
+ # Checked before the generic validator so the gateway-incompatibility error takes precedence over the drop_oldest validation errors; only non-default values are rejected.
751
+ if pending_overload_policy != "raise":
752
+ raise ConfigurationError(
753
+ "'pending_overload_policy' cannot be provided alongside 'gateway'."
754
+ " Configure publish backpressure on the gateway directly instead."
755
+ )
756
+ if pending_overload_block_timeout_seconds != DEFAULT_PENDING_OVERLOAD_BLOCK_TIMEOUT_SECONDS:
757
+ raise ConfigurationError(
758
+ "'pending_overload_block_timeout_seconds' cannot be provided alongside 'gateway'."
759
+ " Configure publish backpressure on the gateway directly instead."
760
+ )
731
761
  validate_pending_backpressure_parameters(
732
762
  max_pending_length,
733
763
  pending_overload_policy,
@@ -1150,7 +1180,7 @@ class RedisMessageQueue:
1150
1180
  "visibility timeouts are in use."
1151
1181
  )
1152
1182
  logger.warning(no_lease_token_warning)
1153
- warnings.warn(no_lease_token_warning, RuntimeWarning, stacklevel=2)
1183
+ _warn_runtime_warning(no_lease_token_warning, stacklevel=2)
1154
1184
 
1155
1185
  if lease_token is None and self._requires_claimed_message:
1156
1186
  raise GatewayContractError(
@@ -1203,10 +1233,13 @@ class RedisMessageQueue:
1203
1233
  )
1204
1234
 
1205
1235
  lease_heartbeat = self._build_lease_heartbeat(stored_message, lease_token, message_id, lease_token_hash)
1206
- if lease_heartbeat is not None:
1207
- lease_heartbeat.start()
1208
1236
  processing_started_at = time.perf_counter()
1209
1237
  try:
1238
+ # start() must be inside this try so its finally always stop()s the
1239
+ # heartbeat; an exception in the start()->yield window (e.g. a
1240
+ # signal-induced KeyboardInterrupt) would otherwise orphan it.
1241
+ if lease_heartbeat is not None:
1242
+ lease_heartbeat.start()
1210
1243
  yield message # type: ignore
1211
1244
  except BaseException as exc:
1212
1245
  skip_cleanup = _should_skip_message_cleanup(exc)