rabbitkit 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. rabbitkit/__init__.py +201 -0
  2. rabbitkit/_version.py +3 -0
  3. rabbitkit/aio/__init__.py +31 -0
  4. rabbitkit/async_/__init__.py +9 -0
  5. rabbitkit/async_/batch.py +213 -0
  6. rabbitkit/async_/broker.py +1123 -0
  7. rabbitkit/async_/connection.py +274 -0
  8. rabbitkit/async_/pool.py +363 -0
  9. rabbitkit/async_/transport.py +877 -0
  10. rabbitkit/asyncapi/__init__.py +5 -0
  11. rabbitkit/asyncapi/generator.py +219 -0
  12. rabbitkit/asyncapi/schema.py +98 -0
  13. rabbitkit/cli/__init__.py +77 -0
  14. rabbitkit/cli/_utils.py +38 -0
  15. rabbitkit/cli/commands/__init__.py +0 -0
  16. rabbitkit/cli/commands/dlq.py +190 -0
  17. rabbitkit/cli/commands/health.py +34 -0
  18. rabbitkit/cli/commands/migrate.py +570 -0
  19. rabbitkit/cli/commands/routes.py +88 -0
  20. rabbitkit/cli/commands/run.py +144 -0
  21. rabbitkit/cli/commands/shell.py +72 -0
  22. rabbitkit/cli/commands/topology.py +346 -0
  23. rabbitkit/concurrency.py +451 -0
  24. rabbitkit/core/__init__.py +5 -0
  25. rabbitkit/core/app.py +323 -0
  26. rabbitkit/core/config.py +849 -0
  27. rabbitkit/core/env_config.py +251 -0
  28. rabbitkit/core/errors.py +199 -0
  29. rabbitkit/core/logging.py +261 -0
  30. rabbitkit/core/message.py +235 -0
  31. rabbitkit/core/path.py +53 -0
  32. rabbitkit/core/pipeline.py +1289 -0
  33. rabbitkit/core/protocols.py +349 -0
  34. rabbitkit/core/registry.py +284 -0
  35. rabbitkit/core/route.py +329 -0
  36. rabbitkit/core/router.py +142 -0
  37. rabbitkit/core/topology.py +261 -0
  38. rabbitkit/core/topology_dispatch.py +74 -0
  39. rabbitkit/core/types.py +324 -0
  40. rabbitkit/dashboard/__init__.py +5 -0
  41. rabbitkit/dashboard/app.py +212 -0
  42. rabbitkit/di/__init__.py +19 -0
  43. rabbitkit/di/context.py +193 -0
  44. rabbitkit/di/depends.py +42 -0
  45. rabbitkit/di/resolver.py +503 -0
  46. rabbitkit/dlq.py +320 -0
  47. rabbitkit/experimental/__init__.py +50 -0
  48. rabbitkit/fastapi.py +91 -0
  49. rabbitkit/health.py +654 -0
  50. rabbitkit/highload/__init__.py +10 -0
  51. rabbitkit/highload/backpressure.py +514 -0
  52. rabbitkit/highload/batch.py +448 -0
  53. rabbitkit/locking.py +277 -0
  54. rabbitkit/management.py +470 -0
  55. rabbitkit/middleware/__init__.py +27 -0
  56. rabbitkit/middleware/base.py +125 -0
  57. rabbitkit/middleware/circuit_breaker.py +131 -0
  58. rabbitkit/middleware/compression.py +267 -0
  59. rabbitkit/middleware/deduplication.py +651 -0
  60. rabbitkit/middleware/error_classifier.py +43 -0
  61. rabbitkit/middleware/exception.py +105 -0
  62. rabbitkit/middleware/metrics.py +440 -0
  63. rabbitkit/middleware/otel.py +203 -0
  64. rabbitkit/middleware/rate_limit.py +247 -0
  65. rabbitkit/middleware/retry.py +540 -0
  66. rabbitkit/middleware/signing.py +682 -0
  67. rabbitkit/middleware/timeout.py +291 -0
  68. rabbitkit/py.typed +0 -0
  69. rabbitkit/queue_metrics.py +174 -0
  70. rabbitkit/results/__init__.py +6 -0
  71. rabbitkit/results/backend.py +102 -0
  72. rabbitkit/results/middleware.py +123 -0
  73. rabbitkit/rpc.py +632 -0
  74. rabbitkit/serialization/__init__.py +25 -0
  75. rabbitkit/serialization/base.py +35 -0
  76. rabbitkit/serialization/json.py +122 -0
  77. rabbitkit/serialization/msgspec.py +136 -0
  78. rabbitkit/serialization/pipeline.py +255 -0
  79. rabbitkit/streams.py +139 -0
  80. rabbitkit/sync/__init__.py +11 -0
  81. rabbitkit/sync/batch.py +595 -0
  82. rabbitkit/sync/broker.py +996 -0
  83. rabbitkit/sync/connection.py +209 -0
  84. rabbitkit/sync/pool.py +262 -0
  85. rabbitkit/sync/transport.py +1085 -0
  86. rabbitkit/testing/__init__.py +20 -0
  87. rabbitkit/testing/app.py +99 -0
  88. rabbitkit/testing/broker.py +540 -0
  89. rabbitkit/testing/fixtures.py +56 -0
  90. rabbitkit-0.9.0.dist-info/METADATA +575 -0
  91. rabbitkit-0.9.0.dist-info/RECORD +95 -0
  92. rabbitkit-0.9.0.dist-info/WHEEL +5 -0
  93. rabbitkit-0.9.0.dist-info/entry_points.txt +2 -0
  94. rabbitkit-0.9.0.dist-info/licenses/LICENSE +21 -0
  95. rabbitkit-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,540 @@
1
+ """RetryMiddleware — routes failed messages to delay queues for retry.
2
+
3
+ TOPOLOGY SPEC (see Contract in plan):
4
+ - Per-queue delay queues: {source_queue}.retry.{attempt}
5
+ - Dead letter queue: {source_queue}.dlq
6
+ - Shared mode (rabbitkit.retry.{attempt}, rabbitkit.dlq) is rejected by RetryConfig (H3); only per-queue topology is declared
7
+
8
+ Mechanism: TTL + DLX (dead-letter exchange)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import random
15
+ from collections.abc import Awaitable, Callable, Sequence
16
+ from typing import Any
17
+
18
+ from rabbitkit.core.config import RetryConfig
19
+ from rabbitkit.core.errors import ErrorPredicate
20
+ from rabbitkit.core.message import RabbitMessage
21
+ from rabbitkit.core.topology import RabbitQueue
22
+ from rabbitkit.core.types import REQUEUED_FOR_RETRY, ErrorSeverity, MessageEnvelope
23
+ from rabbitkit.middleware.base import BaseMiddleware
24
+ from rabbitkit.middleware.error_classifier import ErrorClassifierMiddleware
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def _shard_index(message_id: str, shards: int) -> int:
30
+ """Stable shard pick (F4). Python's hash() is salted per process — a
31
+ message's retry shard must be identical across every consumer process,
32
+ or its cadence changes on each redelivery. md5 here is a stable bucket
33
+ hash, not crypto."""
34
+ if not message_id:
35
+ return 0
36
+ import hashlib
37
+
38
+ return int(hashlib.md5(message_id.encode(), usedforsecurity=False).hexdigest(), 16) % shards
39
+
40
+
41
+ def _shard_queue_name(source_queue: str, attempt: int, shard: int) -> str:
42
+ """Shard 0 keeps the legacy `{q}.retry.{n}` name (backward compatible —
43
+ enabling sharded jitter on an existing topology is purely additive)."""
44
+ base = f"{source_queue}.retry.{attempt}"
45
+ return base if shard == 0 else f"{base}.s{shard}"
46
+
47
+
48
+ def _shard_ttl_multipliers(shards: int, jitter_factor: float) -> list[float]:
49
+ """Shard 0 is exactly 1.0 (legacy TTL, no redeclare conflict); shards
50
+ 1..N-1 spread evenly across [1-jf, 1+jf]. Every queue's TTL is still
51
+ UNIFORM for all messages in it — jitter comes from which shard a
52
+ message hashes to, never from per-message TTL (head-of-line safety)."""
53
+ if shards == 2:
54
+ return [1.0, 1.0 + jitter_factor]
55
+ rest = shards - 1
56
+ return [1.0] + [
57
+ 1.0 + jitter_factor * (-1.0 + 2.0 * i / (rest - 1)) for i in range(rest)
58
+ ]
59
+
60
+
61
def retry_middleware_insertion_index(middlewares: Sequence[Any]) -> int:
    """Return the position where an auto-wired ``RetryMiddleware`` belongs.

    Two ordering constraints decide the position:

    * Retry sits OUTSIDE ordinary user middlewares (e.g. ``TimeoutMiddleware``)
      so it can classify and re-queue the exceptions they raise. This is the
      composition documented in ``middleware/timeout.py``
      (``middlewares=[retry_mw, timeout_mw]  # retry outermost``), which
      depends on retry seeing ``HandlerTimeoutError``.
    * Retry sits INSIDE any ``ExceptionMiddleware``, documented in
      ``middleware/exception.py`` as the true outermost layer that "catches
      exceptions AFTER retry gives up" — it has to observe the
      ``_rabbitkit_terminal`` exceptions retry re-raises on exhaustion or
      permanent failure.

    The result is therefore the number of *leading* ``ExceptionMiddleware``
    instances: retry goes right after them and ahead of everything else.
    """
    from rabbitkit.middleware.exception import ExceptionMiddleware

    leading = 0
    for candidate in middlewares:
        if not isinstance(candidate, ExceptionMiddleware):
            # First non-exception middleware ends the leading run.
            return leading
        leading += 1
    return leading
87
+
88
+
89
def warn_retry_without_confirms(route_name: str, *, context: str = "retry") -> None:
    """Emit a ``RuntimeWarning`` for a route that republishes without confirms (M4).

    Applies to routes that republish internally — a retry delay-queue publish,
    or a ``@publisher()`` result forward — while their broker publishes with
    ``PublisherConfig.confirm_delivery=False``.

    Both ``RetryMiddleware`` and the pipeline's result-publish step (Contract
    5) settle the SOURCE message as soon as their republish reports
    ``outcome.ok``. With confirms off, that publish reports
    ``PublishStatus.SENT`` (fire-and-forget, never broker-confirmed) instead of
    ``CONFIRMED``, and ``.ok`` is True for both. If the SENT publish is lost in
    flight (e.g. the connection drops right after), the source message has
    already been settled — a real loss, not just a delay. Enabling
    ``confirm_delivery=True`` (the default) avoids this.

    Args:
        route_name: Route identifier interpolated into the warning text.
        context: ``"retry"`` selects the retry wording; any other value
            selects the ``@publisher()`` result-forward wording.
    """
    import warnings

    if context == "retry":
        what = "retry"
        settles = "acks the source message"
    else:
        what = "a @publisher() result forward"
        settles = "settles the source message"

    text = (
        f"Route {route_name!r} uses {what} but the broker publishes with "
        f"confirm_delivery=False. The pipeline {settles} as soon as its internal republish "
        "is sent, without waiting for a broker confirm -- a publish lost in flight after "
        "that point is a real loss (the source is already settled). Set "
        "PublisherConfig(confirm_delivery=True) (the default) if durability here matters."
    )
    # stacklevel=3 attributes the warning two frames above this function.
    warnings.warn(text, RuntimeWarning, stacklevel=3)
117
+
118
+
119
def warn_retry_middleware_without_topology(route_name: str) -> None:
    """Emit a ``RuntimeWarning`` for a ``RetryMiddleware`` with no retry topology.

    A ``RetryMiddleware`` publishes failed messages to ``<queue>.retry.<n>``
    delay queues. Those queues are declared only when retry is enabled through
    ``RabbitConfig.retry`` / ``@subscriber(retry=...)`` (which drives
    ``_declare_topology``). Adding a ``RetryMiddleware`` by hand to
    ``middlewares=[...]`` *without* also setting ``retry=`` leaves the delay
    queues undeclared, so retry publishes target non-existent queues on the
    default exchange and are silently dropped — the source message is acked
    and the retry is lost. This warning makes that half-configuration visible.

    Args:
        route_name: Route identifier interpolated into the warning text.
    """
    import warnings

    text = (
        f"Route {route_name!r} has a RetryMiddleware but no retry topology was declared "
        "(no retry=RetryConfig(...) on the broker or subscriber). Its delay-queue publishes "
        "will target non-existent queues and be dropped. Set retry=RetryConfig(...) so the "
        "delay/DLQ topology is declared, or remove the manual RetryMiddleware."
    )
    # stacklevel=3 attributes the warning two frames above this function.
    warnings.warn(text, RuntimeWarning, stacklevel=3)
141
+
142
+
143
class RetryMiddleware(BaseMiddleware):
    """Routes failed messages to delay queues for retry.

    On exception:
        1. Classify error (transient/permanent)
        2. If transient + retries left → publish to delay queue + ack source
        3. If permanent or retries exhausted → tag as terminal + re-raise
    """

    def __init__(
        self,
        config: RetryConfig,
        *,
        publish_fn: Callable[[MessageEnvelope], Any] | None = None,
        publish_async_fn: Callable[[MessageEnvelope], Awaitable[Any]] | None = None,
        predicates: Sequence[ErrorPredicate] = (),
        metrics_collector: Any | None = None,
        metrics_config: Any | None = None,
    ) -> None:
        """Store the retry configuration and the optional collaborators.

        Args:
            config: Retry settings (max retries, delays, jitter, header name,
                unknown-error policy).
            publish_fn: Sync callable used to publish the retry envelope.
            publish_async_fn: Async callable used to publish the retry envelope.
            predicates: Extra error predicates handed to the classifier.
            metrics_collector: Optional collector whose ``inc_counter`` is
                called for retried / dead-lettered messages.
            metrics_config: Optional object providing the metric names
                ``messages_retried_total`` and ``messages_dead_lettered_total``.
        """
        self._config = config
        # predicates run first (True=transient, False=permanent, None=defer to the
        # built-in type tuples, then unknown_policy). Lets callers classify by
        # something other than exception type (e.g. an HTTP status attribute).
        self._classifier = ErrorClassifierMiddleware(
            predicates=predicates,
            unknown_policy=config.unknown_policy,
        )
        self._publish_fn = publish_fn
        self._publish_async_fn = publish_async_fn
        # M2: optional -- wired by the broker from a MetricsMiddleware already
        # present on the same route, so retried/dead-lettered counts are
        # observable. None (the default) is a no-op; RetryMiddleware itself
        # has no metrics opinion otherwise.
        self._metrics_collector = metrics_collector
        self._metrics_config = metrics_config

    def _record_metric(self, metric_name: str | None, message: RabbitMessage) -> None:
        """Increment ``metric_name`` labelled with the message's queue.

        Does nothing when no collector is configured or ``metric_name`` is
        ``None``. The queue label falls back from the original-queue header to
        the routing key and finally to ``"unknown"``.
        """
        if self._metrics_collector is None or metric_name is None:
            return
        queue = message.headers.get("x-rabbitkit-original-queue") or message.routing_key or "unknown"
        self._metrics_collector.inc_counter(metric_name, {"queue": str(queue)})

    @property
    def config(self) -> RetryConfig:
        """The ``RetryConfig`` this middleware was constructed with."""
        return self._config

    def consume_scope(
        self,
        call_next: Callable[[RabbitMessage], Any],
        message: RabbitMessage,
    ) -> Any:
        """Sync retry scope.

        H8: on a caught, requeued failure, returns ``REQUEUED_FOR_RETRY``
        (never ``None``) — see that sentinel's docstring in ``core/types.py``.
        ``_handle_retry_sync`` either returns normally (requeued: routed to a
        delay queue, or nacked for immediate redelivery if that publish
        itself failed) or re-raises (terminal: permanent/exhausted) via
        ``_mark_terminal_and_raise``, so reaching this ``return`` unambiguously
        means "requeued" — an outer middleware (e.g. DeduplicationMiddleware)
        MUST NOT treat this the same as the handler actually succeeding.
        """
        try:
            return call_next(message)
        except Exception as exc:
            self._handle_retry_sync(exc, message)
            return REQUEUED_FOR_RETRY

    async def consume_scope_async(
        self,
        call_next: Callable[[RabbitMessage], Awaitable[Any]],
        message: RabbitMessage,
    ) -> Any:
        """Async retry scope. See :meth:`consume_scope` (H8) for why this
        returns ``REQUEUED_FOR_RETRY`` rather than ``None``."""
        try:
            return await call_next(message)
        except Exception as exc:
            await self._handle_retry_async(exc, message)
            return REQUEUED_FOR_RETRY

    def _handle_retry_sync(self, exc: Exception, message: RabbitMessage) -> None:
        """Handle exception in sync context.

        Returns normally when the message was routed for retry; raises ``exc``
        (tagged terminal) when it is permanent or retries are exhausted.
        """
        classified = self._classifier.classify(exc)
        retry_count = self._get_retry_count(message)

        if classified.severity == ErrorSeverity.TRANSIENT and retry_count < self._config.max_retries:
            # Route to delay queue
            self._route_to_delay_queue_sync(message, retry_count)
            return

        # Terminal: permanent or exhausted
        self._mark_terminal_and_raise(exc, classified.severity, retry_count, message)

    async def _handle_retry_async(self, exc: Exception, message: RabbitMessage) -> None:
        """Handle exception in async context (see :meth:`_handle_retry_sync`)."""
        classified = self._classifier.classify(exc)
        retry_count = self._get_retry_count(message)

        if classified.severity == ErrorSeverity.TRANSIENT and retry_count < self._config.max_retries:
            await self._route_to_delay_queue_async(message, retry_count)
            return

        self._mark_terminal_and_raise(exc, classified.severity, retry_count, message)

    def _get_retry_count(self, message: RabbitMessage) -> int:
        """Get current retry count from message headers, clamped to
        ``[0, max_retries]`` (H5).

        The header is read verbatim from an inbound AMQP message — nothing
        distinguishes a value written by this middleware's own delay-queue
        round trip from one set directly by an untrusted producer (there is
        no broker-side attestation of provenance for a plain header). Without
        clamping, a producer could set it negative (``attempt = retry_count +
        1`` in :meth:`_build_retry_envelope` would then be <= 0, producing a
        delay-queue routing key like ``...retry.-4`` that was never declared
        — the retry publish silently targets a non-existent queue and the
        message is lost rather than retried) or absurdly large (forcing every
        message straight to the DLQ, skipping retries entirely). Clamping
        makes ``max_retries`` an enforced ceiling regardless of what the
        header claims, independent of its configured value being read from a
        trusted or untrusted source. A malformed header value (non-numeric,
        or a non-finite float such as infinity) is treated the same as
        missing (0) rather than raising, so a garbage header degrades to
        "start of the retry sequence" instead of crashing the pipeline.

        For a broker-enforced backstop on top of this (e.g. against a
        misbehaving consumer that never expires/dead-letters a message),
        prefer quorum source queues with ``x-delivery-limit`` — see
        ``docs/retry-and-dlq.md``.
        """
        raw = message.headers.get(self._config.retry_header, 0)
        try:
            retry_count = int(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) / int(float("-inf")). A float
            # header carrying infinity must not escape the retry handler.
            retry_count = 0
        return max(0, min(retry_count, self._config.max_retries))

    def _compute_delay(self, retry_count: int) -> int:
        """Compute delay for this retry attempt (with jitter).

        Picks ``delays[retry_count]`` (the last entry once the list is
        exhausted), perturbs it by up to ``± jitter_factor`` of itself, and
        never returns less than 1.
        """
        delays = self._config.delays
        idx = min(retry_count, len(delays) - 1)
        base_delay = delays[idx]

        # Apply jitter
        jitter = base_delay * self._config.jitter_factor
        return max(1, int(base_delay + random.uniform(-jitter, jitter)))  # noqa: S311 — jitter, not crypto

    def _build_retry_envelope(self, message: RabbitMessage, retry_count: int) -> MessageEnvelope:
        """Build envelope for the delay queue.

        The envelope targets the delay queue for attempt ``retry_count + 1``
        by name on the default exchange, carries the original headers plus
        the incremented retry header, and preserves the original message
        properties.
        """
        # Determine delay queue name
        attempt = retry_count + 1
        # Always per-queue (shared mode is rejected by RetryConfig — H3).
        source_queue = message.headers.get("x-rabbitkit-original-queue", "unknown")
        if self._config.jitter_mode == "sharded":
            shard = _shard_index(message.message_id or "", self._config.jitter_shards)
            delay_queue_rk = _shard_queue_name(str(source_queue), attempt, shard)
        else:
            delay_queue_rk = f"{source_queue}.retry.{attempt}"

        # Preserve original headers + add retry metadata
        headers = dict(message.headers)
        headers[self._config.retry_header] = attempt
        if "x-rabbitkit-original-exchange" not in headers:
            headers["x-rabbitkit-original-exchange"] = message.exchange
        if "x-rabbitkit-original-routing-key" not in headers:
            headers["x-rabbitkit-original-routing-key"] = message.routing_key
        if "x-rabbitkit-original-queue" not in headers:
            headers["x-rabbitkit-original-queue"] = ""  # set by broker at consume time

        return MessageEnvelope(
            routing_key=delay_queue_rk,
            body=message.body,
            exchange="",  # direct to delay queue by name
            headers=headers,
            message_id=message.message_id or "",
            correlation_id=message.correlation_id,
            content_type=message.content_type or "application/octet-stream",
            content_encoding=message.content_encoding,
            # Preserve the remaining original message properties -- these used
            # to be silently dropped on every retry republish, so e.g. a
            # priority-queue message lost its priority on its first retry, and
            # an RPC request's reply_to/type/app_id/user_id never survived
            # long enough for the eventual (retried) reply to route back.
            reply_to=message.reply_to,
            priority=message.priority,
            expiration=message.expiration,
            type=message.type,
            app_id=message.app_id,
            user_id=message.user_id,
            # M4: mandatory so a runtime-deleted/missing delay queue comes back
            # as RETURNED (outcome not-ok) instead of being broker-confirmed
            # into the void. The route-to-delay-queue path checks outcome.ok
            # and nack-requeues on failure, so this turns silent loss into a
            # redelivery. (Requires publisher confirms + basic.return handling,
            # which both transports wire up.)
            mandatory=True,
        )

    def _route_to_delay_queue_sync(self, message: RabbitMessage, retry_count: int) -> None:
        """Publish to delay queue and ack source (sync).

        If the publish reports a not-ok outcome, the source is nacked with
        ``requeue=True`` instead of acked and no retry metric is recorded.
        """
        envelope = self._build_retry_envelope(message, retry_count)

        # NOTE(review): when no publish_fn is wired, nothing is published and
        # the source is still acked below — confirm this is intended.
        if self._publish_fn is not None:
            outcome = self._publish_fn(envelope)
            if outcome is not None and not outcome.ok:
                # Delay-queue publish failed — DO NOT ack, or the message is
                # lost forever (never retried, never dead-lettered). Nack with
                # requeue so the broker redelivers it.
                if not message.is_settled:
                    message.nack(requeue=True)
                logger.warning(
                    "Retry publish failed; nacked for redelivery: routing_key=%s",
                    envelope.routing_key,
                )
                return

        # Ack source message (it's safely in the delay queue now)
        if not message.is_settled:
            message.ack()

        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_retried_total, message)

        logger.info(
            "Retrying message (attempt %d/%d): routing_key=%s",
            retry_count + 1,
            self._config.max_retries,
            envelope.routing_key,
        )

    async def _route_to_delay_queue_async(self, message: RabbitMessage, retry_count: int) -> None:
        """Publish to delay queue and ack source (async).

        Mirrors :meth:`_route_to_delay_queue_sync` using the async publish,
        ack and nack calls.
        """
        envelope = self._build_retry_envelope(message, retry_count)

        # NOTE(review): when no publish_async_fn is wired, nothing is published
        # and the source is still acked below — confirm this is intended.
        if self._publish_async_fn is not None:
            outcome = await self._publish_async_fn(envelope)
            if outcome is not None and not outcome.ok:
                # Delay-queue publish failed — DO NOT ack (see sync variant).
                if not message.is_settled:
                    await message.nack_async(requeue=True)
                logger.warning(
                    "Retry publish failed; nacked for redelivery: routing_key=%s",
                    envelope.routing_key,
                )
                return

        if not message.is_settled:
            await message.ack_async()

        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_retried_total, message)

        logger.info(
            "Retrying message (attempt %d/%d): routing_key=%s",
            retry_count + 1,
            self._config.max_retries,
            envelope.routing_key,
        )

    def _mark_terminal_and_raise(
        self,
        exc: Exception,
        severity: ErrorSeverity,
        retry_count: int,
        message: RabbitMessage,
    ) -> None:
        """Mark exception as terminal and re-raise. Never returns normally.

        M2: this is the point where a message is committed to being
        dead-lettered -- permanent errors dead-letter on the first attempt,
        exhausted-retry errors dead-letter after ``max_retries`` -- so
        ``messages_dead_lettered_total`` is recorded here rather than at the
        actual reject() call (which happens later, in the pipeline's
        exception handling, and doesn't know WHY the reject is happening).

        Raises:
            Exception: always ``exc`` itself, with ``_rabbitkit_terminal``
                set to ``True``.
        """
        exc._rabbitkit_terminal = True  # type: ignore[attr-defined]
        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_dead_lettered_total, message)
        logger.warning(
            "Terminal failure (%s, retries=%d/%d): %s: %s",
            severity.value,
            retry_count,
            self._config.max_retries,
            type(exc).__name__,
            exc,
        )
        # Raise the exception we were handed explicitly: a bare ``raise`` only
        # works while an ``except`` block is active further up the stack and
        # fails with RuntimeError otherwise.
        raise exc
431
+
432
+
433
class RetryRouter:
    """Builds the delay-queue and DLQ topology for a retry-enabled route.

    Called by broker.start() for each route that has retry enabled.
    RetryRouter is the SINGLE OWNER of all retry/DLQ topology for a route.

    DLQ routing:
        - The source queue is re-declared with ``x-dead-letter-exchange=""``
          and ``x-dead-letter-routing-key=<dlq_name>`` so that messages
          rejected/nacked with ``requeue=False`` are routed to the DLQ by
          RabbitMQ itself — no application-level routing needed.
        - ``get_source_queue_dlq_arguments()`` returns the extra arguments
          that must be added to the source queue declaration.
    """

    def __init__(self, config: RetryConfig) -> None:
        """Keep the retry configuration used to derive the topology."""
        self._config = config

    def get_dlq_name(self, source_queue_name: str) -> str:
        """Return the dead-letter queue name for ``source_queue_name``.

        The name is always per-queue: shared mode (per_queue=False) is
        rejected by RetryConfig (H3), so no shared-DLQ branch exists.
        """
        return f"{source_queue_name}.dlq"

    def get_source_queue_dlq_arguments(self, source_queue_name: str) -> dict[str, str]:
        """Return the x-dead-letter arguments for the source queue declaration.

        With these arguments on the source queue, RabbitMQ forwards messages
        rejected/nacked with requeue=False to the DLQ, which is what makes the
        DLQ reachable.
        """
        # Empty exchange name = default exchange, which routes by queue name.
        arguments: dict[str, str] = {"x-dead-letter-exchange": ""}
        arguments["x-dead-letter-routing-key"] = self.get_dlq_name(source_queue_name)
        return arguments

    def get_delay_queue_definitions(
        self,
        source_queue_name: str,
        source_exchange_name: str,  # kept for signature stability (M5) — see docstring
    ) -> list[RabbitQueue]:
        """Return the delay queues for every attempt, followed by the DLQ.

        One delay queue is produced per attempt (``1..max_retries``) and, in
        sharded jitter mode, per shard. The DLQ is appended last; it is
        reachable because ``get_source_queue_dlq_arguments()`` points the
        source queue's x-dead-letter-exchange at it.

        M5: on TTL expiry a delay queue dead-letters back to the SOURCE QUEUE
        via the **default exchange** (``x-dead-letter-exchange=""``) using the
        source queue's own name as the routing key — never the source queue's
        real exchange. On the default exchange a routing key equal to a queue
        name always delivers straight to that queue, regardless of how the
        queue is bound elsewhere. The previous version dead-lettered to
        ``source_exchange_name`` with ``source_queue_name`` as the routing
        key; for a source queue bound by a topic pattern (e.g.
        ``orders.*.created``) rather than by its own name, that key almost
        never matched the binding, so the retried message vanished instead of
        returning after the delay. ``source_exchange_name`` is intentionally
        unused now and remains only so existing call sites keep working.
        """
        settings = self._config

        if settings.jitter_mode == "sharded":
            ttl_multipliers = _shard_ttl_multipliers(settings.jitter_shards, settings.jitter_factor)
        else:
            ttl_multipliers = [1.0]

        definitions: list[RabbitQueue] = []
        for attempt in range(1, settings.max_retries + 1):
            base_ttl_ms = self._get_delay_ms(attempt - 1)

            # Always per-queue (shared mode is rejected by RetryConfig — H3).
            definitions.extend(
                RabbitQueue(
                    name=_shard_queue_name(source_queue_name, attempt, shard),
                    durable=True,
                    arguments={
                        # Uniform per-queue TTL (head-of-line safety); shards
                        # stagger TTLs ACROSS queues, never within one (F4).
                        "x-message-ttl": max(1, int(base_ttl_ms * multiplier)),
                        "x-dead-letter-exchange": "",  # default exchange (M5)
                        "x-dead-letter-routing-key": source_queue_name,
                        "x-queue-type": "classic",  # classic for delay queues
                    },
                )
                for shard, multiplier in enumerate(ttl_multipliers)
            )

        # DLQ — a plain durable queue. The source queue's
        # x-dead-letter-exchange (from get_source_queue_dlq_arguments) routes
        # nacked/rejected messages here.
        definitions.append(
            RabbitQueue(
                name=self.get_dlq_name(source_queue_name),
                durable=True,
            )
        )
        return definitions

    def _get_delay_ms(self, index: int) -> int:
        """Return the configured delay for ``index`` converted to milliseconds.

        Indexes past the end of ``delays`` reuse the last configured delay.
        """
        schedule = self._config.delays
        last = len(schedule) - 1
        position = index if index < last else last
        return schedule[position] * 1000