rabbitkit 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rabbitkit/__init__.py +201 -0
- rabbitkit/_version.py +3 -0
- rabbitkit/aio/__init__.py +31 -0
- rabbitkit/async_/__init__.py +9 -0
- rabbitkit/async_/batch.py +213 -0
- rabbitkit/async_/broker.py +1123 -0
- rabbitkit/async_/connection.py +274 -0
- rabbitkit/async_/pool.py +363 -0
- rabbitkit/async_/transport.py +877 -0
- rabbitkit/asyncapi/__init__.py +5 -0
- rabbitkit/asyncapi/generator.py +219 -0
- rabbitkit/asyncapi/schema.py +98 -0
- rabbitkit/cli/__init__.py +77 -0
- rabbitkit/cli/_utils.py +38 -0
- rabbitkit/cli/commands/__init__.py +0 -0
- rabbitkit/cli/commands/dlq.py +190 -0
- rabbitkit/cli/commands/health.py +34 -0
- rabbitkit/cli/commands/migrate.py +570 -0
- rabbitkit/cli/commands/routes.py +88 -0
- rabbitkit/cli/commands/run.py +144 -0
- rabbitkit/cli/commands/shell.py +72 -0
- rabbitkit/cli/commands/topology.py +346 -0
- rabbitkit/concurrency.py +451 -0
- rabbitkit/core/__init__.py +5 -0
- rabbitkit/core/app.py +323 -0
- rabbitkit/core/config.py +849 -0
- rabbitkit/core/env_config.py +251 -0
- rabbitkit/core/errors.py +199 -0
- rabbitkit/core/logging.py +261 -0
- rabbitkit/core/message.py +235 -0
- rabbitkit/core/path.py +53 -0
- rabbitkit/core/pipeline.py +1289 -0
- rabbitkit/core/protocols.py +349 -0
- rabbitkit/core/registry.py +284 -0
- rabbitkit/core/route.py +329 -0
- rabbitkit/core/router.py +142 -0
- rabbitkit/core/topology.py +261 -0
- rabbitkit/core/topology_dispatch.py +74 -0
- rabbitkit/core/types.py +324 -0
- rabbitkit/dashboard/__init__.py +5 -0
- rabbitkit/dashboard/app.py +212 -0
- rabbitkit/di/__init__.py +19 -0
- rabbitkit/di/context.py +193 -0
- rabbitkit/di/depends.py +42 -0
- rabbitkit/di/resolver.py +503 -0
- rabbitkit/dlq.py +320 -0
- rabbitkit/experimental/__init__.py +50 -0
- rabbitkit/fastapi.py +91 -0
- rabbitkit/health.py +654 -0
- rabbitkit/highload/__init__.py +10 -0
- rabbitkit/highload/backpressure.py +514 -0
- rabbitkit/highload/batch.py +448 -0
- rabbitkit/locking.py +277 -0
- rabbitkit/management.py +470 -0
- rabbitkit/middleware/__init__.py +27 -0
- rabbitkit/middleware/base.py +125 -0
- rabbitkit/middleware/circuit_breaker.py +131 -0
- rabbitkit/middleware/compression.py +267 -0
- rabbitkit/middleware/deduplication.py +651 -0
- rabbitkit/middleware/error_classifier.py +43 -0
- rabbitkit/middleware/exception.py +105 -0
- rabbitkit/middleware/metrics.py +440 -0
- rabbitkit/middleware/otel.py +203 -0
- rabbitkit/middleware/rate_limit.py +247 -0
- rabbitkit/middleware/retry.py +540 -0
- rabbitkit/middleware/signing.py +682 -0
- rabbitkit/middleware/timeout.py +291 -0
- rabbitkit/py.typed +0 -0
- rabbitkit/queue_metrics.py +174 -0
- rabbitkit/results/__init__.py +6 -0
- rabbitkit/results/backend.py +102 -0
- rabbitkit/results/middleware.py +123 -0
- rabbitkit/rpc.py +632 -0
- rabbitkit/serialization/__init__.py +25 -0
- rabbitkit/serialization/base.py +35 -0
- rabbitkit/serialization/json.py +122 -0
- rabbitkit/serialization/msgspec.py +136 -0
- rabbitkit/serialization/pipeline.py +255 -0
- rabbitkit/streams.py +139 -0
- rabbitkit/sync/__init__.py +11 -0
- rabbitkit/sync/batch.py +595 -0
- rabbitkit/sync/broker.py +996 -0
- rabbitkit/sync/connection.py +209 -0
- rabbitkit/sync/pool.py +262 -0
- rabbitkit/sync/transport.py +1085 -0
- rabbitkit/testing/__init__.py +20 -0
- rabbitkit/testing/app.py +99 -0
- rabbitkit/testing/broker.py +540 -0
- rabbitkit/testing/fixtures.py +56 -0
- rabbitkit-0.9.0.dist-info/METADATA +575 -0
- rabbitkit-0.9.0.dist-info/RECORD +95 -0
- rabbitkit-0.9.0.dist-info/WHEEL +5 -0
- rabbitkit-0.9.0.dist-info/entry_points.txt +2 -0
- rabbitkit-0.9.0.dist-info/licenses/LICENSE +21 -0
- rabbitkit-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,540 @@
|
|
|
1
|
+
"""RetryMiddleware — routes failed messages to delay queues for retry.
|
|
2
|
+
|
|
3
|
+
TOPOLOGY SPEC (see Contract in plan):
|
|
4
|
+
- Per-queue delay queues: {source_queue}.retry.{attempt}
|
|
5
|
+
- Dead letter queue: {source_queue}.dlq
|
|
6
|
+
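- Shared mode (rabbitkit.retry.{attempt}, rabbitkit.dlq) is no longer available: RetryConfig rejects it (H3), so retry/DLQ queues are always per source queue.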
|
|
7
|
+
|
|
8
|
+
Mechanism: TTL + DLX (dead-letter exchange)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import random
|
|
15
|
+
from collections.abc import Awaitable, Callable, Sequence
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from rabbitkit.core.config import RetryConfig
|
|
19
|
+
from rabbitkit.core.errors import ErrorPredicate
|
|
20
|
+
from rabbitkit.core.message import RabbitMessage
|
|
21
|
+
from rabbitkit.core.topology import RabbitQueue
|
|
22
|
+
from rabbitkit.core.types import REQUEUED_FOR_RETRY, ErrorSeverity, MessageEnvelope
|
|
23
|
+
from rabbitkit.middleware.base import BaseMiddleware
|
|
24
|
+
from rabbitkit.middleware.error_classifier import ErrorClassifierMiddleware
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _shard_index(message_id: str, shards: int) -> int:
    """Pick a stable retry shard for ``message_id`` (F4).

    Python's built-in ``hash()`` is salted per process, so it cannot be used
    here: a message's retry shard must be identical across every consumer
    process, or its cadence changes on each redelivery. md5 is used purely as
    a stable bucket hash, not for anything security-related.

    Args:
        message_id: Message identifier to bucket; may be empty.
        shards: Number of shards to spread messages over.

    Returns:
        A shard index in ``range(shards)``. An empty ``message_id`` always
        maps to shard 0, and so does any ``shards`` value below 2: with a
        single bucket there is nothing to choose, and a zero or negative
        count must neither raise ``ZeroDivisionError`` nor produce a
        non-positive index that names an undeclared queue.
    """
    if not message_id or shards < 2:
        return 0
    import hashlib

    # The big-endian integer value of the digest equals int(hexdigest, 16).
    digest = hashlib.md5(message_id.encode(), usedforsecurity=False).digest()
    return int.from_bytes(digest, "big") % shards
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _shard_queue_name(source_queue: str, attempt: int, shard: int) -> str:
    """Build the delay-queue name for one retry attempt and shard.

    Shard 0 reuses the un-suffixed legacy ``{source_queue}.retry.{attempt}``
    name, so enabling sharded jitter on an existing topology only adds
    queues; every other shard appends ``.s{shard}`` to that name.
    """
    legacy_name = f"{source_queue}.retry.{attempt}"
    if shard == 0:
        return legacy_name
    return f"{legacy_name}.s{shard}"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _shard_ttl_multipliers(shards: int, jitter_factor: float) -> list[float]:
    """Return one TTL multiplier per shard, indexed by shard number.

    Shard 0 is always exactly ``1.0`` (the legacy TTL, so there is no
    redeclare conflict). The remaining shards are spaced evenly over
    ``[1 - jitter_factor, 1 + jitter_factor]``; with exactly two shards the
    single extra shard gets ``1 + jitter_factor``.

    Each queue's TTL stays uniform for every message in it: jitter comes from
    which shard a message hashes to, never from a per-message TTL
    (head-of-line safety).
    """
    # Two shards is special-cased: the even-spacing formula below would
    # divide by zero when only one non-legacy shard exists.
    if shards == 2:
        return [1.0, 1.0 + jitter_factor]

    extra = shards - 1
    multipliers = [1.0]
    for step in range(extra):
        multipliers.append(1.0 + jitter_factor * (-1.0 + 2.0 * step / (extra - 1)))
    return multipliers
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def retry_middleware_insertion_index(middlewares: Sequence[Any]) -> int:
    """Return the position where an auto-wired ``RetryMiddleware`` belongs.

    Two ordering constraints apply:

    * Retry must sit OUTSIDE ordinary user middlewares (for example
      ``TimeoutMiddleware``) so it can classify and re-queue the exceptions
      they raise. This is the composition documented in
      ``middleware/timeout.py``
      (``middlewares=[retry_mw, timeout_mw]  # retry outermost``), which
      relies on retry seeing ``HandlerTimeoutError``.
    * Retry must sit INSIDE any ``ExceptionMiddleware``, documented in
      ``middleware/exception.py`` as the true outermost layer that "catches
      exceptions AFTER retry gives up" — it has to observe the
      ``_rabbitkit_terminal`` exceptions retry re-raises on exhaustion or
      permanent failure.

    The result is therefore the number of *leading* ``ExceptionMiddleware``
    instances: retry goes right after them and ahead of everything else.
    """
    from rabbitkit.middleware.exception import ExceptionMiddleware

    leading = 0
    for candidate in middlewares:
        # Only an unbroken run at the front counts; stop at the first
        # middleware of any other type.
        if not isinstance(candidate, ExceptionMiddleware):
            return leading
        leading += 1
    return leading
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def warn_retry_without_confirms(route_name: str, *, context: str = "retry") -> None:
    """Emit a ``RuntimeWarning`` for internal republishes without confirms (M4).

    Applies when a route republishes internally (a retry delay-queue publish,
    or a ``@publisher()`` result forward) while its broker publishes with
    ``PublisherConfig.confirm_delivery=False``.

    Both ``RetryMiddleware`` and the pipeline's result-publish step (Contract
    5) settle the SOURCE message as soon as their republish reports
    ``outcome.ok``. With confirms off, that publish reports
    ``PublishStatus.SENT`` (fire-and-forget, never broker-confirmed) rather
    than ``CONFIRMED``, and ``.ok`` is True for both. If the SENT publish is
    then lost in flight (e.g. a connection drop right after), the source
    message is already settled -- a real loss, not just a delay.

    Args:
        route_name: Name of the affected route, interpolated into the message.
        context: ``"retry"`` for the retry wording; any other value selects
            the ``@publisher()`` result-forward wording.
    """
    import warnings

    if context == "retry":
        what, settles = "retry", "acks the source message"
    else:
        what, settles = "a @publisher() result forward", "settles the source message"

    text = (
        f"Route {route_name!r} uses {what} but the broker publishes with "
        f"confirm_delivery=False. The pipeline {settles} as soon as its internal republish "
        "is sent, without waiting for a broker confirm -- a publish lost in flight after "
        "that point is a real loss (the source is already settled). Set "
        "PublisherConfig(confirm_delivery=True) (the default) if durability here matters."
    )
    warnings.warn(text, RuntimeWarning, stacklevel=3)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def warn_retry_middleware_without_topology(route_name: str) -> None:
    """Emit a ``RuntimeWarning`` for a ``RetryMiddleware`` with no retry topology.

    A ``RetryMiddleware`` publishes failed messages to ``<queue>.retry.<n>``
    delay queues. Those queues are only declared when retry is enabled via
    ``RabbitConfig.retry`` / ``@subscriber(retry=...)`` (which drives
    ``_declare_topology``). A caller who adds a ``RetryMiddleware`` manually
    to ``middlewares=[...]`` *without* also setting ``retry=`` never gets the
    delay queues declared, so retry publishes target non-existent queues on
    the default exchange and are silently dropped — the source message is
    acked and the retry is lost. This warning surfaces that
    half-configuration loudly.

    Args:
        route_name: Name of the affected route, interpolated into the message.
    """
    import warnings

    text = (
        f"Route {route_name!r} has a RetryMiddleware but no retry topology was declared "
        "(no retry=RetryConfig(...) on the broker or subscriber). Its delay-queue publishes "
        "will target non-existent queues and be dropped. Set retry=RetryConfig(...) so the "
        "delay/DLQ topology is declared, or remove the manual RetryMiddleware."
    )
    warnings.warn(text, RuntimeWarning, stacklevel=3)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class RetryMiddleware(BaseMiddleware):
    """Routes failed messages to delay queues for retry.

    On exception:
    1. Classify error (transient/permanent)
    2. If transient + retries left → publish to delay queue + ack source
    3. If permanent or retries exhausted → tag as terminal + re-raise
    """

    def __init__(
        self,
        config: RetryConfig,
        *,
        publish_fn: Callable[[MessageEnvelope], Any] | None = None,
        publish_async_fn: Callable[[MessageEnvelope], Awaitable[Any]] | None = None,
        predicates: Sequence[ErrorPredicate] = (),
        metrics_collector: Any | None = None,
        metrics_config: Any | None = None,
    ) -> None:
        """Store the retry configuration and collaborators.

        Args:
            config: Retry settings (``max_retries``, ``delays``,
                ``retry_header``, jitter options, ``unknown_policy``).
            publish_fn: Sync callable used to publish the retry envelope.
            publish_async_fn: Async callable used to publish the retry
                envelope.
            predicates: Extra error predicates passed to the classifier.
            metrics_collector: Optional collector whose ``inc_counter`` is
                called for retried / dead-lettered messages.
            metrics_config: Optional object providing the metric names
                ``messages_retried_total`` and
                ``messages_dead_lettered_total``.
        """
        self._config = config
        # predicates run first (True=transient, False=permanent, None=defer to the
        # built-in type tuples, then unknown_policy). Lets callers classify by
        # something other than exception type (e.g. an HTTP status attribute).
        self._classifier = ErrorClassifierMiddleware(
            predicates=predicates,
            unknown_policy=config.unknown_policy,
        )
        self._publish_fn = publish_fn
        self._publish_async_fn = publish_async_fn
        # M2: optional -- wired by the broker from a MetricsMiddleware already
        # present on the same route, so retried/dead-lettered counts are
        # observable. None (the default) is a no-op; RetryMiddleware itself
        # has no metrics opinion otherwise.
        self._metrics_collector = metrics_collector
        self._metrics_config = metrics_config

    def _record_metric(self, metric_name: str | None, message: RabbitMessage) -> None:
        """Increment ``metric_name`` labelled with the message's queue.

        Does nothing when no collector is configured or the metric name is
        ``None``. The queue label is the ``x-rabbitkit-original-queue``
        header, falling back to the routing key and then to ``"unknown"``.
        """
        if self._metrics_collector is None or metric_name is None:
            return
        queue = message.headers.get("x-rabbitkit-original-queue") or message.routing_key or "unknown"
        self._metrics_collector.inc_counter(metric_name, {"queue": str(queue)})

    @property
    def config(self) -> RetryConfig:
        """The ``RetryConfig`` this middleware was constructed with."""
        return self._config

    def consume_scope(
        self,
        call_next: Callable[[RabbitMessage], Any],
        message: RabbitMessage,
    ) -> Any:
        """Sync retry scope.

        H8: on a caught, requeued failure, returns ``REQUEUED_FOR_RETRY``
        (never ``None``) — see that sentinel's docstring in ``core/types.py``.
        ``_handle_retry_sync`` either returns normally (requeued: routed to a
        delay queue, or nacked for immediate redelivery if that publish
        itself failed) or re-raises (terminal: permanent/exhausted) via
        ``_mark_terminal_and_raise``, so reaching this ``return`` unambiguously
        means "requeued" — an outer middleware (e.g. DeduplicationMiddleware)
        MUST NOT treat this the same as the handler actually succeeding.
        """
        try:
            return call_next(message)
        except Exception as exc:
            self._handle_retry_sync(exc, message)
            return REQUEUED_FOR_RETRY

    async def consume_scope_async(
        self,
        call_next: Callable[[RabbitMessage], Awaitable[Any]],
        message: RabbitMessage,
    ) -> Any:
        """Async retry scope. See :meth:`consume_scope` (H8) for why this
        returns ``REQUEUED_FOR_RETRY`` rather than ``None``."""
        try:
            return await call_next(message)
        except Exception as exc:
            await self._handle_retry_async(exc, message)
            return REQUEUED_FOR_RETRY

    def _handle_retry_sync(self, exc: Exception, message: RabbitMessage) -> None:
        """Handle a handler exception in sync context.

        Returns normally when the message was routed for retry; otherwise
        the exception is tagged terminal and re-raised.
        """
        classified = self._classifier.classify(exc)
        retry_count = self._get_retry_count(message)

        if classified.severity == ErrorSeverity.TRANSIENT and retry_count < self._config.max_retries:
            # Route to delay queue
            self._route_to_delay_queue_sync(message, retry_count)
            return

        # Terminal: permanent or exhausted
        self._mark_terminal_and_raise(exc, classified.severity, retry_count, message)

    async def _handle_retry_async(self, exc: Exception, message: RabbitMessage) -> None:
        """Handle a handler exception in async context.

        Mirrors :meth:`_handle_retry_sync` using the async publish/settle
        path.
        """
        classified = self._classifier.classify(exc)
        retry_count = self._get_retry_count(message)

        if classified.severity == ErrorSeverity.TRANSIENT and retry_count < self._config.max_retries:
            await self._route_to_delay_queue_async(message, retry_count)
            return

        self._mark_terminal_and_raise(exc, classified.severity, retry_count, message)

    def _get_retry_count(self, message: RabbitMessage) -> int:
        """Get current retry count from message headers, clamped to
        ``[0, max_retries]`` (H5).

        The header is read verbatim from an inbound AMQP message — nothing
        distinguishes a value written by this middleware's own delay-queue
        round trip from one set directly by an untrusted producer (there is
        no broker-side attestation of provenance for a plain header). Without
        clamping, a producer could set it negative (``attempt = retry_count +
        1`` in :meth:`_build_retry_envelope` would then be <= 0, producing a
        delay-queue routing key like ``...retry.-4`` that was never declared
        — the retry publish silently targets a non-existent queue and the
        message is lost rather than retried) or absurdly large (forcing every
        message straight to the DLQ, skipping retries entirely). Clamping
        makes ``max_retries`` an enforced ceiling regardless of what the
        header claims, independent of its configured value being read from a
        trusted or untrusted source. A malformed header value (non-numeric,
        or a non-finite float such as infinity or NaN) is treated the same
        as missing (0) rather than raising, so a garbage header degrades to
        "start of the retry sequence" instead of crashing the pipeline.

        For a broker-enforced backstop on top of this (e.g. against a
        misbehaving consumer that never expires/dead-letters a message),
        prefer quorum source queues with ``x-delivery-limit`` — see
        ``docs/retry-and-dlq.md``.
        """
        raw = message.headers.get(self._config.retry_header, 0)
        try:
            retry_count = int(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) — a float header is just as
            # producer-controlled as a string one and must not crash either.
            retry_count = 0
        return max(0, min(retry_count, self._config.max_retries))

    def _compute_delay(self, retry_count: int) -> int:
        """Compute delay for this retry attempt (with jitter).

        Picks ``delays[retry_count]`` (the last entry once ``retry_count``
        runs past the end), perturbs it uniformly by up to
        ``±jitter_factor`` of its value, and never returns less than 1.
        """
        delays = self._config.delays
        idx = min(retry_count, len(delays) - 1)
        base_delay = delays[idx]

        # Apply jitter
        jitter = base_delay * self._config.jitter_factor
        return max(1, int(base_delay + random.uniform(-jitter, jitter)))  # noqa: S311 — jitter, not crypto

    def _build_retry_envelope(self, message: RabbitMessage, retry_count: int) -> MessageEnvelope:
        """Build the envelope that is published to the delay queue.

        The envelope targets the default exchange with the delay queue's
        name as routing key, carries the original body and properties, and
        has its retry header set to ``retry_count + 1``.
        """
        # Determine delay queue name
        attempt = retry_count + 1
        # Always per-queue (shared mode is rejected by RetryConfig — H3).
        source_queue = message.headers.get("x-rabbitkit-original-queue", "unknown")
        if self._config.jitter_mode == "sharded":
            shard = _shard_index(message.message_id or "", self._config.jitter_shards)
            delay_queue_rk = _shard_queue_name(str(source_queue), attempt, shard)
        else:
            delay_queue_rk = f"{source_queue}.retry.{attempt}"

        # Preserve original headers + add retry metadata
        headers = dict(message.headers)
        headers[self._config.retry_header] = attempt
        if "x-rabbitkit-original-exchange" not in headers:
            headers["x-rabbitkit-original-exchange"] = message.exchange
        if "x-rabbitkit-original-routing-key" not in headers:
            headers["x-rabbitkit-original-routing-key"] = message.routing_key
        if "x-rabbitkit-original-queue" not in headers:
            headers["x-rabbitkit-original-queue"] = ""  # set by broker at consume time

        return MessageEnvelope(
            routing_key=delay_queue_rk,
            body=message.body,
            exchange="",  # direct to delay queue by name
            headers=headers,
            message_id=message.message_id or "",
            correlation_id=message.correlation_id,
            content_type=message.content_type or "application/octet-stream",
            content_encoding=message.content_encoding,
            # Preserve the remaining original message properties -- these used
            # to be silently dropped on every retry republish, so e.g. a
            # priority-queue message lost its priority on its first retry, and
            # an RPC request's reply_to/type/app_id/user_id never survived
            # long enough for the eventual (retried) reply to route back.
            reply_to=message.reply_to,
            priority=message.priority,
            expiration=message.expiration,
            type=message.type,
            app_id=message.app_id,
            user_id=message.user_id,
            # M4: mandatory so a runtime-deleted/missing delay queue comes back
            # as RETURNED (outcome not-ok) instead of being broker-confirmed
            # into the void. The route-to-delay-queue path checks outcome.ok
            # and nack-requeues on failure, so this turns silent loss into a
            # redelivery. (Requires publisher confirms + basic.return handling,
            # which both transports wire up.)
            mandatory=True,
        )

    def _route_to_delay_queue_sync(self, message: RabbitMessage, retry_count: int) -> None:
        """Publish to delay queue and ack source (sync).

        If the publish reports a not-ok outcome the source is nacked with
        requeue instead of acked, and no retry metric is recorded.
        """
        envelope = self._build_retry_envelope(message, retry_count)

        # NOTE(review): when no publish_fn is configured the source is still
        # acked below although nothing was published — presumably only the
        # case in tests or async-only wiring; confirm with callers.
        if self._publish_fn is not None:
            outcome = self._publish_fn(envelope)
            if outcome is not None and not outcome.ok:
                # Delay-queue publish failed — DO NOT ack, or the message is
                # lost forever (never retried, never dead-lettered). Nack with
                # requeue so the broker redelivers it.
                if not message.is_settled:
                    message.nack(requeue=True)
                logger.warning(
                    "Retry publish failed; nacked for redelivery: routing_key=%s",
                    envelope.routing_key,
                )
                return

        # Ack source message (it's safely in the delay queue now)
        if not message.is_settled:
            message.ack()

        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_retried_total, message)

        logger.info(
            "Retrying message (attempt %d/%d): routing_key=%s",
            retry_count + 1,
            self._config.max_retries,
            envelope.routing_key,
        )

    async def _route_to_delay_queue_async(self, message: RabbitMessage, retry_count: int) -> None:
        """Publish to delay queue and ack source (async).

        Same contract as :meth:`_route_to_delay_queue_sync`, using
        ``publish_async_fn`` and the message's async settle methods.
        """
        envelope = self._build_retry_envelope(message, retry_count)

        # NOTE(review): as in the sync variant, a missing publish_async_fn
        # means the source is acked without a republish — confirm intent.
        if self._publish_async_fn is not None:
            outcome = await self._publish_async_fn(envelope)
            if outcome is not None and not outcome.ok:
                # Delay-queue publish failed — DO NOT ack (see sync variant).
                if not message.is_settled:
                    await message.nack_async(requeue=True)
                logger.warning(
                    "Retry publish failed; nacked for redelivery: routing_key=%s",
                    envelope.routing_key,
                )
                return

        if not message.is_settled:
            await message.ack_async()

        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_retried_total, message)

        logger.info(
            "Retrying message (attempt %d/%d): routing_key=%s",
            retry_count + 1,
            self._config.max_retries,
            envelope.routing_key,
        )

    def _mark_terminal_and_raise(
        self,
        exc: Exception,
        severity: ErrorSeverity,
        retry_count: int,
        message: RabbitMessage,
    ) -> None:
        """Mark exception as terminal and re-raise.

        M2: this is the point where a message is committed to being
        dead-lettered -- permanent errors dead-letter on the first attempt,
        exhausted-retry errors dead-letter after ``max_retries`` -- so
        ``messages_dead_lettered_total`` is recorded here rather than at the
        actual reject() call (which happens later, in the pipeline's
        exception handling, and doesn't know WHY the reject is happening).

        Never returns normally: always raises ``exc``.
        """
        exc._rabbitkit_terminal = True  # type: ignore[attr-defined]
        if self._metrics_config is not None:
            self._record_metric(self._metrics_config.messages_dead_lettered_total, message)
        logger.warning(
            "Terminal failure (%s, retries=%d/%d): %s: %s",
            severity.value,
            retry_count,
            self._config.max_retries,
            type(exc).__name__,
            exc,
        )
        # Raise the exception explicitly rather than with a bare ``raise``:
        # a bare ``raise`` only works while some caller up the stack is inside
        # an ``except`` block, and fails with RuntimeError otherwise.
        raise exc
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
class RetryRouter:
    """Produces the delay-queue / DLQ topology declared at startup.

    Called by broker.start() for each route that has retry enabled.
    RetryRouter is the SINGLE OWNER of all retry/DLQ topology for a route.

    DLQ routing:
    - The source queue is re-declared with ``x-dead-letter-exchange=""``
      and ``x-dead-letter-routing-key=<dlq_name>``, so messages that are
      rejected/nacked with ``requeue=False`` are routed to the DLQ by
      RabbitMQ itself — no application-level routing needed.
    - ``get_source_queue_dlq_arguments()`` returns the extra arguments that
      must be added to the source queue declaration for that to happen.
    """

    def __init__(self, config: RetryConfig) -> None:
        """Keep the retry configuration the topology is derived from."""
        self._config = config

    def get_dlq_name(self, source_queue_name: str) -> str:
        """Return the dead-letter queue name for ``source_queue_name``.

        Always per-queue — shared mode (per_queue=False) is rejected by
        RetryConfig (H3), so there is no shared-DLQ branch.
        """
        return f"{source_queue_name}.dlq"

    def get_source_queue_dlq_arguments(self, source_queue_name: str) -> dict[str, str]:
        """Return the x-dead-letter arguments for the source queue declaration.

        With these arguments on the source queue, RabbitMQ automatically
        forwards messages rejected/nacked with requeue=False to the DLQ —
        which is what makes the DLQ actually reachable.
        """
        return {
            "x-dead-letter-exchange": "",  # default exchange
            "x-dead-letter-routing-key": self.get_dlq_name(source_queue_name),  # route directly by queue name
        }

    def get_delay_queue_definitions(
        self,
        source_queue_name: str,
        source_exchange_name: str,  # kept for signature stability (M5) — see docstring
    ) -> list[RabbitQueue]:
        """Build the delay queues and the DLQ for one source queue.

        Returns one ``RabbitQueue`` per (attempt, shard) pair, ordered by
        attempt and then shard, followed by the DLQ as the last element.
        The DLQ is reachable because ``get_source_queue_dlq_arguments()``
        wires the source queue's x-dead-letter-exchange to it.

        M5: on TTL expiry, a delay queue dead-letters back to the SOURCE
        QUEUE via the **default exchange** (``x-dead-letter-exchange=""``)
        with the source queue's own name as the routing key — never via the
        source queue's real exchange. On the default exchange a routing key
        equal to a queue's name always delivers directly to that queue,
        regardless of how the queue is bound elsewhere. The previous version
        dead-lettered to ``source_exchange_name`` with ``source_queue_name``
        as the routing key; for a source queue bound through a topic pattern
        (e.g. ``orders.*.created``) rather than literally by its own name,
        that key almost never matched the binding, so the retried message
        silently vanished instead of coming back after the delay.
        ``source_exchange_name`` is intentionally unused now — it stays in
        the signature so existing call sites don't need updating.
        """
        sharded = self._config.jitter_mode == "sharded"
        ttl_factors = (
            _shard_ttl_multipliers(self._config.jitter_shards, self._config.jitter_factor)
            if sharded
            else [1.0]
        )

        definitions: list[RabbitQueue] = []
        for attempt in range(1, self._config.max_retries + 1):
            base_ttl_ms = self._get_delay_ms(attempt - 1)
            # Always per-queue (shared mode is rejected by RetryConfig — H3).
            definitions.extend(
                RabbitQueue(
                    name=_shard_queue_name(source_queue_name, attempt, shard),
                    durable=True,
                    arguments={
                        # Uniform per-queue TTL (head-of-line safety); shards
                        # stagger TTLs ACROSS queues, never within one (F4).
                        "x-message-ttl": max(1, int(base_ttl_ms * factor)),
                        "x-dead-letter-exchange": "",  # default exchange (M5)
                        "x-dead-letter-routing-key": source_queue_name,
                        "x-queue-type": "classic",  # classic for delay queues
                    },
                )
                for shard, factor in enumerate(ttl_factors)
            )

        # DLQ — declared as a plain durable queue. The source queue's
        # x-dead-letter-exchange (set via get_source_queue_dlq_arguments)
        # routes nacked/rejected messages here.
        definitions.append(
            RabbitQueue(
                name=self.get_dlq_name(source_queue_name),
                durable=True,
            )
        )
        return definitions

    def _get_delay_ms(self, index: int) -> int:
        """Return the configured delay at ``index`` multiplied by 1000.

        Indexes past the end of ``delays`` reuse the last configured entry.
        """
        schedule = self._config.delays
        return schedule[min(index, len(schedule) - 1)] * 1000
|