rabbitkit 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rabbitkit/__init__.py +201 -0
- rabbitkit/_version.py +3 -0
- rabbitkit/aio/__init__.py +31 -0
- rabbitkit/async_/__init__.py +9 -0
- rabbitkit/async_/batch.py +213 -0
- rabbitkit/async_/broker.py +1123 -0
- rabbitkit/async_/connection.py +274 -0
- rabbitkit/async_/pool.py +363 -0
- rabbitkit/async_/transport.py +877 -0
- rabbitkit/asyncapi/__init__.py +5 -0
- rabbitkit/asyncapi/generator.py +219 -0
- rabbitkit/asyncapi/schema.py +98 -0
- rabbitkit/cli/__init__.py +77 -0
- rabbitkit/cli/_utils.py +38 -0
- rabbitkit/cli/commands/__init__.py +0 -0
- rabbitkit/cli/commands/dlq.py +190 -0
- rabbitkit/cli/commands/health.py +34 -0
- rabbitkit/cli/commands/migrate.py +570 -0
- rabbitkit/cli/commands/routes.py +88 -0
- rabbitkit/cli/commands/run.py +144 -0
- rabbitkit/cli/commands/shell.py +72 -0
- rabbitkit/cli/commands/topology.py +346 -0
- rabbitkit/concurrency.py +451 -0
- rabbitkit/core/__init__.py +5 -0
- rabbitkit/core/app.py +323 -0
- rabbitkit/core/config.py +849 -0
- rabbitkit/core/env_config.py +251 -0
- rabbitkit/core/errors.py +199 -0
- rabbitkit/core/logging.py +261 -0
- rabbitkit/core/message.py +235 -0
- rabbitkit/core/path.py +53 -0
- rabbitkit/core/pipeline.py +1289 -0
- rabbitkit/core/protocols.py +349 -0
- rabbitkit/core/registry.py +284 -0
- rabbitkit/core/route.py +329 -0
- rabbitkit/core/router.py +142 -0
- rabbitkit/core/topology.py +261 -0
- rabbitkit/core/topology_dispatch.py +74 -0
- rabbitkit/core/types.py +324 -0
- rabbitkit/dashboard/__init__.py +5 -0
- rabbitkit/dashboard/app.py +212 -0
- rabbitkit/di/__init__.py +19 -0
- rabbitkit/di/context.py +193 -0
- rabbitkit/di/depends.py +42 -0
- rabbitkit/di/resolver.py +503 -0
- rabbitkit/dlq.py +320 -0
- rabbitkit/experimental/__init__.py +50 -0
- rabbitkit/fastapi.py +91 -0
- rabbitkit/health.py +654 -0
- rabbitkit/highload/__init__.py +10 -0
- rabbitkit/highload/backpressure.py +514 -0
- rabbitkit/highload/batch.py +448 -0
- rabbitkit/locking.py +277 -0
- rabbitkit/management.py +470 -0
- rabbitkit/middleware/__init__.py +27 -0
- rabbitkit/middleware/base.py +125 -0
- rabbitkit/middleware/circuit_breaker.py +131 -0
- rabbitkit/middleware/compression.py +267 -0
- rabbitkit/middleware/deduplication.py +651 -0
- rabbitkit/middleware/error_classifier.py +43 -0
- rabbitkit/middleware/exception.py +105 -0
- rabbitkit/middleware/metrics.py +440 -0
- rabbitkit/middleware/otel.py +203 -0
- rabbitkit/middleware/rate_limit.py +247 -0
- rabbitkit/middleware/retry.py +540 -0
- rabbitkit/middleware/signing.py +682 -0
- rabbitkit/middleware/timeout.py +291 -0
- rabbitkit/py.typed +0 -0
- rabbitkit/queue_metrics.py +174 -0
- rabbitkit/results/__init__.py +6 -0
- rabbitkit/results/backend.py +102 -0
- rabbitkit/results/middleware.py +123 -0
- rabbitkit/rpc.py +632 -0
- rabbitkit/serialization/__init__.py +25 -0
- rabbitkit/serialization/base.py +35 -0
- rabbitkit/serialization/json.py +122 -0
- rabbitkit/serialization/msgspec.py +136 -0
- rabbitkit/serialization/pipeline.py +255 -0
- rabbitkit/streams.py +139 -0
- rabbitkit/sync/__init__.py +11 -0
- rabbitkit/sync/batch.py +595 -0
- rabbitkit/sync/broker.py +996 -0
- rabbitkit/sync/connection.py +209 -0
- rabbitkit/sync/pool.py +262 -0
- rabbitkit/sync/transport.py +1085 -0
- rabbitkit/testing/__init__.py +20 -0
- rabbitkit/testing/app.py +99 -0
- rabbitkit/testing/broker.py +540 -0
- rabbitkit/testing/fixtures.py +56 -0
- rabbitkit-0.9.0.dist-info/METADATA +575 -0
- rabbitkit-0.9.0.dist-info/RECORD +95 -0
- rabbitkit-0.9.0.dist-info/WHEEL +5 -0
- rabbitkit-0.9.0.dist-info/entry_points.txt +2 -0
- rabbitkit-0.9.0.dist-info/licenses/LICENSE +21 -0
- rabbitkit-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1085 @@
|
|
|
1
|
+
"""SyncTransport — pika-based transport adapter.
|
|
2
|
+
|
|
3
|
+
THREAD SAFETY (CRITICAL):
|
|
4
|
+
Model A — One connection per thread (used in 0.1.0):
|
|
5
|
+
Each thread gets its own dedicated pika connection.
|
|
6
|
+
No cross-thread connection sharing.
|
|
7
|
+
|
|
8
|
+
Fork safety: lazy connect (NOT in __init__) — pika sockets can't cross fork().
|
|
9
|
+
Reconnection: _ensure_connected() before each publish.
|
|
10
|
+
TopologyMode: respected in declare_exchange/declare_queue/bind_queue.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
import random
|
|
17
|
+
import threading
|
|
18
|
+
import time
|
|
19
|
+
import uuid
|
|
20
|
+
from collections.abc import Callable
|
|
21
|
+
from datetime import UTC, datetime
|
|
22
|
+
from typing import Any, TypeVar
|
|
23
|
+
|
|
24
|
+
from rabbitkit.core.config import ConnectionConfig, SecurityConfig, SocketConfig
|
|
25
|
+
from rabbitkit.core.errors import ConfigurationError
|
|
26
|
+
from rabbitkit.core.message import RabbitMessage
|
|
27
|
+
from rabbitkit.core.topology import RabbitExchange, RabbitQueue
|
|
28
|
+
from rabbitkit.core.topology_dispatch import TopoAction, TopologyDispatcher
|
|
29
|
+
from rabbitkit.core.types import (
|
|
30
|
+
DIRECT_REPLY_TO_QUEUE,
|
|
31
|
+
MessageEnvelope,
|
|
32
|
+
PublishOutcome,
|
|
33
|
+
PublishStatus,
|
|
34
|
+
TopologyMode,
|
|
35
|
+
)
|
|
36
|
+
from rabbitkit.sync.connection import get_connection_errors, make_pika_connection_params
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
_T = TypeVar("_T")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SyncTransport:
|
|
44
|
+
"""Pika-based synchronous transport.
|
|
45
|
+
|
|
46
|
+
Lazy connect: connection is established on first use, not in __init__.
|
|
47
|
+
This ensures fork safety and avoids connection issues during import.
|
|
48
|
+
|
|
49
|
+
THE INVARIANT: no pika connection is ever used from a thread other than
|
|
50
|
+
the one that created it.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(
    self,
    connection_config: ConnectionConfig | None = None,
    socket_config: SocketConfig | None = None,
    security_config: SecurityConfig | None = None,
    topology_mode: TopologyMode = TopologyMode.AUTO_DECLARE,
    confirm_delivery: bool = True,
    confirm_timeout: float = 5.0,
    on_topology_conflict: str = "raise",
) -> None:
    """Record configuration and start with every piece of state "not connected".

    Nothing in here opens a connection; that happens later in ``connect()``.

    Args:
        connection_config: Connection settings; ``ConnectionConfig()`` when falsy.
        socket_config: Socket settings; ``SocketConfig()`` when falsy.
        security_config: Security settings; ``SecurityConfig()`` when falsy.
        topology_mode: Mode handed to the ``TopologyDispatcher``.
        confirm_delivery: Enable publisher confirms on the publisher channel.
        confirm_timeout: Upper bound (seconds) for the publish+confirm wait.
        on_topology_conflict: ``"raise"`` or ``"warn_continue"`` on a 406
            topology-drift conflict (M14).
    """
    # ---- configuration -------------------------------------------------
    self._connection_config = connection_config or ConnectionConfig()
    self._socket_config = socket_config or SocketConfig()
    self._security_config = security_config or SecurityConfig()
    self._topology_mode = topology_mode
    self._topo = TopologyDispatcher(topology_mode)
    self._on_topology_conflict = on_topology_conflict  # M14
    self._confirm_delivery = confirm_delivery
    # I-10: a confirm that never arrives must not stall a worker forever.
    # Brokers are expected to pass ``config.publisher.confirm_timeout``; the
    # default is only a fallback.
    self._confirm_timeout = float(confirm_timeout)

    # ---- connection / ownership state ----------------------------------
    self._connection: Any = None  # pika.BlockingConnection
    self._channel: Any = None  # pika.channel.Channel (publisher/topology)
    self._connected = False
    self._owner_ident: int | None = None  # thread that owns the connection
    self._ever_connected = False

    # ---- consumer state ------------------------------------------------
    self._consumer_tags: dict[str, str] = {}  # queue_name → consumer_tag
    self._consuming = False  # True while the I/O loop is running
    # H2: becomes True the first time start_consuming() runs on this
    # connection and is only cleared by disconnect(). _consuming drops to
    # False as soon as the loop stops pumping (including while
    # SyncBroker.stop() drains the worker pool and handlers may still be
    # running), so _run_on_io_thread consults THIS flag to decide whether a
    # cross-thread call has to be marshalled: a worker's ack must never run
    # inline merely because the loop paused for a moment.
    self._ever_consumed = False
    # H-SRE1: one channel per consumed queue, so a per-queue basic_qos never
    # overwrites another consumer's and fair dispatch is kept. The
    # publisher/topology channel above is separate from these.
    self._consumer_channels: dict[str, Any] = {}
    # Channel currently consuming DIRECT_REPLY_TO_QUEUE (set by
    # consume(declare=False), cleared on cancel/disconnect). RabbitMQ's
    # direct reply-to needs the reply consumer and the request publish on
    # the SAME channel; publishing elsewhere fails with "PRECONDITION_FAILED
    # - fast reply consumer does not exist". publish() checks this so that
    # RPCClient never has to know about channels.
    self._reply_to_channel: Any = None
    # H1: ids of channels on which confirm_delivery() has been enabled.
    # pika can only report an unroutable Basic.Return (UnroutableError) in
    # confirm mode, so a mandatory=True publish upgrades its channel on
    # demand, once, regardless of confirm_delivery.
    self._confirmed_channel_ids: set[int] = set()

    # ---- callback registries (zero-argument callables) -----------------
    # Backpressure hooks (a FlowController registers here); pika's
    # blocked/unblocked frames are adapted to them.
    self._blocked_callbacks: list[Callable[[], None]] = []
    self._unblocked_callbacks: list[Callable[[], None]] = []
    # L14: fired once per start_consuming() loop iteration (after each
    # process_data_events() call returns), i.e. per I/O tick and NOT per
    # delivered message. Lets the broker refresh a liveness heartbeat so an
    # idle but healthy consumer is not mistaken for a wedged one.
    self._io_tick_callbacks: list[Callable[[], None]] = []
    # Connection-churn signal: fired on every successful connect() AFTER
    # the first, so a flapping broker/network shows up in metrics.
    self._reconnect_callbacks: list[Callable[[], None]] = []

    # L15: blocked state is tracked passively, whether or not any callback
    # above is registered; health.broker_health_check reads it through the
    # is_blocked property.
    self._blocked_state: bool = False

    # ---- reconnect bounds (H-SRE4: never retry forever) ----------------
    # The broker may override these attributes if desired.
    self.max_reconnect_attempts: int = 0  # 0 == use the time-bounded default below
    self._reconnect_total_timeout: float = 300.0
|
|
144
|
+
|
|
145
|
+
def on_reconnect(self, callback: Callable[[], None]) -> None:
    """Add *callback* to the hooks run on each re-connection.

    The hooks fire on every successful connect after the first one and
    serve as a connection-churn metric hook.
    """
    hooks = self._reconnect_callbacks
    hooks.append(callback)
|
|
149
|
+
|
|
150
|
+
def _fire_reconnect(self) -> None:
|
|
151
|
+
for cb in list(self._reconnect_callbacks):
|
|
152
|
+
try:
|
|
153
|
+
cb()
|
|
154
|
+
except Exception: # pragma: no cover — never let a cb break connect
|
|
155
|
+
logger.exception("reconnect callback raised")
|
|
156
|
+
|
|
157
|
+
def on_blocked(self, callback: Callable[[], None]) -> None:
    """Add *callback* to the connection-blocked hooks (e.g. FlowController.on_blocked)."""
    hooks = self._blocked_callbacks
    hooks.append(callback)
|
|
160
|
+
|
|
161
|
+
def on_unblocked(self, callback: Callable[[], None]) -> None:
    """Add *callback* to the connection-unblocked hooks (e.g. FlowController.on_unblocked)."""
    hooks = self._unblocked_callbacks
    hooks.append(callback)
|
|
164
|
+
|
|
165
|
+
@property
def is_blocked(self) -> bool:
    """Whether RabbitMQ has sent ``connection.blocked`` (L15).

    This typically signals a broker memory/disk alarm. The flag is kept up
    to date passively, with or without any ``on_blocked``/``on_unblocked``
    callback, so ``health.broker_health_check`` can read it even when no
    ``FlowController`` was opted into.
    """
    blocked = self._blocked_state
    return blocked
|
|
173
|
+
|
|
174
|
+
def on_io_tick(self, callback: Callable[[], None]) -> None:
    """Add *callback* to the hooks fired once per ``start_consuming()`` loop
    iteration (L14), such as the broker's liveness heartbeat refresh."""
    hooks = self._io_tick_callbacks
    hooks.append(callback)
|
|
178
|
+
|
|
179
|
+
def _fire_io_tick(self) -> None:
|
|
180
|
+
for cb in list(self._io_tick_callbacks):
|
|
181
|
+
try:
|
|
182
|
+
cb()
|
|
183
|
+
except Exception: # pragma: no cover — never let a cb break the I/O loop
|
|
184
|
+
logger.exception("io_tick callback raised")
|
|
185
|
+
|
|
186
|
+
def _pika_blocked(self, _connection: Any, *_args: Any) -> None:
|
|
187
|
+
self._blocked_state = True
|
|
188
|
+
for cb in list(self._blocked_callbacks):
|
|
189
|
+
try:
|
|
190
|
+
cb()
|
|
191
|
+
except Exception: # pragma: no cover — never let a cb break the I/O loop
|
|
192
|
+
logger.exception("blocked callback raised")
|
|
193
|
+
|
|
194
|
+
def _pika_unblocked(self, _connection: Any, *_args: Any) -> None:
|
|
195
|
+
self._blocked_state = False
|
|
196
|
+
for cb in list(self._unblocked_callbacks):
|
|
197
|
+
try:
|
|
198
|
+
cb()
|
|
199
|
+
except Exception: # pragma: no cover
|
|
200
|
+
logger.exception("unblocked callback raised")
|
|
201
|
+
|
|
202
|
+
def connect(self) -> None:
    """Establish the connection and the publisher/topology channel.

    No-op when already connected. On success the calling thread becomes
    the connection's owner, and reconnect hooks fire on every successful
    connect after the first.

    Raises:
        ImportError: if pika is not installed.
        Exception: whatever pika raises while connecting or opening the
            channel. A connection that was opened before the failure is
            closed again, so no socket is left behind.
    """
    if self._connected:
        return

    try:
        import pika
    except ImportError:
        raise ImportError(
            "pika is required for sync transport. Install it with: pip install rabbitkit[sync]"
        ) from None

    params = make_pika_connection_params(
        self._connection_config,
        self._socket_config,
        self._security_config,
    )

    logger.info(
        "Connecting to RabbitMQ at %s:%d",
        self._connection_config.host,
        self._connection_config.port,
    )

    connection = pika.BlockingConnection(params)
    try:
        # Publisher/topology channel (confirm_delivery for publisher confirms).
        channel = connection.channel()
        if self._confirm_delivery:
            channel.confirm_delivery()
    except Exception:
        # The connection is open but unusable to us. Close it here: nothing
        # else holds a reference, so it would otherwise leak its socket.
        try:
            connection.close()
        except Exception:  # pragma: no cover — best effort
            logger.debug("Could not close half-initialised connection", exc_info=True)
        raise

    # Publish the new objects only once both are fully set up.
    self._connection = connection
    self._channel = channel
    if self._confirm_delivery:
        self._confirmed_channel_ids.add(id(channel))

    # A blocked notification belongs to the connection it arrived on; a new
    # connection starts unblocked until the broker says otherwise (L15).
    self._blocked_state = False

    # Register connection blocked/unblocked callbacks (C-6) so a
    # FlowController can throttle publishes when RabbitMQ raises an alarm.
    try:
        connection.add_on_connection_blocked_callback(self._pika_blocked)
        connection.add_on_connection_unblocked_callback(self._pika_unblocked)
    except Exception:  # pragma: no cover — older pika may lack these
        logger.debug("Could not register blocked/unblocked callbacks")

    self._connected = True
    self._owner_ident = threading.get_ident()
    if self._ever_connected:
        self._fire_reconnect()  # connection-churn metric hook
    self._ever_connected = True
    logger.info("Connected to RabbitMQ")
|
|
247
|
+
|
|
248
|
+
def __enter__(self) -> SyncTransport:
|
|
249
|
+
self.connect()
|
|
250
|
+
return self
|
|
251
|
+
|
|
252
|
+
def __exit__(self, *args: Any) -> None:
|
|
253
|
+
self.disconnect()
|
|
254
|
+
|
|
255
|
+
def disconnect(self) -> None:
    """Close all channels and the connection, then reset connection state.

    No-op when not connected. Every close is best effort: a failure is
    logged (or ignored for consumer channels) and never raised, and it
    never prevents the remaining objects from being closed. In particular,
    a failing publisher-channel close no longer skips closing the
    connection itself.
    """
    if not self._connected:
        return

    try:
        # Close per-queue consumer channels first
        for ch in list(self._consumer_channels.values()):
            try:
                if ch.is_open:
                    ch.close()
            except Exception:  # pragma: no cover — best effort
                pass

        # The channel close gets its own guard so that an error here cannot
        # skip the connection close below and leak the socket.
        try:
            if self._channel and self._channel.is_open:
                self._channel.close()
        except Exception as e:
            logger.warning("Error during disconnect: %s", e)

        if self._connection and self._connection.is_open:
            self._connection.close()
    except Exception as e:
        logger.warning("Error during disconnect: %s", e)
    finally:
        self._consumer_channels.clear()
        self._consumer_tags = {}
        self._connection = None
        self._channel = None
        self._reply_to_channel = None
        self._confirmed_channel_ids.clear()
        self._connected = False
        self._owner_ident = None
        self._ever_consumed = False
        # A closed transport cannot be blocked; drop any stale alarm so
        # is_blocked does not keep reporting it (L15).
        self._blocked_state = False
        logger.info("Disconnected from RabbitMQ")
|
|
286
|
+
|
|
287
|
+
def is_connected(self) -> bool:
    """Report whether both the connection and the publisher channel are open.

    Any exception raised while inspecting the pika objects counts as
    "not connected".
    """
    if not self._connected:
        return False
    try:
        connection = self._connection
        if connection is None:
            return False
        connection_open = connection.is_open
        if not connection_open:
            return connection_open
        channel = self._channel
        if channel is None:
            return False
        return channel.is_open
    except Exception:
        return False
|
|
300
|
+
|
|
301
|
+
@property
def has_open_channels(self) -> bool:
    """Whether there is at least one consumer channel and every one is open.

    Transport-contract attribute (I-5) read by
    :func:`rabbitkit.health._transport_consumers_alive`: while this is
    ``False``, registered ``consumer_tag``s are treated as stale and the
    health/readiness probes drop the consumer count. It is computed from
    ``self._consumer_channels``, i.e. the per-queue channels this transport
    actually holds.
    """
    consumer_channels = self._consumer_channels
    if not consumer_channels:
        return False
    for channel in consumer_channels.values():
        if not channel.is_open:
            return False
    return True
|
|
314
|
+
|
|
315
|
+
def _ensure_connected(self) -> None:
    """Ensure connection is established, reconnecting if needed.

    Returns immediately when ``is_connected()`` is true. Otherwise retries
    ``connect()`` with exponential backoff and full jitter (H-SRE3), which
    spreads out clients that all reconnect at once. The loop is bounded by
    an attempt cap and a total-time cap (H-SRE4), so it can never retry
    forever.

    The bounds are checked BEFORE sleeping: once they are exhausted the
    last connection error is re-raised straight away instead of after one
    more pointless backoff sleep, and no sleep runs past the total
    deadline.

    Raises:
        Exception: the last connection error (one of
            ``get_connection_errors()``) once the bounds are exhausted, so
            the broker's run() recovery can decide what to do. Other
            exceptions from ``connect()`` propagate immediately.
    """
    if self.is_connected():
        return

    self._connected = False
    backoff = self._connection_config.reconnect_backoff_base
    max_backoff = self._connection_config.reconnect_backoff_max
    connection_errors = get_connection_errors()

    # Bounded reconnect: never infinite (H-SRE4). Hardcoded sane defaults.
    max_attempts = self.max_reconnect_attempts or 30
    total_deadline = time.monotonic() + self._reconnect_total_timeout
    attempts = 0

    while True:
        try:
            self.connect()
            return
        except connection_errors as e:
            attempts += 1
            remaining = total_deadline - time.monotonic()
            if attempts >= max_attempts or remaining <= 0:
                logger.critical(
                    "Reconnect attempts exhausted after %d tries / %.0fs; giving up",
                    attempts,
                    self._reconnect_total_timeout,
                )
                raise
            # Full jitter: sleep a random fraction of the current backoff to
            # spread reconnects across clients (H-SRE3), capped so that the
            # sleep itself cannot overrun the total deadline.
            sleep_for = min(random.uniform(0.0, backoff), remaining)  # noqa: S311
            logger.warning(
                "Connection failed, retrying in %.2fs (attempt %d): %s",
                sleep_for,
                attempts,
                e,
            )
            time.sleep(sleep_for)
            backoff = min(backoff * 2, max_backoff)
|
|
361
|
+
|
|
362
|
+
def reconnect(self) -> None:
    """Tear the current connection down and bring up a fresh connection and
    channel (used by consumer recovery)."""
    # Order matters: drop the old connection before connecting again.
    self.disconnect()
    self._ensure_connected()
|
|
366
|
+
|
|
367
|
+
def ensure_connected(self) -> None:
    """Public entry point to :meth:`_ensure_connected` (idle-pump support).

    In contrast to :meth:`reconnect`, nothing happens while the transport
    is already connected, which makes this cheap enough to call on every
    tick of an idle-pump loop (see ``SyncBroker.pump_idle``). A reconnect
    with bounded backoff is attempted only when the connection or channel
    is actually dead.
    """
    self._ensure_connected()
|
|
376
|
+
|
|
377
|
+
def _run_on_io_thread(
|
|
378
|
+
self,
|
|
379
|
+
fn: Callable[[], _T],
|
|
380
|
+
*,
|
|
381
|
+
timeout: float = 30.0,
|
|
382
|
+
) -> _T:
|
|
383
|
+
"""Run a channel operation on the connection's I/O thread.
|
|
384
|
+
|
|
385
|
+
pika's BlockingConnection is NOT thread-safe: every basic_* call must
|
|
386
|
+
execute on the thread that owns the connection. When a worker thread
|
|
387
|
+
(worker_count > 1) acks/nacks/publishes, marshal the call onto the I/O
|
|
388
|
+
loop via add_callback_threadsafe and block for its result/exception.
|
|
389
|
+
When already on the owner thread (single worker / publisher), or when
|
|
390
|
+
no consume loop has EVER run on this connection (a pure producer with
|
|
391
|
+
no consumers — nothing else can be concurrently driving the socket),
|
|
392
|
+
run inline.
|
|
393
|
+
|
|
394
|
+
H2: deliberately does NOT fall back to inline just because the I/O
|
|
395
|
+
loop has momentarily stopped pumping (``not self._consuming``) once a
|
|
396
|
+
consume loop has run at least once (``self._ever_consumed``) — that
|
|
397
|
+
used to be true for the whole SyncBroker.stop() drain window (consumers
|
|
398
|
+
already cancelled, worker pool still finishing in-flight handlers),
|
|
399
|
+
so a worker thread's ack/nack/reject ran INLINE, cross-thread, on the
|
|
400
|
+
pika connection — unsynchronized with, and possibly concurrent with,
|
|
401
|
+
other worker threads' acks on the same consumer channel or the owner
|
|
402
|
+
thread's own disconnect(). Once ``_ever_consumed`` is True we always
|
|
403
|
+
marshal and rely on the owner thread pumping the I/O loop during drain
|
|
404
|
+
(see ``pump()``, called from ``SyncBroker.stop()``); if nothing pumps,
|
|
405
|
+
we fail fast with ``TimeoutError`` below rather than run unsafely.
|
|
406
|
+
|
|
407
|
+
*timeout* bounds the wait for the I/O loop to drain the callback (R-3):
|
|
408
|
+
on expiry we raise ``TimeoutError`` AND mark the callback cancelled so a
|
|
409
|
+
late drain (after the caller has already nacked+requeued and moved on)
|
|
410
|
+
becomes a no-op instead of settling an already-redelivered message.
|
|
411
|
+
"""
|
|
412
|
+
if (
|
|
413
|
+
self._owner_ident is None
|
|
414
|
+
or threading.get_ident() == self._owner_ident
|
|
415
|
+
or not self._ever_consumed
|
|
416
|
+
):
|
|
417
|
+
return fn()
|
|
418
|
+
|
|
419
|
+
result: list[_T] = []
|
|
420
|
+
error: list[BaseException] = []
|
|
421
|
+
done = threading.Event()
|
|
422
|
+
# R-3: set when the caller gives up waiting, so a later _cb drain is a
|
|
423
|
+
# no-op rather than settling an already-redelivered message.
|
|
424
|
+
cancelled = threading.Event()
|
|
425
|
+
|
|
426
|
+
def _cb() -> None:
|
|
427
|
+
if cancelled.is_set():
|
|
428
|
+
# The caller already timed out and moved on (nack+requeue).
|
|
429
|
+
# Running fn() now could double-settle, so drop the late callback.
|
|
430
|
+
return
|
|
431
|
+
try:
|
|
432
|
+
result.append(fn())
|
|
433
|
+
except BaseException as exc: # re-raised on the caller thread
|
|
434
|
+
error.append(exc)
|
|
435
|
+
finally:
|
|
436
|
+
done.set()
|
|
437
|
+
|
|
438
|
+
# ponytail: blocks until the I/O loop drains the callback. Bound the
|
|
439
|
+
# wait so a stalled/dead I/O loop can't pin the worker thread forever —
|
|
440
|
+
# on expiry we raise TimeoutError so the pipeline exception handler can
|
|
441
|
+
# nack+requeue and the worker is freed. 30s is well beyond any healthy
|
|
442
|
+
# round-trip (H-P7); the publish path passes a tighter bound (I-10).
|
|
443
|
+
io_stall_timeout = timeout
|
|
444
|
+
self._connection.add_callback_threadsafe(_cb)
|
|
445
|
+
if not done.wait(timeout=io_stall_timeout):
|
|
446
|
+
cancelled.set()
|
|
447
|
+
raise TimeoutError(
|
|
448
|
+
f"Timed out after {io_stall_timeout}s waiting for the pika I/O "
|
|
449
|
+
"loop to drain a cross-thread callback (connection stalled?)"
|
|
450
|
+
)
|
|
451
|
+
if error:
|
|
452
|
+
raise error[0]
|
|
453
|
+
return result[0]
|
|
454
|
+
|
|
455
|
+
def _publish_confirm_wait_bounded(self, fn: Callable[[], _T], timeout: float) -> _T:
|
|
456
|
+
"""Bound a blocking publish call that would otherwise run fully
|
|
457
|
+
inline and unbounded (I-11).
|
|
458
|
+
|
|
459
|
+
pika's ``BlockingChannel.basic_publish()`` takes no timeout
|
|
460
|
+
parameter, and its confirm-wait loops via ``process_data_events``
|
|
461
|
+
with no aggregate time limit -- a broker that accepts the TCP
|
|
462
|
+
connection but never sends the confirm frame back (disk full,
|
|
463
|
+
internally wedged) hangs this call forever, `confirm_timeout`
|
|
464
|
+
notwithstanding, whenever ``_run_on_io_thread`` would otherwise run
|
|
465
|
+
it inline (single-worker/pure-producer case — see
|
|
466
|
+
``_publish_on_channel``, the cross-thread marshal case is already
|
|
467
|
+
bounded by ``_run_on_io_thread`` itself).
|
|
468
|
+
|
|
469
|
+
Runs *fn* on a dedicated one-shot thread and bounds OUR wait for it
|
|
470
|
+
(same R-3 shape as ``_run_on_io_thread``). On timeout, that thread
|
|
471
|
+
may still be blocked inside pika, touching the connection -- never
|
|
472
|
+
safe to touch that connection from any other thread afterward
|
|
473
|
+
(pika's ``BlockingConnection`` supports exactly one thread at a
|
|
474
|
+
time), so it is poisoned (all references dropped, never closed —
|
|
475
|
+
closing would itself be a second thread touching it).
|
|
476
|
+
``_ensure_connected()`` transparently creates a fresh connection on
|
|
477
|
+
the next call, the same recovery path as a genuine network failure.
|
|
478
|
+
|
|
479
|
+
Only called when no consume loop can be sharing this connection
|
|
480
|
+
(see the call site) -- if one were, resuming ``start_consuming()``
|
|
481
|
+
after giving up would immediately recreate the exact concurrent-
|
|
482
|
+
touch hazard this method exists to avoid.
|
|
483
|
+
"""
|
|
484
|
+
result: list[_T] = []
|
|
485
|
+
error: list[BaseException] = []
|
|
486
|
+
done = threading.Event()
|
|
487
|
+
|
|
488
|
+
def _run() -> None:
|
|
489
|
+
try:
|
|
490
|
+
result.append(fn())
|
|
491
|
+
except BaseException as exc:
|
|
492
|
+
error.append(exc)
|
|
493
|
+
finally:
|
|
494
|
+
done.set()
|
|
495
|
+
|
|
496
|
+
threading.Thread(target=_run, name="rabbitkit-publish-confirm-wait", daemon=True).start()
|
|
497
|
+
if not done.wait(timeout=timeout):
|
|
498
|
+
self._poison_wedged_connection()
|
|
499
|
+
raise TimeoutError(
|
|
500
|
+
f"Timed out after {timeout}s waiting for a publish confirm; connection "
|
|
501
|
+
"presumed wedged and will be re-established on the next call"
|
|
502
|
+
)
|
|
503
|
+
if error:
|
|
504
|
+
raise error[0]
|
|
505
|
+
return result[0]
|
|
506
|
+
|
|
507
|
+
def _poison_wedged_connection(self) -> None:
|
|
508
|
+
"""Drop all references to a connection a timed-out background
|
|
509
|
+
publish (I-11) may still be touching. Never call ``.close()`` or
|
|
510
|
+
otherwise touch the pika objects here -- that would itself be a
|
|
511
|
+
second thread concurrently touching a ``BlockingConnection``, which
|
|
512
|
+
pika does not support. The abandoned background thread's eventual
|
|
513
|
+
completion (or, rarely, permanent hang) only ever touches its own
|
|
514
|
+
locally-captured references and no longer affects anything here.
|
|
515
|
+
"""
|
|
516
|
+
self._connection = None
|
|
517
|
+
self._channel = None
|
|
518
|
+
self._reply_to_channel = None
|
|
519
|
+
self._consumer_channels = {}
|
|
520
|
+
self._consumer_tags = {}
|
|
521
|
+
self._confirmed_channel_ids = set()
|
|
522
|
+
self._connected = False
|
|
523
|
+
self._owner_ident = None
|
|
524
|
+
self._ever_consumed = False
|
|
525
|
+
|
|
526
|
+
def publish(self, envelope: MessageEnvelope) -> PublishOutcome:
    """Publish *envelope* to RabbitMQ and return the resulting PublishOutcome.

    The connection is (re-)established first if necessary.

    Channel selection: an envelope with ``reply_to=DIRECT_REPLY_TO_QUEUE``
    (RPCClient's direct reply-to requests) goes out on
    ``self._reply_to_channel`` — the channel that registered the reply
    consumer — provided that channel is set and open. Everything else uses
    the default publisher channel. RabbitMQ demands this channel affinity
    for direct reply-to; a publish on another channel raises
    "PRECONDITION_FAILED - fast reply consumer does not exist".
    """
    self._ensure_connected()

    if envelope.reply_to == DIRECT_REPLY_TO_QUEUE:
        reply_channel = self._reply_to_channel
        if reply_channel is not None and reply_channel.is_open:
            return self._publish_on_channel(reply_channel, envelope)

    return self._publish_on_channel(self._channel, envelope)
|
|
549
|
+
|
|
550
|
+
def _ensure_mandatory_confirms(self, channel: Any) -> None:
|
|
551
|
+
"""Enable publisher confirms on *channel* if not already active.
|
|
552
|
+
|
|
553
|
+
H1: detecting an unroutable ``Basic.Return`` via pika's
|
|
554
|
+
``UnroutableError`` requires confirm mode — in non-confirm mode
|
|
555
|
+
``basic_publish()`` has no way to report a return at all. Idempotent
|
|
556
|
+
and tracked per-channel (by id) so a repeat call is a no-op rather than
|
|
557
|
+
pika logging a spurious "confirmation was already enabled" error.
|
|
558
|
+
Marshaled like ``basic_publish`` since it drives blocking I/O.
|
|
559
|
+
"""
|
|
560
|
+
if id(channel) in self._confirmed_channel_ids:
|
|
561
|
+
return
|
|
562
|
+
self._run_on_io_thread(channel.confirm_delivery)
|
|
563
|
+
self._confirmed_channel_ids.add(id(channel))
|
|
564
|
+
|
|
565
|
+
def _publish_on_channel(self, channel: Any, envelope: MessageEnvelope) -> PublishOutcome:
    """Publish *envelope* on a specific already-open channel.

    Exceptions raised while publishing are converted into a
    ``PublishOutcome`` rather than propagated:

    * ``CONFIRMED`` / ``SENT`` — ``basic_publish`` returned normally
      (``CONFIRMED`` only when confirms are configured or the publish is
      ``mandatory``).
    * ``RETURNED`` — pika raised ``UnroutableError``.
    * ``NACKED`` — pika raised ``NackError``.
    * ``TIMEOUT`` — the bounded publish/confirm wait raised ``TimeoutError``.
    * ``ERROR`` — any other ``Exception``.

    The failure outcomes carry the triggering exception in ``error``.
    """
    # Imported BEFORE the ``try`` (matching declare_exchange/declare_queue):
    # with the import inside the ``try``, a failure before ``pika`` was bound
    # made the ``except pika.exceptions...`` clauses themselves raise
    # UnboundLocalError, masking the real error.
    import pika

    try:
        if envelope.mandatory:
            self._ensure_mandatory_confirms(channel)

        properties = pika.BasicProperties(
            message_id=envelope.message_id,
            correlation_id=envelope.correlation_id,
            reply_to=envelope.reply_to,
            content_type=envelope.content_type,
            content_encoding=envelope.content_encoding,
            headers=envelope.headers or None,
            delivery_mode=envelope.delivery_mode,
            priority=envelope.priority,
            expiration=envelope.expiration,
            type=envelope.type,
            user_id=envelope.user_id,
            app_id=envelope.app_id,
        )

        if envelope.timestamp:
            # AMQP timestamps are whole Unix seconds.
            properties.timestamp = int(envelope.timestamp.timestamp())

        # I-10: bound the publish+confirm wait by confirm_timeout so a
        # missing confirm cannot stall the worker forever.
        publish_timeout = min(30.0, self._confirm_timeout)

        def do_publish() -> None:
            channel.basic_publish(
                exchange=envelope.exchange,
                routing_key=envelope.routing_key,
                body=envelope.body,
                properties=properties,
                mandatory=envelope.mandatory,
            )

        if threading.get_ident() == self._owner_ident and self._ever_consumed:
            # I-11: this thread also owns dispatching further deliveries
            # via start_consuming() -- cannot safely bound this wait on a
            # separate thread (see _publish_confirm_wait_bounded's
            # docstring: resuming start_consuming() the instant we gave
            # up would immediately touch a connection our own abandoned
            # helper thread might still be using). Documented residual
            # limitation: pika's BlockingChannel has no native way to
            # bound a confirm wait from the owner thread itself. Mitigate
            # by using worker_count > 1, so a handler's publish marshals
            # through the already-bounded cross-thread path instead.
            do_publish()
        elif self._owner_ident is None or not self._ever_consumed:
            # No consume loop can be sharing this connection (pure
            # producer, or nothing has ever consumed yet) -- safe to
            # bound with a dedicated helper thread.
            self._publish_confirm_wait_bounded(do_publish, timeout=publish_timeout)
        else:
            # Cross-thread: marshal onto the owner's I/O loop, which
            # _run_on_io_thread already bounds by confirm_timeout.
            self._run_on_io_thread(do_publish, timeout=publish_timeout)

        # M4: only report CONFIRMED when the channel is actually in
        # publisher-confirm mode -- confirm_delivery=False (unless this
        # publish is `mandatory`, which always enables confirms via
        # _ensure_mandatory_confirms above) means basic_publish() is
        # fire-and-forget and nothing was broker-acknowledged.
        # NOTE(review): this is derived from configuration, not from the
        # mode of *channel* itself; confirm it also holds when *channel* is
        # not the default publisher channel.
        confirmed = self._confirm_delivery or envelope.mandatory
        return PublishOutcome(
            status=PublishStatus.CONFIRMED if confirmed else PublishStatus.SENT,
            exchange=envelope.exchange,
            routing_key=envelope.routing_key,
        )

    except pika.exceptions.UnroutableError as e:
        logger.warning(
            "Publish returned as unroutable (mandatory=True, no matching binding): "
            "exchange=%s routing_key=%s",
            envelope.exchange,
            envelope.routing_key,
        )
        return PublishOutcome(
            status=PublishStatus.RETURNED,
            exchange=envelope.exchange,
            routing_key=envelope.routing_key,
            error=e,
        )

    except pika.exceptions.NackError as e:
        logger.warning(
            "Publish nacked by broker: exchange=%s routing_key=%s",
            envelope.exchange,
            envelope.routing_key,
        )
        return PublishOutcome(
            status=PublishStatus.NACKED,
            exchange=envelope.exchange,
            routing_key=envelope.routing_key,
            error=e,
        )

    except TimeoutError as e:
        # I-10: basic_publish() blocks synchronously for the broker confirm
        # (in confirm mode); _run_on_io_thread bounds that wait by
        # confirm_timeout and raises TimeoutError on expiry -- exactly the
        # "no confirm arrived in time" case docs/message-safety.md documents
        # as PublishStatus.TIMEOUT (matching the async transport's
        # equivalent asyncio.timeout(confirm_timeout) branch). This used to
        # fall through to the generic ERROR branch below, so a caller
        # correctly checking `status == PublishStatus.TIMEOUT` per the
        # documented contract silently never saw it on the sync transport.
        logger.warning(
            "Publish confirm timed out: exchange=%s routing_key=%s",
            envelope.exchange,
            envelope.routing_key,
        )
        return PublishOutcome(
            status=PublishStatus.TIMEOUT,
            exchange=envelope.exchange,
            routing_key=envelope.routing_key,
            error=e,
        )

    except Exception as e:
        logger.error("Publish failed: %s", e)
        return PublishOutcome(
            status=PublishStatus.ERROR,
            exchange=envelope.exchange,
            routing_key=envelope.routing_key,
            error=e,
        )
|
|
693
|
+
|
|
694
|
+
def consume(
    self,
    queue: str,
    callback: Callable[[RabbitMessage], None],
    prefetch: int = 10,
    *,
    no_ack: bool = False,
    declare: bool = True,
) -> str:
    """Start consuming from a queue.

    Each queue gets its OWN channel so per-queue ``basic_qos`` is isolated
    and no longer overwrites other consumers' prefetch (H-SRE1). The
    publisher/topology channel stays separate. Returns the consumer tag.

    ``no_ack=True`` starts a no-ack consumer: the broker auto-acks on
    delivery, and the built ``RabbitMessage`` is not wired with settlement
    functions (there is nothing to ack/nack/reject). The sync path never
    declares the queue here regardless of ``declare`` (declaration is the
    caller's responsibility via ``declare_queue()``); when ``declare=False``
    and ``queue == DIRECT_REPLY_TO_QUEUE``, this consumer's channel is also
    remembered as ``self._reply_to_channel`` so :meth:`publish` can route
    matching requests onto the SAME channel — required by RabbitMQ's direct
    reply-to (see :meth:`publish`).

    If ``basic_qos`` or ``basic_consume`` raises, the registrations made for
    this attempt are rolled back, the new channel is closed best-effort, and
    the original exception is re-raised.
    """
    self._ensure_connected()

    # Remember what was registered before so a failed attempt can be undone.
    previous_channel = self._consumer_channels.get(queue)
    previous_reply_to = self._reply_to_channel

    # Dedicated channel per consumer queue for isolated QoS / fair dispatch.
    consumer_channel = self._connection.channel()
    consumer_tag = f"rabbitkit.{uuid.uuid4()}"

    def on_message(ch: Any, method: Any, properties: Any, body: bytes) -> None:
        """Internal pika callback — builds RabbitMessage and calls user callback."""
        message = self._build_message(ch, method, properties, body, no_ack=no_ack)
        callback(message)

    try:
        consumer_channel.basic_qos(prefetch_count=prefetch)
        self._consumer_channels[queue] = consumer_channel

        if not declare and queue == DIRECT_REPLY_TO_QUEUE:
            self._reply_to_channel = consumer_channel

        consumer_channel.basic_consume(
            queue=queue,
            on_message_callback=on_message,
            auto_ack=no_ack,
            consumer_tag=consumer_tag,
        )
    except Exception:
        # A channel with no consumer must not stay registered: it would keep
        # start_consuming()'s "no consumers registered -> exit" guard from
        # firing and leave publish() pointing at a reply-to channel that has
        # no reply consumer.
        if previous_channel is None:
            self._consumer_channels.pop(queue, None)
        else:
            self._consumer_channels[queue] = previous_channel
        self._reply_to_channel = previous_reply_to
        try:
            if consumer_channel.is_open:
                consumer_channel.close()
        except Exception:  # best effort -- the original error matters more
            logger.debug("Closing channel after failed consume() raised", exc_info=True)
        raise

    self._consumer_tags[queue] = consumer_tag
    logger.info("Started consuming from queue '%s' with tag '%s'", queue, consumer_tag)
    return consumer_tag
|
|
746
|
+
|
|
747
|
+
def declare_exchange(self, exchange: RabbitExchange) -> None:
    """Declare *exchange* on RabbitMQ according to the topology policy.

    ``self._topo.exchange_action`` decides what happens: ``SKIP`` returns
    without touching the broker, ``PASSIVE`` issues a passive declare by name
    only, and any other action declares with the exchange's full kwargs. A
    ``ChannelClosedByBroker`` is handed to
    ``_raise_precondition_failed_or_reraise``.
    """
    action = self._topo.exchange_action(exchange)
    if action is TopoAction.SKIP:
        return

    self._ensure_connected()

    declare_kwargs = exchange.to_declare_kwargs()

    import pika

    # A passive declare only names the exchange; everything else is dropped.
    if action is TopoAction.PASSIVE:
        call_kwargs = {"exchange": declare_kwargs["exchange"], "passive": True}
    else:
        call_kwargs = declare_kwargs

    try:
        self._channel.exchange_declare(**call_kwargs)
    except pika.exceptions.ChannelClosedByBroker as exc:
        self._raise_precondition_failed_or_reraise("exchange", declare_kwargs["exchange"], exc)
|
|
769
|
+
|
|
770
|
+
def declare_queue(self, queue: RabbitQueue) -> None:
    """Declare *queue* on RabbitMQ according to the topology policy.

    ``self._topo.queue_action`` decides what happens: ``SKIP`` returns
    without touching the broker, ``PASSIVE`` issues a passive declare by name
    only, and any other action declares with the queue's full kwargs. A
    ``ChannelClosedByBroker`` is handed to
    ``_raise_precondition_failed_or_reraise``.
    """
    action = self._topo.queue_action(queue)
    if action is TopoAction.SKIP:
        return

    self._ensure_connected()

    declare_kwargs = queue.to_declare_kwargs()

    import pika

    # A passive declare only names the queue; everything else is dropped.
    if action is TopoAction.PASSIVE:
        call_kwargs = {"queue": declare_kwargs["queue"], "passive": True}
    else:
        call_kwargs = declare_kwargs

    try:
        self._channel.queue_declare(**call_kwargs)
    except pika.exceptions.ChannelClosedByBroker as exc:
        self._raise_precondition_failed_or_reraise("queue", declare_kwargs["queue"], exc)
|
|
792
|
+
|
|
793
|
+
def _raise_precondition_failed_or_reraise(self, kind: str, name: str, exc: Any) -> None:
    """M6: turn a 406 PRECONDITION_FAILED into a typed, actionable error.

    Declaring a queue/exchange with arguments that conflict with an
    existing one of the same name (e.g. an ops-created quorum queue
    where rabbitkit's config declares classic, or a different TTL/DLX)
    closes the channel with reply_code 406 and an opaque
    ``ChannelClosedByBroker`` — previously this aborted startup with a
    low-level pika traceback giving no hint which queue/exchange or
    argument actually conflicted. Any other reply code is re-raised
    as-is (not this middleware's concern).

    M14: under ``SafetyConfig.on_topology_conflict="warn_continue"`` a 406
    is logged and swallowed — the entity already exists (a 406, unlike a
    404, proves existence), so rabbitkit continues with the EXISTING
    definition instead of crash-looping. The 406 closed the channel, so
    we reopen it first (connection stays open) for subsequent declares.

    Args:
        kind: ``"queue"`` or ``"exchange"`` — used in messages only.
        name: Name of the entity whose declaration failed.
        exc: The broker exception; must expose ``reply_code`` and
            ``reply_text``.

    Raises:
        ConfigurationError: on a 406 when not in ``warn_continue`` mode.
        Exception: *exc* itself for any other reply code.
    """
    if exc.reply_code != 406:
        raise exc

    if self._on_topology_conflict == "warn_continue":
        # Reopen the broker-closed channel so the rest of topology
        # declaration can proceed on the existing (drifted) entity.
        stale_channel = self._channel
        self._channel = self._connection.channel()
        # Forget the confirm-mode bookkeeping of BOTH channels. The closed
        # channel's id must go, otherwise a later channel object that happens
        # to reuse that id would be treated as already in confirm mode and
        # never get confirm_delivery(). The fresh channel's id is dropped too
        # in case it collides with an older stale entry.
        self._confirmed_channel_ids.discard(id(stale_channel))
        self._confirmed_channel_ids.discard(id(self._channel))
        # NOTE(review): if the replaced channel had been put into confirm
        # mode elsewhere (e.g. at connect time), the reopened one has not —
        # verify against the connect path.
        logger.warning(
            "Topology drift on %s %r (broker: %s); on_topology_conflict='warn_continue' "
            "— continuing with the EXISTING definition (rabbitkit's declaration was NOT "
            "applied). Reconcile the %s or fix its rabbitkit config to silence this.",
            kind,
            name,
            exc.reply_text,
            kind,
        )
        return

    raise ConfigurationError(
        f"Cannot declare {kind} {name!r}: it already exists with incompatible "
        f"arguments (broker said: {exc.reply_text}). This usually means it was "
        f"created outside rabbitkit (e.g. ops tooling) with different arguments "
        f"(e.g. quorum vs classic queue type, a different TTL, or a different "
        f"dead-letter exchange). Either delete/reconcile the existing {kind}, "
        f"adjust its rabbitkit definition to match, or use "
        f"TopologyMode.PASSIVE_ONLY to skip declaration and just verify it exists."
    ) from exc
|
|
837
|
+
|
|
838
|
+
def bind_queue(
    self,
    queue: str,
    exchange: str,
    routing_key: str,
    arguments: dict[str, Any] | None = None,
) -> None:
    """Bind *queue* to *exchange* under *routing_key*.

    ``arguments`` carries header-match criteria for HEADERS exchanges
    (``x-match`` etc.) — without them a headers binding matches every
    message (C4). Nothing is sent to the broker when the topology policy's
    binding action is ``SKIP``.
    """
    skip_binding = self._topo.binding_action() is TopoAction.SKIP
    if not skip_binding:
        self._ensure_connected()

        binding = {
            "queue": queue,
            "exchange": exchange,
            "routing_key": routing_key,
            "arguments": arguments,
        }
        self._channel.queue_bind(**binding)
|
|
862
|
+
|
|
863
|
+
def bind_exchange(
    self,
    destination: str,
    source: str,
    routing_key: str = "",
    arguments: dict[str, Any] | None = None,
) -> None:
    """Bind exchange *destination* to exchange *source* (exchange-to-exchange).

    Nothing is sent to the broker when the topology policy's binding action
    is ``SKIP``.
    """
    skip_binding = self._topo.binding_action() is TopoAction.SKIP
    if not skip_binding:
        self._ensure_connected()

        binding = {
            "destination": destination,
            "source": source,
            "routing_key": routing_key,
            "arguments": arguments,
        }
        self._channel.exchange_bind(**binding)
|
|
882
|
+
|
|
883
|
+
def cancel_consumer(self, consumer_tag: str) -> None:
    """Cancel the consumer identified by *consumer_tag* and drop its channel.

    Does nothing when the transport is not connected or the tag is unknown.
    Otherwise the consumer is cancelled on the channel that owns it
    (per-consumer channels - H-SRE1), that channel is closed best-effort, and
    all bookkeeping for it is removed: the tag and channel registrations, its
    confirm-mode marker, and ``self._reply_to_channel`` when it was that
    channel. A failing ``basic_cancel`` is logged, not raised.
    """
    if not self.is_connected():
        return

    # Find the queue whose channel owns this consumer_tag.
    queue_name = next(
        (q for q, tag in self._consumer_tags.items() if tag == consumer_tag),
        None,
    )
    if queue_name is None:
        return

    channel = self._consumer_channels.get(queue_name)
    try:
        if channel is not None and channel.is_open:
            channel.basic_cancel(consumer_tag=consumer_tag)
    except Exception as e:
        logger.warning("Failed to cancel consumer %s: %s", consumer_tag, e)
    finally:
        try:
            if channel is not None and channel.is_open:
                channel.close()
        except Exception:  # pragma: no cover - best effort
            pass
        self._consumer_tags.pop(queue_name, None)
        self._consumer_channels.pop(queue_name, None)
        if channel is not None:
            # Confirm mode is tracked by id(channel); once this channel object
            # is released a new channel can reuse the same id and would then
            # wrongly be treated as already in confirm mode.
            self._confirmed_channel_ids.discard(id(channel))
        if channel is self._reply_to_channel:
            self._reply_to_channel = None
|
|
915
|
+
|
|
916
|
+
def start_consuming(self) -> None:
    """Run the blocking consume loop on the calling thread.

    The loop pumps the connection itself through ``process_data_events``
    instead of pika's ``channel.start_consuming()``: that helper only loops
    while *its own* channel has consumers, and under the per-queue
    ``_consumer_channels`` design (H-SRE1) the publisher channel has none, so
    it would return immediately. Pumping the connection serves consumers on
    ANY channel.

    The calling thread is recorded as the owner (``_owner_ident``) and
    ``_ever_consumed`` is set. ``_consuming`` is always cleared on exit; a
    ``KeyboardInterrupt`` additionally stops all consumers.
    """
    self._ensure_connected()
    self._consuming = True
    self._ever_consumed = True
    self._owner_ident = threading.get_ident()
    try:
        has_consumers = True
        while has_consumers and self._consuming:
            # One pump drains ALL channels' consumers plus any queued
            # add_callback_threadsafe callbacks (acks from worker threads).
            self._connection.process_data_events(time_limit=1.0)
            # L14: returning normally (instead of raising a connection
            # error) is itself proof the I/O loop is alive, so the tick fires
            # every iteration whether or not a message was delivered.
            self._fire_io_tick()
            # Safety: with no consumers registered, leave the loop (avoids
            # spinning forever in tests/embeds that call start_consuming
            # without a consumer). Real consumers are ended by stop_consuming,
            # which sets _consuming=False.
            has_consumers = bool(self._consumer_channels)
    except KeyboardInterrupt:
        self._stop_all_consumers()
    finally:
        self._consuming = False
|
|
949
|
+
|
|
950
|
+
def _stop_all_consumers(self) -> None:
    """Call ``stop_consuming`` on the publisher channel and each consumer channel.

    ``self._consuming`` is cleared first so the ``start_consuming`` I/O loop
    exits. Channels that are ``None`` or not open are skipped, and an error
    from one channel is logged without preventing the others from being
    stopped.
    """
    self._consuming = False

    targets = [self._channel]
    targets.extend(self._consumer_channels.values())

    for target in targets:
        try:
            if target is None or not target.is_open:
                continue
            target.stop_consuming()
        except Exception:  # pragma: no cover - best effort during shutdown
            logger.warning("stop_consuming raised on a channel", exc_info=True)
|
|
962
|
+
|
|
963
|
+
def stop_consuming(self) -> None:
    """Ask the consume loop to stop; callable from any thread.

    pika's ``BlockingChannel.stop_consuming`` is not thread-safe and has to
    run on the thread that owns the connection, so the actual work
    (``_stop_all_consumers``) is handed to ``_run_on_io_thread`` (I-17) with
    a 5 second bound rather than being called here directly.

    If that hand-off times out (stalled I/O loop) we deliberately do NOT fall
    back to calling pika inline from this thread — that is precisely the
    unsafe call I-17 exists to prevent. A warning is logged and recovery is
    left to the broker's run() loop / k8s SIGKILL + redelivery backstop.

    No-op when the transport is not connected.
    """
    if self.is_connected():
        try:
            self._run_on_io_thread(self._stop_all_consumers, timeout=5.0)
        except TimeoutError:
            logger.warning(
                "stop_consuming marshal timed out (I/O loop stalled); "
                "leaving settlement to broker recovery / redelivery"
            )
|
|
984
|
+
|
|
985
|
+
def pump(self, time_limit: float = 0.05) -> None:
    """Drive the connection's I/O loop for at most *time_limit* seconds.

    H2: after ``start_consuming()``'s loop has exited (consumers cancelled,
    ``_consuming`` is False) nothing else drains callbacks queued through
    ``add_callback_threadsafe`` — which includes worker-thread acks/nacks
    marshaled by ``_run_on_io_thread``. ``SyncBroker.stop()`` calls this
    between waits while draining its worker pool / in-flight messages so
    those callbacks still run on the owner thread rather than stalling until
    ``_run_on_io_thread`` times out.

    MUST be called from the connection's owner thread — the same rule as for
    any other direct pika call. Does nothing when there is no open
    connection.
    """
    connection = self._connection
    if connection is None or not connection.is_open:
        return
    connection.process_data_events(time_limit=time_limit)
|
|
1000
|
+
|
|
1001
|
+
# ── DLQ / inspection (DLQInspector protocol) ──────────────────────────
|
|
1002
|
+
|
|
1003
|
+
def basic_get(self, queue: str) -> RabbitMessage | None:
    """Fetch one message from *queue* without subscribing (auto_ack=False).

    Used by DLQInspector for peek/replay. The ``basic_get`` itself is
    executed through ``_run_on_io_thread`` on the publisher/topology channel.
    Returns ``None`` when the queue is empty, otherwise a message built
    against that same channel.
    """
    self._ensure_connected()

    def fetch_one() -> Any:
        return self._channel.basic_get(queue=queue, auto_ack=False)

    method, properties, body = self._run_on_io_thread(fetch_one)
    # pika reports an empty queue as a ``None`` method frame.
    return None if method is None else self._build_message(self._channel, method, properties, body)
|
|
1013
|
+
|
|
1014
|
+
def purge_queue(self, queue: str) -> int:
    """Remove every message from *queue* and return how many were purged.

    The purge is executed through ``_run_on_io_thread`` on the
    publisher/topology channel; the count is read from the reply frame's
    ``method.message_count``.
    """
    self._ensure_connected()

    def purge() -> Any:
        return self._channel.queue_purge(queue=queue)

    reply = self._run_on_io_thread(purge)
    purged = reply.method.message_count
    return int(purged)
|
|
1019
|
+
|
|
1020
|
+
# ── Internal ──────────────────────────────────────────────────────────
|
|
1021
|
+
|
|
1022
|
+
def _build_message(
    self, channel: Any, method: Any, properties: Any, body: bytes, *, no_ack: bool = False
) -> RabbitMessage:
    """Build RabbitMessage from a pika delivery.

    ``channel`` is the pika channel the delivery arrived on (per-consumer
    channel for consume, publisher/topology channel for basic_get); sync
    settlement (ack/nack/reject) is wired to THAT channel so it stays on the
    correct I/O thread (H-SRE1).

    ``no_ack=True`` (delivery came from a no-ack consumer) skips wiring
    settlement functions entirely — the broker already auto-acked the
    delivery, and a manual ``basic_ack``/``basic_nack``/``basic_reject`` on it
    would be a protocol violation.

    A numeric AMQP timestamp that cannot be represented as a ``datetime``
    is logged and surfaced as ``timestamp=None`` rather than raised.
    """
    # pika carries the AMQP timestamp as a Unix int (seconds); surface it as a
    # tz-aware datetime to match the publish side.
    ts = properties.timestamp
    timestamp = None
    if isinstance(ts, (int, float)):
        try:
            timestamp = datetime.fromtimestamp(ts, tz=UTC)
        except (OverflowError, OSError, ValueError):
            # The value comes from the remote publisher (e.g. milliseconds
            # instead of seconds). Letting this escape would propagate out of
            # the delivery callback for a single bad message, so degrade to
            # "no timestamp" instead.
            logger.warning(
                "Ignoring out-of-range AMQP timestamp %r (delivery_tag=%s)",
                ts,
                method.delivery_tag,
            )

    message = RabbitMessage(
        body=body,
        headers=dict(properties.headers) if properties.headers else {},
        message_id=properties.message_id,
        correlation_id=properties.correlation_id,
        reply_to=properties.reply_to,
        content_type=properties.content_type,
        content_encoding=properties.content_encoding,
        type=properties.type,
        app_id=properties.app_id,
        priority=properties.priority,
        expiration=properties.expiration,
        user_id=properties.user_id,
        timestamp=timestamp,
        routing_key=method.routing_key,
        exchange=method.exchange,
        delivery_tag=method.delivery_tag,
        redelivered=method.redelivered,
        consumer_tag=getattr(method, "consumer_tag", None),  # absent on basic_get (Basic.GetOk)
    )

    if no_ack:
        # Broker already auto-acked this delivery — leave settlement
        # functions unset so ack()/nack()/reject() raise (RabbitMessage's
        # existing "no settlement fn set" guard) instead of issuing an
        # invalid basic_ack/nack/reject against a no-ack delivery. Callers
        # that only read the message (e.g. RPCClient's reply handler,
        # which never settles) are unaffected.
        return message

    # Wire sync settlement functions to the channel that owns this delivery.
    # Each one is marshaled through _run_on_io_thread rather than calling pika
    # directly from whichever thread settles the message.
    def ack_fn() -> None:
        self._run_on_io_thread(lambda: channel.basic_ack(delivery_tag=method.delivery_tag))

    def nack_fn(requeue: bool = True) -> None:
        self._run_on_io_thread(lambda: channel.basic_nack(delivery_tag=method.delivery_tag, requeue=requeue))

    def reject_fn(requeue: bool = False) -> None:
        self._run_on_io_thread(lambda: channel.basic_reject(delivery_tag=method.delivery_tag, requeue=requeue))

    message._ack_fn = ack_fn
    message._nack_fn = nack_fn
    message._reject_fn = reject_fn

    return message
|