jararaca 0.3.11a15__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of jararaca might be problematic.
- README.md +120 -0
- jararaca/__init__.py +106 -8
- jararaca/cli.py +216 -31
- jararaca/messagebus/worker.py +1386 -137
- jararaca/microservice.py +42 -0
- jararaca/persistence/interceptors/aiosqa_interceptor.py +82 -73
- jararaca/persistence/interceptors/constants.py +1 -0
- jararaca/persistence/interceptors/decorators.py +45 -0
- jararaca/presentation/server.py +57 -11
- jararaca/presentation/websocket/redis.py +113 -7
- jararaca/reflect/metadata.py +1 -1
- jararaca/rpc/http/__init__.py +97 -0
- jararaca/rpc/http/backends/__init__.py +10 -0
- jararaca/rpc/http/backends/httpx.py +39 -9
- jararaca/rpc/http/decorators.py +302 -6
- jararaca/scheduler/beat_worker.py +550 -91
- jararaca/tools/typescript/__init__.py +0 -0
- jararaca/tools/typescript/decorators.py +95 -0
- jararaca/tools/typescript/interface_parser.py +699 -156
- jararaca/utils/retry.py +141 -0
- jararaca-0.3.12.dist-info/LICENSE +674 -0
- {jararaca-0.3.11a15.dist-info → jararaca-0.3.12.dist-info}/METADATA +4 -3
- {jararaca-0.3.11a15.dist-info → jararaca-0.3.12.dist-info}/RECORD +27 -19
- {jararaca-0.3.11a15.dist-info → jararaca-0.3.12.dist-info}/WHEEL +1 -1
- pyproject.toml +86 -0
- /jararaca-0.3.11a15.dist-info/LICENSE → /LICENSE +0 -0
- {jararaca-0.3.11a15.dist-info → jararaca-0.3.12.dist-info}/entry_points.txt +0 -0
jararaca/messagebus/worker.py
CHANGED
@@ -1,18 +1,36 @@
 import asyncio
 import inspect
 import logging
+import random
 import signal
+import time
+import uuid
 from abc import ABC
 from contextlib import asynccontextmanager, suppress
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
-from typing import
+from typing import (
+    Any,
+    AsyncContextManager,
+    AsyncGenerator,
+    Awaitable,
+    Optional,
+    Type,
+    get_origin,
+)
 from urllib.parse import parse_qs, urlparse

 import aio_pika
 import aio_pika.abc
 import uvloop
-from aio_pika.exceptions import
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ChannelNotFoundEntity,
+    ConnectionClosed,
+)
 from pydantic import BaseModel

 from jararaca.broker_backend import MessageBrokerBackend
@@ -38,9 +56,12 @@ from jararaca.microservice import (
     MessageBusTransactionData,
     Microservice,
     SchedulerTransactionData,
+    ShutdownState,
+    provide_shutdown_state,
 )
 from jararaca.scheduler.decorators import ScheduledActionData
 from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+from jararaca.utils.retry import RetryConfig, retry_with_backoff

 logger = logging.getLogger(__name__)

@@ -50,6 +71,34 @@ class AioPikaWorkerConfig:
     url: str
     exchange: str
     prefetch_count: int
+    connection_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=15,
+            initial_delay=1.0,
+            max_delay=60.0,
+            backoff_factor=2.0,
+        )
+    )
+    consumer_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=15,
+            initial_delay=0.5,
+            max_delay=40.0,
+            backoff_factor=2.0,
+        )
+    )
+    # Connection health monitoring settings
+    connection_heartbeat_interval: float = 30.0  # seconds
+    connection_health_check_interval: float = 10.0  # seconds
+    reconnection_backoff_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=-1,  # Infinite retries for reconnection
+            initial_delay=2.0,
+            max_delay=120.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )


 class AioPikaMessage(MessageOf[Message]):
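The three new fields above are built from RetryConfig, which is defined in the new jararaca/utils/retry.py (+141 lines, not shown in this excerpt). The following is only a minimal sketch of a RetryConfig compatible with how it is constructed here; the field names come from this diff, while the defaults are assumptions.

# Hedged sketch, not the actual contents of jararaca/utils/retry.py.
from dataclasses import dataclass

@dataclass
class RetryConfig:
    max_retries: int = 5          # -1 is treated by the worker as "retry forever"
    initial_delay: float = 1.0    # seconds before the first retry
    max_delay: float = 60.0       # ceiling for the computed delay
    backoff_factor: float = 2.0   # multiplier applied per attempt
    jitter: bool = False          # add +/-25% random noise to each delay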
@@ -97,6 +146,20 @@ class MessageBusConsumer(ABC):

     def shutdown(self) -> None: ...

+    async def close(self) -> None:
+        """Close all resources related to the consumer"""
+
+
+class _WorkerShutdownState(ShutdownState):
+    def __init__(self, shutdown_event: asyncio.Event):
+        self.shutdown_event = shutdown_event
+
+    def request_shutdown(self) -> None:
+        self.shutdown_event.set()
+
+    def is_shutdown_requested(self) -> bool:
+        return self.shutdown_event.is_set()
+

 class AioPikaMicroserviceConsumer(MessageBusConsumer):
     def __init__(
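_WorkerShutdownState adapts the worker's asyncio.Event to the new ShutdownState interface, and provide_shutdown_state (imported above and wrapped around handler execution later in this diff) exposes it to application code. A plausible way to implement that provide/lookup pair is a ContextVar; the sketch below is an assumption, and only ShutdownState and provide_shutdown_state are names taken from the diff.

# Hedged sketch of a ContextVar-based provider; the getter name is hypothetical.
from contextlib import contextmanager
from contextvars import ContextVar
from typing import Iterator, Optional

_shutdown_state_ctx: ContextVar[Optional["ShutdownState"]] = ContextVar(
    "shutdown_state", default=None
)

@contextmanager
def provide_shutdown_state(state: "ShutdownState") -> Iterator[None]:
    token = _shutdown_state_ctx.set(state)   # make the state visible to handlers
    try:
        yield
    finally:
        _shutdown_state_ctx.reset(token)     # restore the previous value

def current_shutdown_state() -> Optional["ShutdownState"]:
    # Hypothetical accessor a long-running handler could poll to stop early.
    return _shutdown_state_ctx.get()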
@@ -115,107 +178,741 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         self.incoming_map: dict[str, MessageHandlerData] = {}
         self.uow_context_provider = uow_context_provider
         self.shutdown_event = asyncio.Event()
+        self.shutdown_state = _WorkerShutdownState(self.shutdown_event)
         self.lock = asyncio.Lock()
         self.tasks: set[asyncio.Task[Any]] = set()
+        self.connection: aio_pika.abc.AbstractConnection | None = None
+        self.channels: dict[str, aio_pika.abc.AbstractChannel] = {}
+
+        # Connection resilience attributes
+        self.connection_healthy = False
+        self.connection_lock = asyncio.Lock()
+        self.reconnection_event = asyncio.Event()
+        self.reconnection_in_progress = False
+        self.consumer_tags: dict[str, str] = {}  # Track consumer tags for cleanup
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_task: asyncio.Task[Any] | None = None
+
+    async def _verify_infrastructure(self) -> bool:
+        """
+        Verify that the required RabbitMQ infrastructure (exchanges, queues) exists.
+        Returns True if all required infrastructure is in place.
+        """
+        try:
+            async with self.connect() as connection:
+                # Create a main channel just for checking infrastructure
+                async with connection.channel() as main_channel:
+                    # Get existing exchange and queues to verify infrastructure is in place
+                    await RabbitmqUtils.get_main_exchange(
+                        channel=main_channel,
+                        exchange_name=self.config.exchange,
+                    )
+                    await RabbitmqUtils.get_dl_exchange(channel=main_channel)
+                    await RabbitmqUtils.get_dl_queue(channel=main_channel)
+                    return True
+        except (ChannelNotFoundEntity, ChannelClosed, AMQPError) as e:
+            logger.critical(
+                f"Required exchange or queue infrastructure not found. "
+                f"Please use the declare command first to create the required infrastructure. Error: {e}"
+            )
+            return False
+
+    async def _setup_message_handler_consumer(
+        self, handler: MessageHandlerData
+    ) -> bool:
+        """
+        Set up a consumer for a message handler with retry mechanism.
+        Returns True if successful, False otherwise.
+        """
+        queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+        routing_key = f"{handler.message_type.MESSAGE_TOPIC}.#"
+
+        async def setup_consumer() -> None:
+            # Wait for connection to be healthy if reconnection is in progress
+            if self.reconnection_in_progress:
+                await self.reconnection_event.wait()
+
+            # Create a channel using the context manager
+            async with self.create_channel(queue_name) as channel:
+                queue = await RabbitmqUtils.get_queue(
+                    channel=channel, queue_name=queue_name
+                )

+                # Configure consumer and get the consumer tag
+                consumer_tag = await queue.consume(
+                    callback=MessageHandlerCallback(
+                        consumer=self,
+                        queue_name=queue_name,
+                        routing_key=routing_key,
+                        message_handler=handler,
+                    ),
+                    no_ack=handler.spec.auto_ack,
+                )

+                # Store consumer tag for cleanup
+                self.consumer_tags[queue_name] = consumer_tag

+                logger.info(
+                    f"Consuming message handler {queue_name} on dedicated channel"
+                )

+        try:
+            # Setup with retry
+            await retry_with_backoff(
+                setup_consumer,
+                retry_config=self.config.consumer_retry_config,
+                retry_exceptions=(
+                    ChannelNotFoundEntity,
+                    ChannelClosed,
+                    AMQPError,
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ConnectionClosed,
+                ),
+            )
+            return True
+        except Exception as e:
+            logger.error(
+                f"Failed to setup consumer for queue '{queue_name}' after retries: {e}"
+            )
+            return False
+
+    async def _setup_scheduled_action_consumer(
+        self, scheduled_action: ScheduledActionData
+    ) -> bool:
+        """
+        Set up a consumer for a scheduled action with retry mechanism.
+        Returns True if successful, False otherwise.
+        """
+        queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+        routing_key = queue_name
+
+        async def setup_consumer() -> None:
+            # Wait for connection to be healthy if reconnection is in progress
+            if self.reconnection_in_progress:
+                await self.reconnection_event.wait()
+
+            # Create a channel using the context manager
+            async with self.create_channel(queue_name) as channel:
+                queue = await RabbitmqUtils.get_queue(
+                    channel=channel, queue_name=queue_name
+                )
+
+                # Configure consumer and get the consumer tag
+                consumer_tag = await queue.consume(
+                    callback=ScheduledMessageHandlerCallback(
+                        consumer=self,
+                        queue_name=queue_name,
+                        routing_key=routing_key,
+                        scheduled_action=scheduled_action,
+                    ),
+                    no_ack=True,
+                )
+
+                # Store consumer tag for cleanup
+                self.consumer_tags[queue_name] = consumer_tag
+
+                logger.info(f"Consuming scheduler {queue_name} on dedicated channel")

-            # Get existing exchange and queues
         try:
+            # Setup with retry
+            await retry_with_backoff(
+                setup_consumer,
+                retry_config=self.config.consumer_retry_config,
+                retry_exceptions=(
+                    ChannelNotFoundEntity,
+                    ChannelClosed,
+                    AMQPError,
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ConnectionClosed,
+                ),
             )
+            return True
+        except Exception as e:
+            logger.error(
+                f"Failed to setup consumer for scheduler queue '{queue_name}' after retries: {e}"
+            )
+            return False

+    async def consume(self) -> None:
+        """
+        Main consume method that sets up all message handlers and scheduled actions with retry mechanisms.
+        """
+        # Establish initial connection
+        async with self.connect() as connection:
+            self.connection_healthy = True
+
+            # Start connection health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
             )
+
+            # Verify infrastructure with retry
+            infra_check_success = await retry_with_backoff(
+                self._verify_infrastructure,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(Exception,),
+            )
+
+            if not infra_check_success:
+                logger.critical(
+                    "Failed to verify RabbitMQ infrastructure. Shutting down."
+                )
+                self.shutdown_event.set()
+                return
+
+            async def wait_for(
+                type: str, name: str, coroutine: Awaitable[bool]
+            ) -> tuple[str, str, bool]:
+                return type, name, await coroutine
+
+            tasks: set[asyncio.Task[tuple[str, str, bool]]] = set()
+
+            # Setup message handlers
+            for handler in self.message_handler_set:
+                queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+                self.incoming_map[queue_name] = handler
+
+                tasks.add(
+                    task := asyncio.create_task(
+                        wait_for(
+                            "message_handler",
+                            queue_name,
+                            self._setup_message_handler_consumer(handler),
+                        )
+                    )
+                )
+
+            # Setup scheduled actions
+            for scheduled_action in self.scheduled_actions:
+                queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+                tasks.add(
+                    task := asyncio.create_task(
+                        wait_for(
+                            "scheduled_action",
+                            queue_name,
+                            self._setup_scheduled_action_consumer(scheduled_action),
+                        )
+                    )
+                )
+
+            async def handle_task_results() -> None:
+                for task in asyncio.as_completed(tasks):
+                    type, name, success = await task
+                    if success:
+                        logger.info(f"Successfully set up {type} consumer for {name}")
+                    else:
+                        logger.warning(
+                            f"Failed to set up {type} consumer for {name}, will not process messages from this queue"
+                        )
+
+            handle_task_results_task = asyncio.create_task(handle_task_results())
+
+            # Wait for shutdown signal
+            await self.shutdown_event.wait()
+            logger.info("Shutdown event received, stopping consumers")
+
+            # Cancel health monitoring
+            if self.health_check_task:
+                self.health_check_task.cancel()
+                with suppress(asyncio.CancelledError):
+                    await self.health_check_task
+
+            # Cancel reconnection task if running
+            if self.reconnection_task:
+                self.reconnection_task.cancel()
+                with suppress(asyncio.CancelledError):
+                    await self.reconnection_task
+
+            handle_task_results_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await handle_task_results_task
+            for task in tasks:
+                if not task.done():
+                    task.cancel()
+                    with suppress(asyncio.CancelledError):
+                        await task
+            logger.info("Worker shutting down")
+
+            # Wait for all tasks to complete
+            await self.wait_all_tasks_done()
+
+            # Close all channels and the connection
+            await self.close_channels_and_connection()
+
+    async def wait_all_tasks_done(self) -> None:
+        if not self.tasks:
             return

-        for
+        logger.info(f"Waiting for {len(self.tasks)} in-flight tasks to complete")
+        async with self.lock:
+            # Use gather with return_exceptions=True to ensure all tasks are awaited
+            # even if some raise exceptions
+            results = await asyncio.gather(*self.tasks, return_exceptions=True)

+            # Log any exceptions that occurred
+            for result in results:
+                if isinstance(result, Exception):
+                    logger.error(f"Task raised an exception during shutdown: {result}")
+
+    async def close_channels_and_connection(self) -> None:
+        """Close all channels and then the connection"""
+        logger.info("Closing channels and connection...")
+        await self._cleanup_connection()

+    def shutdown(self) -> None:
+        """Signal for shutdown"""
+        logger.info("Initiating graceful shutdown")
+        self.shutdown_event.set()

+    async def close(self) -> None:
+        """Implement MessageBusConsumer.close for cleanup"""
+        logger.info("Closing consumer...")
+        self.shutdown()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self.health_check_task
+
+        # Cancel reconnection task if running
+        if self.reconnection_task:
+            self.reconnection_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self.reconnection_task
+
+        await self.wait_all_tasks_done()
+        await self.close_channels_and_connection()
+
+    async def get_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel | None:
+        """
+        Get the channel for a specific queue, or None if not found.
+        This helps with error handling when a channel might have been closed.
+        """
+        # If reconnection is in progress, wait for it to complete
+        if self.reconnection_in_progress:
            try:
-                logger.error(
-                    f"Queue '{queue_name}' not found and passive mode is enabled. "
-                    f"Please use the declare command first to create the queue. Error: {e}"
+                await asyncio.wait_for(self.reconnection_event.wait(), timeout=30.0)
+            except asyncio.TimeoutError:
+                logger.warning(
+                    f"Timeout waiting for reconnection when getting channel for {queue_name}"
                 )
+                return None

+        if queue_name not in self.channels:
+            logger.warning(f"No channel found for queue {queue_name}")
+            return None
+
+        try:
+            channel = self.channels[queue_name]
+            if channel.is_closed:
+                logger.warning(f"Channel for queue {queue_name} is closed")
+                # Remove the closed channel
+                del self.channels[queue_name]
+
+                # Attempt to recreate the channel if connection is healthy
+                if (
+                    self.connection
+                    and not self.connection.is_closed
+                    and self.connection_healthy
+                ):
+                    try:
+                        logger.info(f"Creating new channel for {queue_name}")
+                        self.channels[queue_name] = await self.connection.channel()
+                        await self.channels[queue_name].set_qos(
+                            prefetch_count=self.config.prefetch_count
+                        )
+                        return self.channels[queue_name]
+                    except Exception as e:
+                        logger.error(
+                            f"Failed to recreate channel for {queue_name}: {e}"
+                        )
+                        # Trigger reconnection if channel creation fails
+                        self._trigger_reconnection()
+                        return None
+                else:
+                    # Connection is not healthy, trigger reconnection
+                    self._trigger_reconnection()
+                    return None
+            return channel
+        except Exception as e:
+            logger.error(f"Error accessing channel for queue {queue_name}: {e}")
+            # Trigger reconnection on any channel access error
+            self._trigger_reconnection()
+            return None
+
+    async def _establish_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel:
+        """
+        Creates a new channel for the specified queue with proper QoS settings.
+        """
+        if self.connection is None or self.connection.is_closed:
+            logger.warning(
+                f"Cannot create channel for {queue_name}: connection is not available"
             )
+            raise RuntimeError("Connection is not available")

+        logger.debug(f"Creating channel for queue {queue_name}")
+        channel = await self.connection.channel()
+        await channel.set_qos(prefetch_count=self.config.prefetch_count)
+        logger.debug(f"Created channel for queue {queue_name}")
+        return channel

+    @asynccontextmanager
+    async def create_channel(
+        self, queue_name: str
+    ) -> AsyncGenerator[aio_pika.abc.AbstractChannel, None]:
+        """
+        Create and yield a channel for the specified queue with retry mechanism.
+        This context manager ensures the channel is properly managed.
+        """
+        try:
+            # Create a new channel with retry
+            channel = await retry_with_backoff(
+                fn=lambda: self._establish_channel(queue_name),
+                retry_config=self.config.consumer_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ConnectionError,
+                ),
+            )

+            # Save in the channels dict for tracking
+            self.channels[queue_name] = channel
+            logger.debug(f"Created new channel for queue {queue_name}")

+            try:
+                yield channel
+            finally:
+                # Don't close the channel here as it might be used later
+                # It will be closed during shutdown
+                pass
+        except aio_pika.exceptions.AMQPError as e:
+            logger.error(
+                f"Error creating channel for queue {queue_name} after retries: {e}"
+            )
+            raise

+    async def _establish_connection(self) -> aio_pika.abc.AbstractConnection:
+        """
+        Creates a new RabbitMQ connection with retry logic.
+        """
+        try:
+            logger.info("Establishing connection to RabbitMQ")
+            connection = await aio_pika.connect(
+                self.config.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
+            logger.info("Connected to RabbitMQ successfully")
+            return connection
+        except Exception as e:
+            logger.error(f"Failed to connect to RabbitMQ: {e}")
+            raise
+
+    @asynccontextmanager
+    async def connect(self) -> AsyncGenerator[aio_pika.abc.AbstractConnection, None]:
+        """
+        Create and manage the main connection to RabbitMQ with automatic retry.
+        """
+        if self.connection is not None and not self.connection.is_closed:
+            logger.debug("Connection already exists, reusing existing connection")
            try:
-                        f"Scheduler queue '{queue_name}' not found and passive mode is enabled. "
-                        f"Please use the declare command first to create the queue. Error: {e}"
-                    )
-                    continue
+                yield self.connection
+            finally:
+                # The existing connection will be handled by close_channels_and_connection
+                pass
+            return

+        try:
+            # Create a new connection with retry
+            self.connection = await retry_with_backoff(
+                self._establish_connection,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    ConnectionError,
+                    OSError,
+                    TimeoutError,
                 ),
-                no_ack=True,
             )

-
+            try:
+                yield self.connection
+            finally:
+                # Don't close the connection here; it will be closed in close_channels_and_connection
+                pass
+        except Exception as e:
+            logger.error(
+                f"Failed to establish connection to RabbitMQ after retries: {e}"
+            )
+            if self.connection:
+                try:
+                    await self.connection.close()
+                except Exception as close_error:
+                    logger.error(
+                        f"Error closing connection after connect failure: {close_error}"
+                    )
+            self.connection = None
+            raise

+    @asynccontextmanager
+    async def get_channel_ctx(
+        self, queue_name: str
+    ) -> AsyncGenerator[aio_pika.abc.AbstractChannel, None]:
+        """
+        Get a channel for a specific queue as a context manager.
+        This is safer than using get_channel directly as it ensures proper error handling.
+        """
+        max_retries = 3
+        retry_delay = 1.0
+
+        for attempt in range(max_retries):
+            try:
+                channel = await self.get_channel(queue_name)
+                if channel is not None:
+                    try:
+                        yield channel
+                        return
+                    finally:
+                        # We don't close the channel here as it's managed by the consumer
+                        pass
+
+                # No channel available, check connection state
+                if (
+                    self.connection
+                    and not self.connection.is_closed
+                    and self.connection_healthy
+                ):
+                    # Try to create a new channel
+                    async with self.create_channel(queue_name) as new_channel:
+                        yield new_channel
+                        return
+                else:
+                    # Connection is not healthy, wait for reconnection
+                    if self.reconnection_in_progress:
+                        try:
+                            await asyncio.wait_for(
+                                self.reconnection_event.wait(), timeout=30.0
+                            )
+                            # Retry after reconnection
+                            continue
+                        except asyncio.TimeoutError:
+                            logger.warning(
+                                f"Timeout waiting for reconnection for queue {queue_name}"
+                            )

+                    # Still no connection, trigger reconnection
+                    if not self.reconnection_in_progress:
+                        self._trigger_reconnection()

+                if attempt < max_retries - 1:
+                    logger.info(
+                        f"Retrying channel access for {queue_name} in {retry_delay}s"
+                    )
+                    await asyncio.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    raise RuntimeError(
+                        f"Cannot get channel for queue {queue_name}: no connection available after {max_retries} attempts"
+                    )

+            except Exception as e:
+                if attempt < max_retries - 1:
+                    logger.warning(
+                        f"Error getting channel for {queue_name}, retrying: {e}"
+                    )
+                    await asyncio.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    logger.error(
+                        f"Failed to get channel for {queue_name} after {max_retries} attempts: {e}"
+                    )
+                    raise
+
+    async def _monitor_connection_health(self) -> None:
+        """
+        Monitor connection health and trigger reconnection if needed.
+        This runs as a background task.
+        """
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.connection_health_check_interval)

+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        self._trigger_reconnection()
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """
+        Check if the connection is healthy.
+        """
+        try:
+            if self.connection is None or self.connection.is_closed:
+                return False
+
+            # Try to create a temporary channel to test connection
+            async with self.connection.channel() as test_channel:
+                # If we can create a channel, connection is healthy
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    def _trigger_reconnection(self) -> None:
+        """
+        Trigger reconnection process.
+        """
+        if not self.reconnection_in_progress and not self.shutdown_event.is_set():
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+            self.reconnection_event.clear()
+
+            # Start reconnection task
+            self.reconnection_task = asyncio.create_task(self._handle_reconnection())
+            self.reconnection_task.add_done_callback(self._on_reconnection_done)
+
+    def _on_reconnection_done(self, task: asyncio.Task[Any]) -> None:
+        """
+        Handle completion of reconnection task.
+        """
+        self.reconnection_in_progress = False
+        if task.exception():
+            logger.error(f"Reconnection task failed: {task.exception()}")
+        else:
+            logger.info("Reconnection completed successfully")
+
+    async def _handle_reconnection(self) -> None:
+        """
+        Handle the reconnection process with exponential backoff.
+        """
+        logger.info("Starting reconnection process")
+
+        # Close existing connection and channels
+        await self._cleanup_connection()
+
+        reconnection_config = self.config.reconnection_backoff_config
+        attempt = 0
+
+        while not self.shutdown_event.is_set():
+            try:
+                attempt += 1
+                logger.info(f"Reconnection attempt {attempt}")
+
+                # Establish new connection
+                self.connection = await self._establish_connection()
+                self.connection_healthy = True
+
+                # Re-establish all consumers
+                await self._reestablish_consumers()
+
+                logger.info("Reconnection successful")
+                self.reconnection_event.set()
+                return
+
+            except Exception as e:
+                logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Calculate backoff delay
+                delay = reconnection_config.initial_delay * (
+                    reconnection_config.backoff_factor ** (attempt - 1)
+                )
+                if reconnection_config.jitter:
+                    jitter_amount = delay * 0.25
+                    delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                    delay = max(delay, 0.1)
+
+                delay = min(delay, reconnection_config.max_delay)
+
+                logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                await asyncio.sleep(delay)
+
+    async def _cleanup_connection(self) -> None:
+        """
+        Clean up existing connection and channels.
+        """
+        # Cancel existing consumers
+        for queue_name, channel in self.channels.items():
+            try:
+                if not channel.is_closed:
+                    # Cancel consumer if we have its tag
+                    if queue_name in self.consumer_tags:
+                        try:
+                            queue = await channel.get_queue(queue_name, ensure=False)
+                            if queue:
+                                await queue.cancel(self.consumer_tags[queue_name])
+                        except Exception as cancel_error:
+                            logger.warning(
+                                f"Error cancelling consumer for {queue_name}: {cancel_error}"
+                            )
+                        del self.consumer_tags[queue_name]
+            except Exception as e:
+                logger.warning(f"Error cancelling consumer for {queue_name}: {e}")
+
+        # Close channels
+        for queue_name, channel in self.channels.items():
+            try:
+                if not channel.is_closed:
+                    await channel.close()
+            except Exception as e:
+                logger.warning(f"Error closing channel for {queue_name}: {e}")
+
+        self.channels.clear()
+
+        # Close connection
+        if self.connection and not self.connection.is_closed:
+            try:
+                await self.connection.close()
+            except Exception as e:
+                logger.warning(f"Error closing connection: {e}")
+
+        self.connection = None
+        self.connection_healthy = False
+
+    async def _reestablish_consumers(self) -> None:
+        """
+        Re-establish all consumers after reconnection.
+        """
+        logger.info("Re-establishing consumers after reconnection")
+
+        # Re-establish message handlers
+        for handler in self.message_handler_set:
+            queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+            try:
+                await self._setup_message_handler_consumer(handler)
+                logger.info(f"Re-established consumer for {queue_name}")
+            except Exception as e:
+                logger.error(f"Failed to re-establish consumer for {queue_name}: {e}")
+
+        # Re-establish scheduled actions
+        for scheduled_action in self.scheduled_actions:
+            queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+            try:
+                await self._setup_scheduled_action_consumer(scheduled_action)
+                logger.info(f"Re-established scheduler consumer for {queue_name}")
+            except Exception as e:
+                logger.error(
+                    f"Failed to re-establish scheduler consumer for {queue_name}: {e}"
+                )


 def create_message_bus(
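The reconnection loop above computes each delay as initial_delay * backoff_factor ** (attempt - 1), optionally adds about +/-25% jitter with a 0.1 s floor, and caps the result at max_delay; handle_reject_message later in this diff uses the same formula with the attempt counted from zero. A standalone sketch of that calculation:

# Sketch of the backoff calculation used by _handle_reconnection above.
import random

def backoff_delay(attempt: int, initial_delay: float, backoff_factor: float,
                  max_delay: float, jitter: bool) -> float:
    delay = initial_delay * (backoff_factor ** (attempt - 1))
    if jitter:
        jitter_amount = delay * 0.25
        delay += random.uniform(-jitter_amount, jitter_amount)
        delay = max(delay, 0.1)   # keep the delay positive despite jitter
    return min(delay, max_delay)  # always respect the configured ceiling

# With initial_delay=2.0, backoff_factor=2.0, max_delay=120.0 and jitter disabled,
# attempts 1..5 wait 2, 4, 8, 16 and 32 seconds.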
@@ -254,10 +951,150 @@ def create_message_bus(
     exchange = query_params["exchange"][0]
     prefetch_count = int(query_params["prefetch_count"][0])

+    # Parse optional retry configuration parameters
+    connection_retry_config = RetryConfig()
+    consumer_retry_config = RetryConfig(
+        max_retries=30, initial_delay=5, max_delay=60.0, backoff_factor=3.0
+    )
+
+    # Parse optional reconnection configuration parameters
+    reconnection_backoff_config = RetryConfig(
+        max_retries=-1,  # Infinite retries for reconnection
+        initial_delay=2.0,
+        max_delay=120.0,
+        backoff_factor=2.0,
+        jitter=True,
+    )
+
+    # Parse heartbeat and health check intervals
+    connection_heartbeat_interval = 30.0
+    connection_health_check_interval = 10.0
+
+    # Connection retry config parameters
+    if (
+        "connection_retry_max" in query_params
+        and query_params["connection_retry_max"][0].isdigit()
+    ):
+        connection_retry_config.max_retries = int(
+            query_params["connection_retry_max"][0]
+        )
+
+    if "connection_retry_delay" in query_params:
+        try:
+            connection_retry_config.initial_delay = float(
+                query_params["connection_retry_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "connection_retry_max_delay" in query_params:
+        try:
+            connection_retry_config.max_delay = float(
+                query_params["connection_retry_max_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "connection_retry_backoff" in query_params:
+        try:
+            connection_retry_config.backoff_factor = float(
+                query_params["connection_retry_backoff"][0]
+            )
+        except ValueError:
+            pass
+
+    # Consumer retry config parameters
+    if (
+        "consumer_retry_max" in query_params
+        and query_params["consumer_retry_max"][0].isdigit()
+    ):
+        consumer_retry_config.max_retries = int(
+            query_params["consumer_retry_max"][0]
+        )
+
+    if "consumer_retry_delay" in query_params:
+        try:
+            consumer_retry_config.initial_delay = float(
+                query_params["consumer_retry_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "consumer_retry_max_delay" in query_params:
+        try:
+            consumer_retry_config.max_delay = float(
+                query_params["consumer_retry_max_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "consumer_retry_backoff" in query_params:
+        try:
+            consumer_retry_config.backoff_factor = float(
+                query_params["consumer_retry_backoff"][0]
+            )
+        except ValueError:
+            pass
+
+    # Reconnection backoff config parameters
+    if (
+        "reconnection_retry_max" in query_params
+        and query_params["reconnection_retry_max"][0].isdigit()
+    ):
+        reconnection_backoff_config.max_retries = int(
+            query_params["reconnection_retry_max"][0]
+        )
+
+    if "reconnection_retry_delay" in query_params:
+        try:
+            reconnection_backoff_config.initial_delay = float(
+                query_params["reconnection_retry_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "reconnection_retry_max_delay" in query_params:
+        try:
+            reconnection_backoff_config.max_delay = float(
+                query_params["reconnection_retry_max_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "reconnection_retry_backoff" in query_params:
+        try:
+            reconnection_backoff_config.backoff_factor = float(
+                query_params["reconnection_retry_backoff"][0]
+            )
+        except ValueError:
+            pass
+
+    # Heartbeat and health check intervals
+    if "connection_heartbeat_interval" in query_params:
+        try:
+            connection_heartbeat_interval = float(
+                query_params["connection_heartbeat_interval"][0]
+            )
+        except ValueError:
+            pass
+
+    if "connection_health_check_interval" in query_params:
+        try:
+            connection_health_check_interval = float(
+                query_params["connection_health_check_interval"][0]
+            )
+        except ValueError:
+            pass
+
     config = AioPikaWorkerConfig(
         url=broker_url,
         exchange=exchange,
         prefetch_count=prefetch_count,
+        connection_retry_config=connection_retry_config,
+        consumer_retry_config=consumer_retry_config,
+        connection_heartbeat_interval=connection_heartbeat_interval,
+        connection_health_check_interval=connection_health_check_interval,
+        reconnection_backoff_config=reconnection_backoff_config,
     )

     return AioPikaMicroserviceConsumer(
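create_message_bus reads all of these new settings from the query string of the broker URL. The snippet below is a hypothetical example (host, credentials and values are made up), but the parameter names are the ones parsed in the hunk above:

# Hypothetical broker URL showing the query parameters recognised above.
broker_url = (
    "amqp://guest:guest@localhost/"
    "?exchange=jararaca.events"
    "&prefetch_count=10"
    "&connection_retry_max=20&connection_retry_delay=1.0"
    "&connection_retry_max_delay=60.0&connection_retry_backoff=2.0"
    "&consumer_retry_max=30&consumer_retry_delay=0.5"
    "&reconnection_retry_delay=2.0&reconnection_retry_max_delay=120.0"
    "&connection_heartbeat_interval=30&connection_health_check_interval=10"
)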
@@ -291,8 +1128,42 @@ class ScheduledMessageHandlerCallback:
|
|
|
291
1128
|
) -> None:
|
|
292
1129
|
|
|
293
1130
|
if self.consumer.shutdown_event.is_set():
|
|
1131
|
+
logger.info(
|
|
1132
|
+
f"Shutdown in progress. Requeuing scheduled message for {self.queue_name}"
|
|
1133
|
+
)
|
|
1134
|
+
try:
|
|
1135
|
+
# Use channel context for requeuing
|
|
1136
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1137
|
+
await aio_pika_message.reject(requeue=True)
|
|
1138
|
+
except RuntimeError:
|
|
1139
|
+
logger.warning(
|
|
1140
|
+
f"Could not requeue scheduled message during shutdown - channel not available"
|
|
1141
|
+
)
|
|
1142
|
+
except Exception as e:
|
|
1143
|
+
logger.error(
|
|
1144
|
+
f"Failed to requeue scheduled message during shutdown: {e}"
|
|
1145
|
+
)
|
|
294
1146
|
return
|
|
295
1147
|
|
|
1148
|
+
# Check if connection is healthy before processing
|
|
1149
|
+
if not self.consumer.connection_healthy:
|
|
1150
|
+
logger.warning(
|
|
1151
|
+
f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
|
|
1152
|
+
)
|
|
1153
|
+
try:
|
|
1154
|
+
# Wait briefly for potential reconnection
|
|
1155
|
+
await asyncio.sleep(0.1)
|
|
1156
|
+
if not self.consumer.connection_healthy:
|
|
1157
|
+
# Still not healthy, requeue the message
|
|
1158
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1159
|
+
await aio_pika_message.reject(requeue=True)
|
|
1160
|
+
return
|
|
1161
|
+
except Exception as e:
|
|
1162
|
+
logger.error(
|
|
1163
|
+
f"Failed to requeue scheduled message due to connection issues: {e}"
|
|
1164
|
+
)
|
|
1165
|
+
return
|
|
1166
|
+
|
|
296
1167
|
async with self.consumer.lock:
|
|
297
1168
|
task = asyncio.create_task(self.handle_message(aio_pika_message))
|
|
298
1169
|
self.consumer.tasks.add(task)
|
|
@@ -300,14 +1171,48 @@ class ScheduledMessageHandlerCallback:
|
|
|
300
1171
|
|
|
301
1172
|
def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
|
|
302
1173
|
self.consumer.tasks.discard(task)
|
|
1174
|
+
if task.cancelled():
|
|
1175
|
+
logger.warning(f"Scheduled task for {self.queue_name} was cancelled")
|
|
1176
|
+
return
|
|
1177
|
+
|
|
1178
|
+
if (error := task.exception()) is not None:
|
|
1179
|
+
logger.exception(
|
|
1180
|
+
f"Error processing scheduled action {self.queue_name}", exc_info=error
|
|
1181
|
+
)
|
|
303
1182
|
|
|
304
1183
|
async def handle_message(
|
|
305
1184
|
self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
|
|
306
1185
|
) -> None:
|
|
307
1186
|
|
|
308
1187
|
if self.consumer.shutdown_event.is_set():
|
|
309
|
-
logger.info("Shutdown event set.
|
|
310
|
-
|
|
1188
|
+
logger.info(f"Shutdown event set. Requeuing message for {self.queue_name}")
|
|
1189
|
+
try:
|
|
1190
|
+
# Use channel context for requeuing
|
|
1191
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1192
|
+
await aio_pika_message.reject(requeue=True)
|
|
1193
|
+
return
|
|
1194
|
+
except RuntimeError:
|
|
1195
|
+
logger.warning(
|
|
1196
|
+
f"Could not requeue message during shutdown - channel not available"
|
|
1197
|
+
)
|
|
1198
|
+
except Exception as e:
|
|
1199
|
+
logger.error(f"Failed to requeue message during shutdown: {e}")
|
|
1200
|
+
return
|
|
1201
|
+
|
|
1202
|
+
# Check connection health before processing
|
|
1203
|
+
if not self.consumer.connection_healthy:
|
|
1204
|
+
logger.warning(
|
|
1205
|
+
f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
|
|
1206
|
+
)
|
|
1207
|
+
try:
|
|
1208
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1209
|
+
await aio_pika_message.reject(requeue=True)
|
|
1210
|
+
return
|
|
1211
|
+
except Exception as e:
|
|
1212
|
+
logger.error(
|
|
1213
|
+
f"Failed to requeue scheduled message due to connection issues: {e}"
|
|
1214
|
+
)
|
|
1215
|
+
return
|
|
311
1216
|
|
|
312
1217
|
sig = inspect.signature(self.scheduled_action.callable)
|
|
313
1218
|
if len(sig.parameters) == 1:
|
|
@@ -352,18 +1257,19 @@ class ScheduledMessageHandlerCallback:
|
|
|
352
1257
|
args: tuple[Any, ...],
|
|
353
1258
|
kwargs: dict[str, Any],
|
|
354
1259
|
) -> None:
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
1260
|
+
with provide_shutdown_state(self.consumer.shutdown_state):
|
|
1261
|
+
async with self.consumer.uow_context_provider(
|
|
1262
|
+
AppTransactionContext(
|
|
1263
|
+
controller_member_reflect=scheduled_action.controller_member,
|
|
1264
|
+
transaction_data=SchedulerTransactionData(
|
|
1265
|
+
scheduled_to=datetime.now(UTC),
|
|
1266
|
+
cron_expression=scheduled_action.spec.cron,
|
|
1267
|
+
triggered_at=datetime.now(UTC),
|
|
1268
|
+
),
|
|
1269
|
+
)
|
|
1270
|
+
):
|
|
365
1271
|
|
|
366
|
-
|
|
1272
|
+
await scheduled_action.callable(*args, **kwargs)
|
|
367
1273
|
|
|
368
1274
|
|
|
369
1275
|
class MessageHandlerCallback:
|
|
@@ -379,13 +1285,44 @@ class MessageHandlerCallback:
|
|
|
379
1285
|
self.queue_name = queue_name
|
|
380
1286
|
self.routing_key = routing_key
|
|
381
1287
|
self.message_handler = message_handler
|
|
1288
|
+
self.retry_state: dict[str, dict[str, Any]] = {}
|
|
382
1289
|
|
|
383
1290
|
async def message_consumer(
|
|
384
1291
|
self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
|
|
385
1292
|
) -> None:
|
|
386
1293
|
if self.consumer.shutdown_event.is_set():
|
|
1294
|
+
logger.info(
|
|
1295
|
+
f"Shutdown in progress. Requeuing message for {self.queue_name}"
|
|
1296
|
+
)
|
|
1297
|
+
try:
|
|
1298
|
+
# Use channel context for requeuing
|
|
1299
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1300
|
+
await aio_pika_message.reject(requeue=True)
|
|
1301
|
+
except RuntimeError:
|
|
1302
|
+
logger.warning(
|
|
1303
|
+
f"Could not requeue message during shutdown - channel not available"
|
|
1304
|
+
)
|
|
1305
|
+
except Exception as e:
|
|
1306
|
+
logger.error(f"Failed to requeue message during shutdown: {e}")
|
|
387
1307
|
return
|
|
388
1308
|
|
|
1309
|
+
# Check if connection is healthy before processing
|
|
1310
|
+
if not self.consumer.connection_healthy:
|
|
1311
|
+
logger.warning(
|
|
1312
|
+
f"Connection not healthy, requeuing message for {self.queue_name}"
|
|
1313
|
+
)
|
|
1314
|
+
try:
|
|
1315
|
+
# Wait briefly for potential reconnection
|
|
1316
|
+
await asyncio.sleep(0.1)
|
|
1317
|
+
if not self.consumer.connection_healthy:
|
|
1318
|
+
# Still not healthy, requeue the message
|
|
1319
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1320
|
+
await aio_pika_message.reject(requeue=True)
|
|
1321
|
+
return
|
|
1322
|
+
except Exception as e:
|
|
1323
|
+
logger.error(f"Failed to requeue message due to connection issues: {e}")
|
|
1324
|
+
return
|
|
1325
|
+
|
|
389
1326
|
async with self.consumer.lock:
|
|
390
1327
|
task = asyncio.create_task(self.handle_message(aio_pika_message))
|
|
391
1328
|
self.consumer.tasks.add(task)
|
|
@@ -394,10 +1331,13 @@ class MessageHandlerCallback:
|
|
|
394
1331
|
def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
|
|
395
1332
|
self.consumer.tasks.discard(task)
|
|
396
1333
|
if task.cancelled():
|
|
1334
|
+
logger.warning(f"Task for queue {self.queue_name} was cancelled")
|
|
397
1335
|
return
|
|
398
1336
|
|
|
399
1337
|
if (error := task.exception()) is not None:
|
|
400
|
-
logger.exception(
|
|
1338
|
+
logger.exception(
|
|
1339
|
+
f"Error processing message for queue {self.queue_name}", exc_info=error
|
|
1340
|
+
)
|
|
401
1341
|
|
|
402
1342
|
async def __call__(
|
|
403
1343
|
self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
|
|
@@ -408,14 +1348,203 @@ class MessageHandlerCallback:
|
|
|
408
1348
|
self,
|
|
409
1349
|
aio_pika_message: aio_pika.abc.AbstractIncomingMessage,
|
|
410
1350
|
requeue: bool = False,
|
|
1351
|
+
retry_count: int = 0,
|
|
1352
|
+
exception: Optional[BaseException] = None,
|
|
411
1353
|
) -> None:
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
1354
|
+
"""
|
|
1355
|
+
Handle rejecting a message, with support for retry with exponential backoff.
|
|
1356
|
+
|
|
1357
|
+
Args:
|
|
1358
|
+
aio_pika_message: The message to reject
|
|
1359
|
+
requeue: Whether to requeue the message directly (True) or handle with retry logic (False)
|
|
1360
|
+
retry_count: The current retry count for this message
|
|
1361
|
+
exception: The exception that caused the rejection, if any
|
|
1362
|
+
"""
|
|
1363
|
+
message_id = aio_pika_message.message_id or str(uuid.uuid4())
|
|
1364
|
+
|
|
1365
|
+
# If auto_ack is enabled, we cannot retry the message through RabbitMQ reject mechanism
|
|
1366
|
+
if self.message_handler.spec.auto_ack:
|
|
1367
|
+
if requeue:
|
|
1368
|
+
logger.warning(
|
|
1369
|
+
f"Message {message_id} ({self.queue_name}) cannot be requeued because auto_ack is enabled"
|
|
1370
|
+
)
|
|
1371
|
+
return
|
|
1372
|
+
|
|
1373
|
+
try:
|
|
1374
|
+
# Check if we should retry with backoff
|
|
1375
|
+
if (
|
|
1376
|
+
not requeue
|
|
1377
|
+
and self.message_handler.spec.requeue_on_exception
|
|
1378
|
+
and exception is not None
|
|
1379
|
+
):
|
|
1380
|
+
# Get retry config from consumer
|
|
1381
|
+
retry_config = self.consumer.config.consumer_retry_config
|
|
1382
|
+
|
|
1383
|
+
# Check if we reached max retries
|
|
1384
|
+
if retry_count >= retry_config.max_retries:
|
|
1385
|
+
logger.warning(
|
|
1386
|
+
f"Message {message_id} ({self.queue_name}) failed after {retry_count} retries, "
|
|
1387
|
+
f"dead-lettering: {str(exception)}"
|
|
1388
|
+
)
|
|
1389
|
+
# Dead-letter the message after max retries
|
|
1390
|
+
try:
|
|
1391
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1392
|
+
await aio_pika_message.reject(requeue=False)
|
|
1393
|
+
except Exception as e:
|
|
1394
|
+
logger.error(f"Failed to dead-letter message {message_id}: {e}")
|
|
1395
|
+
return
|
|
1396
|
+
|
|
1397
|
+
# Calculate delay for this retry attempt
|
|
1398
|
+
delay = retry_config.initial_delay * (
|
|
1399
|
+
retry_config.backoff_factor**retry_count
|
|
1400
|
+
)
|
|
1401
|
+
if retry_config.jitter:
|
|
1402
|
+
jitter_amount = delay * 0.25
|
|
1403
|
+
delay = delay + random.uniform(-jitter_amount, jitter_amount)
|
|
1404
|
+
delay = max(
|
|
1405
|
+
delay, 0.1
|
|
1406
|
+
) # Ensure delay doesn't go negative due to jitter
|
|
1407
|
+
|
|
1408
|
+
delay = min(delay, retry_config.max_delay)
|
|
1409
|
+
|
|
1410
|
+
logger.info(
|
|
1411
|
+
f"Message {message_id} ({self.queue_name}) failed with {str(exception)}, "
|
|
1412
|
+
f"retry {retry_count+1}/{retry_config.max_retries} scheduled in {delay:.2f}s"
|
|
1413
|
+
)
|
|
1414
|
+
|
|
1415
|
+
# Store retry state for this message
|
|
1416
|
+
self.retry_state[message_id] = {
|
|
1417
|
+
"retry_count": retry_count + 1,
|
|
1418
|
+
"last_exception": exception,
|
|
1419
|
+
"next_retry": time.time() + delay,
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
# Schedule retry after delay
|
|
1423
|
+
asyncio.create_task(
|
|
1424
|
+
self._delayed_retry(
|
|
1425
|
+
aio_pika_message, delay, retry_count + 1, exception
|
|
1426
|
+
)
|
|
1427
|
+
)
|
|
1428
|
+
|
|
1429
|
+
# Acknowledge the current message since we'll handle retry ourselves
|
|
1430
|
+
try:
|
|
1431
|
+
async with self.consumer.get_channel_ctx(self.queue_name):
|
|
1432
|
+
await aio_pika_message.ack()
|
|
1433
|
+
except Exception as e:
|
|
1434
|
+
logger.error(
|
|
1435
|
+
f"Failed to acknowledge message {message_id} for retry: {e}"
|
|
1436
+                )
+                return
+
+            # Standard reject without retry or with immediate requeue
+            try:
+                async with self.consumer.get_channel_ctx(self.queue_name):
+                    await aio_pika_message.reject(requeue=requeue)
+                    if requeue:
+                        logger.info(
+                            f"Message {message_id} ({self.queue_name}) requeued for immediate retry"
+                        )
+                    else:
+                        logger.info(
+                            f"Message {message_id} ({self.queue_name}) rejected without requeue"
+                        )
+            except Exception as e:
+                logger.error(f"Failed to reject message {message_id}: {e}")
+
+        except Exception as e:
+            logger.exception(
+                f"Unexpected error in handle_reject_message for {message_id} ({self.queue_name}): {e}"
             )

+    async def _delayed_retry(
+        self,
+        aio_pika_message: aio_pika.abc.AbstractIncomingMessage,
+        delay: float,
+        retry_count: int,
+        exception: Optional[BaseException],
+    ) -> None:
+        """
+        Handle delayed retry of a message after exponential backoff delay.
+
+        Args:
+            aio_pika_message: The original message
+            delay: Delay in seconds before retrying
+            retry_count: The current retry count (after increment)
+            exception: The exception that caused the failure
+        """
+        message_id = aio_pika_message.message_id or str(uuid.uuid4())
+
+        try:
+            # Wait for the backoff delay
+            await asyncio.sleep(delay)
+
+            # Get message body and properties for republishing
+            message_body = aio_pika_message.body
+            headers = (
+                aio_pika_message.headers.copy() if aio_pika_message.headers else {}
+            )
+
+            # Add retry information to headers
+            headers["x-retry-count"] = retry_count
+            if exception:
+                headers["x-last-error"] = str(exception)
+
+            # Clean up retry state
+            if message_id in self.retry_state:
+                del self.retry_state[message_id]
+
+            # Republish the message to the same queue with retry logic
+            max_attempts = 3
+            for attempt in range(max_attempts):
+                try:
+                    async with self.consumer.get_channel_ctx(
+                        self.queue_name
+                    ) as channel:
+                        exchange = await RabbitmqUtils.get_main_exchange(
+                            channel=channel,
+                            exchange_name=self.consumer.config.exchange,
+                        )
+
+                        await exchange.publish(
+                            aio_pika.Message(
+                                body=message_body,
+                                headers=headers,
+                                message_id=message_id,
+                                content_type=aio_pika_message.content_type,
+                                content_encoding=aio_pika_message.content_encoding,
+                                delivery_mode=aio_pika_message.delivery_mode,
+                            ),
+                            routing_key=self.routing_key,
+                        )
+
+                        logger.info(
+                            f"Message {message_id} ({self.queue_name}) republished for retry {retry_count}"
+                        )
+                        return
+
+                except Exception as e:
+                    if attempt < max_attempts - 1:
+                        logger.warning(
+                            f"Failed to republish message {message_id} (attempt {attempt + 1}): {e}"
+                        )
+                        await asyncio.sleep(1.0 * (attempt + 1)) # Exponential backoff
+                    else:
+                        logger.error(
+                            f"Failed to republish message {message_id} after {max_attempts} attempts: {e}"
+                        )
+                        raise
+
+        except Exception as e:
+            logger.exception(
+                f"Failed to execute delayed retry for message {message_id} ({self.queue_name}): {e}"
+            )
+            # If we fail to republish, try to dead-letter the original message
+            try:
+                if message_id in self.retry_state:
+                    del self.retry_state[message_id]
+            except Exception:
+                pass
+
     async def handle_message(
         self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
     ) -> None:
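The delay value consumed by _delayed_retry above is computed by its caller, which falls outside this hunk. As a point of reference only, an exponential-backoff-with-jitter delay of the kind this retry flow expects can be derived as in the sketch below; the function name and constants are illustrative assumptions, not jararaca's actual implementation.

    import random

    def compute_backoff_delay(retry_count: int, base: float = 1.0, cap: float = 60.0) -> float:
        # Double the base delay per retry, cap it, then apply full jitter
        # so many failing consumers do not retry in lockstep.
        exp_delay = min(cap, base * (2 ** retry_count))
        return random.uniform(0.0, exp_delay)

    # Example: the third retry yields a delay somewhere in [0, 8] seconds (cap permitting).
    print(compute_backoff_delay(3))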
@@ -472,51 +1601,92 @@ class MessageHandlerCallback:
         incoming_message_spec = MessageHandler.get_message_incoming(handler)
         assert incoming_message_spec is not None

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with provide_shutdown_state(self.consumer.shutdown_state):
+            async with self.consumer.uow_context_provider(
+                AppTransactionContext(
+                    controller_member_reflect=handler_data.controller_member,
+                    transaction_data=MessageBusTransactionData(
+                        message=builded_message,
+                        topic=routing_key,
+                    ),
+                )
+            ):
+                ctx: AsyncContextManager[Any]
+                if incoming_message_spec.timeout is not None:
+                    ctx = asyncio.timeout(incoming_message_spec.timeout)
+                else:
+                    ctx = none_context()
+                async with ctx:
+                    try:
+                        with provide_bus_message_controller(
+                            AioPikaMessageBusController(aio_pika_message)
+                        ):
+                            await handler(builded_message)
+                        if not incoming_message_spec.auto_ack:
+                            with suppress(aio_pika.MessageProcessError):
+                                # Use channel context for acknowledgement with retry
+                                try:
+                                    async with self.consumer.get_channel_ctx(
+                                        self.queue_name
+                                    ):
+                                        await aio_pika_message.ack()
+                                except Exception as ack_error:
+                                    logger.warning(
+                                        f"Failed to acknowledge message {aio_pika_message.message_id or 'unknown'}: {ack_error}"
+                                    )
+                                    # Message will be redelivered if ack fails, which is acceptable
+                    except BaseException as base_exc:
+                        # Get message id for logging
+                        message_id = aio_pika_message.message_id or str(uuid.uuid4())
+
+                        # Extract retry count from headers if available
+                        headers = aio_pika_message.headers or {}
+                        retry_count = int(str(headers.get("x-retry-count", 0)))
+
+                        # Process exception handler if configured
+                        if incoming_message_spec.exception_handler is not None:
+                            try:
+                                incoming_message_spec.exception_handler(base_exc)
+                            except Exception as nested_exc:
+                                logger.exception(
+                                    f"Error processing exception handler for message {message_id}: {base_exc} | {nested_exc}"
+                                )
+                        else:
                             logger.exception(
-                                f"Error processing
+                                f"Error processing message {message_id} on topic {routing_key}: {str(base_exc)}"
+                            )
+
+                        # Handle rejection with retry logic
+                        if incoming_message_spec.requeue_on_exception:
+                            # Use our retry with backoff mechanism
+                            await self.handle_reject_message(
+                                aio_pika_message,
+                                requeue=False, # Don't requeue directly, use our backoff mechanism
+                                retry_count=retry_count,
+                                exception=base_exc,
+                            )
+                        else:
+                            # Message shouldn't be retried, reject it
+                            await self.handle_reject_message(
+                                aio_pika_message, requeue=False, exception=base_exc
                             )
                     else:
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        # Message processed successfully, log and clean up any retry state
+                        message_id = aio_pika_message.message_id or str(uuid.uuid4())
+                        if message_id in self.retry_state:
+                            del self.retry_state[message_id]
+
+                        # Log success with retry information if applicable
+                        headers = aio_pika_message.headers or {}
+                        if "x-retry-count" in headers:
+                            retry_count = int(str(headers.get("x-retry-count", 0)))
+                            logger.info(
+                                f"Message {message_id}#{self.queue_name} processed successfully after {retry_count} retries"
+                            )
+                        else:
+                            logger.info(
+                                f"Message {message_id}#{self.queue_name} processed successfully"
+                            )


     @asynccontextmanager
@@ -614,19 +1784,64 @@ class MessageBusWorker:
     def start_sync(self) -> None:

         def on_shutdown(loop: asyncio.AbstractEventLoop) -> None:
-            logger.info("Shutting down")
-
+            logger.info("Shutting down - signal received")
+            # Schedule the shutdown to run in the event loop
+            asyncio.create_task(self._graceful_shutdown())
+            # wait until the shutdown is complete

         with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
-            runner.get_loop()
-
-
+            loop = runner.get_loop()
+            loop.add_signal_handler(signal.SIGINT, on_shutdown, loop)
+            # Add graceful shutdown handler for SIGTERM as well
+            loop.add_signal_handler(signal.SIGTERM, on_shutdown, loop)
             runner.run(self.start_async())

+    async def _graceful_shutdown(self) -> None:
+        """Handles graceful shutdown process"""
+        logger.info("Initiating graceful shutdown sequence")
+        # Use the comprehensive close method that handles shutdown, task waiting and connection cleanup
+
+        self.consumer.shutdown()
+        logger.info("Graceful shutdown completed")
+

 class AioPikaMessageBusController(BusMessageController):
     def __init__(self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage):
         self.aio_pika_message = aio_pika_message
+        # We access consumer callback through context if available
+        self._callback: Optional[MessageHandlerCallback] = None
+
+    def _get_callback(self) -> MessageHandlerCallback:
+        """
+        Find the callback associated with this message.
+        This allows us to access the retry mechanisms.
+        """
+        if self._callback is None:
+            # Get the context from current frame's locals
+            frame = inspect.currentframe()
+            if frame is not None:
+                try:
+                    caller_frame = frame.f_back
+                    if caller_frame is not None:
+                        # Check for context with handler callback
+                        callback_ref = None
+                        # Look for handler_message call context
+                        while caller_frame is not None:
+                            if "self" in caller_frame.f_locals:
+                                self_obj = caller_frame.f_locals["self"]
+                                if isinstance(self_obj, MessageHandlerCallback):
+                                    callback_ref = self_obj
+                                    break
+                            caller_frame = caller_frame.f_back
+                        # Save callback reference if we found it
+                        self._callback = callback_ref
+                finally:
+                    del frame # Avoid reference cycles
+
+        if self._callback is None:
+            raise RuntimeError("Could not find callback context for message retry")
+
+        return self._callback

     async def ack(self) -> None:
         await self.aio_pika_message.ack()
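The shutdown wiring in the hunk above follows a common asyncio pattern: SIGINT and SIGTERM handlers registered on the loop schedule an async shutdown routine instead of killing the process outright. A self-contained sketch of the same pattern, using only the standard library (add_signal_handler is Unix-only; the worker body and messages here are placeholders, not jararaca code):

    import asyncio
    import signal

    async def main() -> None:
        stop = asyncio.Event()
        loop = asyncio.get_running_loop()
        # Both termination signals funnel into the same graceful-stop event.
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, stop.set)
        print("worker running; send SIGINT/SIGTERM to stop")
        await stop.wait()
        print("draining in-flight messages before exit")

    if __name__ == "__main__":
        asyncio.run(main())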
@@ -638,7 +1853,41 @@ class AioPikaMessageBusController(BusMessageController):
         await self.aio_pika_message.reject()

     async def retry(self) -> None:
-
+        """
+        Retry the message immediately by rejecting with requeue flag.
+        This doesn't use the exponential backoff mechanism.
+        """
+        callback = self._get_callback()
+        await callback.handle_reject_message(self.aio_pika_message, requeue=True)

     async def retry_later(self, delay: int) -> None:
-
+        """
+        Retry the message after a specified delay using the exponential backoff mechanism.
+
+        Args:
+            delay: Minimum delay in seconds before retrying
+        """
+        try:
+            callback = self._get_callback()
+
+            # Get current retry count from message headers
+            headers = self.aio_pika_message.headers or {}
+            retry_count = int(str(headers.get("x-retry-count", 0)))
+
+            # Handle retry with explicit delay
+            asyncio.create_task(
+                callback._delayed_retry(
+                    self.aio_pika_message,
+                    float(delay),
+                    retry_count + 1,
+                    None, # No specific exception
+                )
+            )
+
+            # Acknowledge the current message since we'll republish
+            await self.aio_pika_message.ack()
+
+        except Exception as e:
+            logger.exception(f"Failed to schedule retry_later: {e}")
+            # Fall back to immediate retry
+            await self.aio_pika_message.reject(requeue=True)