jararaca 0.3.12a13__py3-none-any.whl → 0.3.12a14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jararaca might be problematic.
- jararaca/messagebus/worker.py +652 -193
- jararaca/scheduler/beat_worker.py +537 -86
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a14.dist-info}/METADATA +1 -1
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a14.dist-info}/RECORD +8 -8
- pyproject.toml +1 -1
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a14.dist-info}/LICENSE +0 -0
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a14.dist-info}/WHEEL +0 -0
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a14.dist-info}/entry_points.txt +0 -0
jararaca/messagebus/worker.py
CHANGED
@@ -23,7 +23,14 @@ from urllib.parse import parse_qs, urlparse
 import aio_pika
 import aio_pika.abc
 import uvloop
-from aio_pika.exceptions import
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ChannelNotFoundEntity,
+    ConnectionClosed,
+)
 from pydantic import BaseModel

 from jararaca.broker_backend import MessageBrokerBackend
@@ -80,6 +87,18 @@ class AioPikaWorkerConfig:
             backoff_factor=2.0,
         )
     )
+    # Connection health monitoring settings
+    connection_heartbeat_interval: float = 30.0  # seconds
+    connection_health_check_interval: float = 10.0  # seconds
+    reconnection_backoff_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=-1,  # Infinite retries for reconnection
+            initial_delay=2.0,
+            max_delay=120.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )


 class AioPikaMessage(MessageOf[Message]):
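For orientation, a minimal sketch of how the resilience settings added to AioPikaWorkerConfig above could be set explicitly. The field names and RetryConfig arguments come from this diff; the import path, url, exchange and prefetch values are placeholders, and the other retry configs keep their defaults.

    # Import path assumed; both names appear in jararaca/messagebus/worker.py
    from jararaca.messagebus.worker import AioPikaWorkerConfig, RetryConfig

    config = AioPikaWorkerConfig(
        url="amqp://guest:guest@localhost/",        # placeholder broker URL
        exchange="example_exchange",                # placeholder exchange name
        prefetch_count=10,                          # placeholder prefetch
        connection_heartbeat_interval=30.0,         # heartbeat forwarded to aio_pika.connect
        connection_health_check_interval=10.0,      # background health-check period
        reconnection_backoff_config=RetryConfig(
            max_retries=-1,                         # retry forever
            initial_delay=2.0,
            max_delay=120.0,
            backoff_factor=2.0,
            jitter=True,
        ),
    )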
@@ -165,6 +184,15 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         self.connection: aio_pika.abc.AbstractConnection | None = None
         self.channels: dict[str, aio_pika.abc.AbstractChannel] = {}

+        # Connection resilience attributes
+        self.connection_healthy = False
+        self.connection_lock = asyncio.Lock()
+        self.reconnection_event = asyncio.Event()
+        self.reconnection_in_progress = False
+        self.consumer_tags: dict[str, str] = {}  # Track consumer tags for cleanup
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_task: asyncio.Task[Any] | None = None
+
     async def _verify_infrastructure(self) -> bool:
         """
         Verify that the required RabbitMQ infrastructure (exchanges, queues) exists.
@@ -200,14 +228,18 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         routing_key = f"{handler.message_type.MESSAGE_TOPIC}.#"

         async def setup_consumer() -> None:
+            # Wait for connection to be healthy if reconnection is in progress
+            if self.reconnection_in_progress:
+                await self.reconnection_event.wait()
+
             # Create a channel using the context manager
             async with self.create_channel(queue_name) as channel:
                 queue = await RabbitmqUtils.get_queue(
                     channel=channel, queue_name=queue_name
                 )

-                # Configure consumer
-                await queue.consume(
+                # Configure consumer and get the consumer tag
+                consumer_tag = await queue.consume(
                     callback=MessageHandlerCallback(
                         consumer=self,
                         queue_name=queue_name,
@@ -217,6 +249,9 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
                     no_ack=handler.spec.auto_ack,
                 )

+                # Store consumer tag for cleanup
+                self.consumer_tags[queue_name] = consumer_tag
+
                 logger.info(
                     f"Consuming message handler {queue_name} on dedicated channel"
                 )
@@ -226,7 +261,14 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
             await retry_with_backoff(
                 setup_consumer,
                 retry_config=self.config.consumer_retry_config,
-                retry_exceptions=(
+                retry_exceptions=(
+                    ChannelNotFoundEntity,
+                    ChannelClosed,
+                    AMQPError,
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ConnectionClosed,
+                ),
             )
             return True
         except Exception as e:
@@ -246,14 +288,18 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         routing_key = queue_name

         async def setup_consumer() -> None:
+            # Wait for connection to be healthy if reconnection is in progress
+            if self.reconnection_in_progress:
+                await self.reconnection_event.wait()
+
             # Create a channel using the context manager
             async with self.create_channel(queue_name) as channel:
                 queue = await RabbitmqUtils.get_queue(
                     channel=channel, queue_name=queue_name
                 )

-                # Configure consumer
-                await queue.consume(
+                # Configure consumer and get the consumer tag
+                consumer_tag = await queue.consume(
                     callback=ScheduledMessageHandlerCallback(
                         consumer=self,
                         queue_name=queue_name,
@@ -263,6 +309,9 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
                     no_ack=True,
                 )

+                # Store consumer tag for cleanup
+                self.consumer_tags[queue_name] = consumer_tag
+
                 logger.info(f"Consuming scheduler {queue_name} on dedicated channel")

         try:
@@ -270,7 +319,14 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
             await retry_with_backoff(
                 setup_consumer,
                 retry_config=self.config.consumer_retry_config,
-                retry_exceptions=(
+                retry_exceptions=(
+                    ChannelNotFoundEntity,
+                    ChannelClosed,
+                    AMQPError,
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ConnectionClosed,
+                ),
             )
             return True
         except Exception as e:
@@ -283,98 +339,107 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         """
         Main consume method that sets up all message handlers and scheduled actions with retry mechanisms.
         """
-            retry_config=self.config.connection_retry_config,
-            retry_exceptions=(Exception,),
-        )
+        # Establish initial connection
+        async with self.connect() as connection:
+            self.connection_healthy = True

+            # Start connection health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
+            )

+            # Verify infrastructure with retry
+            infra_check_success = await retry_with_backoff(
+                self._verify_infrastructure,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(Exception,),
+            )

+            if not infra_check_success:
+                logger.critical(
+                    "Failed to verify RabbitMQ infrastructure. Shutting down."
+                )
+                self.shutdown_event.set()
+                return

+            async def wait_for(
+                type: str, name: str, coroutine: Awaitable[bool]
+            ) -> tuple[str, str, bool]:
+                return type, name, await coroutine
+
+            tasks: set[asyncio.Task[tuple[str, str, bool]]] = set()
+
+            # Setup message handlers
+            for handler in self.message_handler_set:
+                queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+                self.incoming_map[queue_name] = handler
+
+                tasks.add(
+                    task := asyncio.create_task(
+                        wait_for(
+                            "message_handler",
+                            queue_name,
+                            self._setup_message_handler_consumer(handler),
+                        )
                     )
                 )
-                )
-                # task.add_done_callback(tasks.discard)
-            # success = await self._setup_message_handler_consumer(handler)
-            # if not success:
-            #     logger.warning(
-            #         f"Failed to set up consumer for {queue_name}, will not process messages from this queue"
-            #     )
-        # Setup scheduled actions
-        for scheduled_action in self.scheduled_actions:

+            # Setup scheduled actions
+            for scheduled_action in self.scheduled_actions:
+                queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+                tasks.add(
+                    task := asyncio.create_task(
+                        wait_for(
+                            "scheduled_action",
+                            queue_name,
+                            self._setup_scheduled_action_consumer(scheduled_action),
+                        )
                     )
                 )
-                )
-                # task.add_done_callback(tasks.discard)
-            # success = await self._setup_scheduled_action_consumer(scheduled_action)
-            # if not success:
-            #     queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
-            #     logger.warning(
-            #         f"Failed to set up consumer for scheduled action {queue_name}, will not process scheduled tasks from this queue"
-            #     )
-        async def handle_task_results() -> None:
-            for task in asyncio.as_completed(tasks):
-                type, name, success = await task
-                if success:
-                    logger.info(f"Successfully set up {type} consumer for {name}")
-                else:
-                    logger.warning(
-                        f"Failed to set up {type} consumer for {name}, will not process messages from this queue"
-                    )

+            async def handle_task_results() -> None:
+                for task in asyncio.as_completed(tasks):
+                    type, name, success = await task
+                    if success:
+                        logger.info(f"Successfully set up {type} consumer for {name}")
+                    else:
+                        logger.warning(
+                            f"Failed to set up {type} consumer for {name}, will not process messages from this queue"
+                        )
+
+            handle_task_results_task = asyncio.create_task(handle_task_results())
+
+            # Wait for shutdown signal
+            await self.shutdown_event.wait()
+            logger.info("Shutdown event received, stopping consumers")
+
+            # Cancel health monitoring
+            if self.health_check_task:
+                self.health_check_task.cancel()
                 with suppress(asyncio.CancelledError):
-        logger.info("Worker shutting down")
+                    await self.health_check_task

+            # Cancel reconnection task if running
+            if self.reconnection_task:
+                self.reconnection_task.cancel()
+                with suppress(asyncio.CancelledError):
+                    await self.reconnection_task

+            handle_task_results_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await handle_task_results_task
+            for task in tasks:
+                if not task.done():
+                    task.cancel()
+                    with suppress(asyncio.CancelledError):
+                        await task
+            logger.info("Worker shutting down")
+
+            # Wait for all tasks to complete
+            await self.wait_all_tasks_done()
+
+            # Close all channels and the connection
+            await self.close_channels_and_connection()

     async def wait_all_tasks_done(self) -> None:
         if not self.tasks:
@@ -393,41 +458,8 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):

     async def close_channels_and_connection(self) -> None:
         """Close all channels and then the connection"""
-        for queue_name, channel in self.channels.items():
-            try:
-                if not channel.is_closed:
-                    logger.info(f"Closing channel for queue {queue_name}")
-                    channel_close_tasks.append(channel.close())
-                else:
-                    logger.info(f"Channel for queue {queue_name} already closed")
-            except Exception as e:
-                logger.error(
-                    f"Error preparing to close channel for queue {queue_name}: {e}"
-                )
-
-        # Wait for all channels to close (if any)
-        if channel_close_tasks:
-            try:
-                await asyncio.gather(*channel_close_tasks, return_exceptions=True)
-            except Exception as e:
-                logger.error(f"Error during channel closures: {e}")
-
-        # Clear channels dictionary
-        self.channels.clear()
-
-        # Close the connection
-        if self.connection:
-            try:
-                if not self.connection.is_closed:
-                    logger.info("Closing RabbitMQ connection")
-                    await self.connection.close()
-                else:
-                    logger.info("RabbitMQ connection already closed")
-            except Exception as e:
-                logger.error(f"Error closing RabbitMQ connection: {e}")
-        self.connection = None
+        logger.info("Closing channels and connection...")
+        await self._cleanup_connection()

     def shutdown(self) -> None:
         """Signal for shutdown"""
@@ -436,7 +468,21 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):

     async def close(self) -> None:
         """Implement MessageBusConsumer.close for cleanup"""
+        logger.info("Closing consumer...")
         self.shutdown()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self.health_check_task
+
+        # Cancel reconnection task if running
+        if self.reconnection_task:
+            self.reconnection_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self.reconnection_task
+
         await self.wait_all_tasks_done()
         await self.close_channels_and_connection()

@@ -445,6 +491,16 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         Get the channel for a specific queue, or None if not found.
         This helps with error handling when a channel might have been closed.
         """
+        # If reconnection is in progress, wait for it to complete
+        if self.reconnection_in_progress:
+            try:
+                await asyncio.wait_for(self.reconnection_event.wait(), timeout=30.0)
+            except asyncio.TimeoutError:
+                logger.warning(
+                    f"Timeout waiting for reconnection when getting channel for {queue_name}"
+                )
+                return None
+
         if queue_name not in self.channels:
             logger.warning(f"No channel found for queue {queue_name}")
             return None
@@ -453,18 +509,38 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
             channel = self.channels[queue_name]
             if channel.is_closed:
                 logger.warning(f"Channel for queue {queue_name} is closed")
+                # Remove the closed channel
+                del self.channels[queue_name]
+
+                # Attempt to recreate the channel if connection is healthy
+                if (
+                    self.connection
+                    and not self.connection.is_closed
+                    and self.connection_healthy
+                ):
+                    try:
+                        logger.info(f"Creating new channel for {queue_name}")
+                        self.channels[queue_name] = await self.connection.channel()
+                        await self.channels[queue_name].set_qos(
+                            prefetch_count=self.config.prefetch_count
+                        )
+                        return self.channels[queue_name]
+                    except Exception as e:
+                        logger.error(
+                            f"Failed to recreate channel for {queue_name}: {e}"
+                        )
+                        # Trigger reconnection if channel creation fails
+                        self._trigger_reconnection()
+                        return None
+                else:
+                    # Connection is not healthy, trigger reconnection
+                    self._trigger_reconnection()
+                    return None
             return channel
         except Exception as e:
             logger.error(f"Error accessing channel for queue {queue_name}: {e}")
+            # Trigger reconnection on any channel access error
+            self._trigger_reconnection()
             return None

     async def _establish_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel:
@@ -497,8 +573,8 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
             fn=lambda: self._establish_channel(queue_name),
             retry_config=self.config.consumer_retry_config,
             retry_exceptions=(
+                AMQPConnectionError,
+                AMQPChannelError,
                 ConnectionError,
             ),
         )
@@ -525,7 +601,10 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         """
         try:
             logger.info("Establishing connection to RabbitMQ")
-            connection = await aio_pika.connect(
+            connection = await aio_pika.connect(
+                self.config.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
             logger.info("Connected to RabbitMQ successfully")
             return connection
         except Exception as e:
@@ -552,7 +631,7 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
             self._establish_connection,
             retry_config=self.config.connection_retry_config,
             retry_exceptions=(
+                AMQPConnectionError,
                 ConnectionError,
                 OSError,
                 TimeoutError,
@@ -586,22 +665,254 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
         Get a channel for a specific queue as a context manager.
         This is safer than using get_channel directly as it ensures proper error handling.
         """
+        max_retries = 3
+        retry_delay = 1.0
+
+        for attempt in range(max_retries):
+            try:
+                channel = await self.get_channel(queue_name)
+                if channel is not None:
+                    try:
+                        yield channel
+                        return
+                    finally:
+                        # We don't close the channel here as it's managed by the consumer
+                        pass
+
+                # No channel available, check connection state
+                if (
+                    self.connection
+                    and not self.connection.is_closed
+                    and self.connection_healthy
+                ):
+                    # Try to create a new channel
+                    async with self.create_channel(queue_name) as new_channel:
+                        yield new_channel
+                        return
+                else:
+                    # Connection is not healthy, wait for reconnection
+                    if self.reconnection_in_progress:
+                        try:
+                            await asyncio.wait_for(
+                                self.reconnection_event.wait(), timeout=30.0
+                            )
+                            # Retry after reconnection
+                            continue
+                        except asyncio.TimeoutError:
+                            logger.warning(
+                                f"Timeout waiting for reconnection for queue {queue_name}"
+                            )
+
+                    # Still no connection, trigger reconnection
+                    if not self.reconnection_in_progress:
+                        self._trigger_reconnection()
+
+                    if attempt < max_retries - 1:
+                        logger.info(
+                            f"Retrying channel access for {queue_name} in {retry_delay}s"
+                        )
+                        await asyncio.sleep(retry_delay)
+                        retry_delay *= 2
+                    else:
+                        raise RuntimeError(
+                            f"Cannot get channel for queue {queue_name}: no connection available after {max_retries} attempts"
+                        )
+
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    logger.warning(
+                        f"Error getting channel for {queue_name}, retrying: {e}"
+                    )
+                    await asyncio.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    logger.error(
+                        f"Failed to get channel for {queue_name} after {max_retries} attempts: {e}"
+                    )
+                    raise
+
+    async def _monitor_connection_health(self) -> None:
+        """
+        Monitor connection health and trigger reconnection if needed.
+        This runs as a background task.
+        """
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.connection_health_check_interval)
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        self._trigger_reconnection()
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """
+        Check if the connection is healthy.
+        """
+        try:
+            if self.connection is None or self.connection.is_closed:
+                return False
+
+            # Try to create a temporary channel to test connection
+            async with self.connection.channel() as test_channel:
+                # If we can create a channel, connection is healthy
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    def _trigger_reconnection(self) -> None:
+        """
+        Trigger reconnection process.
+        """
+        if not self.reconnection_in_progress and not self.shutdown_event.is_set():
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+            self.reconnection_event.clear()
+
+            # Start reconnection task
+            self.reconnection_task = asyncio.create_task(self._handle_reconnection())
+            self.reconnection_task.add_done_callback(self._on_reconnection_done)
+
+    def _on_reconnection_done(self, task: asyncio.Task[Any]) -> None:
+        """
+        Handle completion of reconnection task.
+        """
+        self.reconnection_in_progress = False
+        if task.exception():
+            logger.error(f"Reconnection task failed: {task.exception()}")
         else:
+            logger.info("Reconnection completed successfully")
+
+    async def _handle_reconnection(self) -> None:
+        """
+        Handle the reconnection process with exponential backoff.
+        """
+        logger.info("Starting reconnection process")
+
+        # Close existing connection and channels
+        await self._cleanup_connection()
+
+        reconnection_config = self.config.reconnection_backoff_config
+        attempt = 0
+
+        while not self.shutdown_event.is_set():
             try:
+                attempt += 1
+                logger.info(f"Reconnection attempt {attempt}")
+
+                # Establish new connection
+                self.connection = await self._establish_connection()
+                self.connection_healthy = True
+
+                # Re-establish all consumers
+                await self._reestablish_consumers()
+
+                logger.info("Reconnection successful")
+                self.reconnection_event.set()
+                return
+
+            except Exception as e:
+                logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Calculate backoff delay
+                delay = reconnection_config.initial_delay * (
+                    reconnection_config.backoff_factor ** (attempt - 1)
+                )
+                if reconnection_config.jitter:
+                    jitter_amount = delay * 0.25
+                    delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                    delay = max(delay, 0.1)
+
+                delay = min(delay, reconnection_config.max_delay)
+
+                logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                await asyncio.sleep(delay)
+
+    async def _cleanup_connection(self) -> None:
+        """
+        Clean up existing connection and channels.
+        """
+        # Cancel existing consumers
+        for queue_name, channel in self.channels.items():
+            try:
+                if not channel.is_closed:
+                    # Cancel consumer if we have its tag
+                    if queue_name in self.consumer_tags:
+                        try:
+                            queue = await channel.get_queue(queue_name, ensure=False)
+                            if queue:
+                                await queue.cancel(self.consumer_tags[queue_name])
+                        except Exception as cancel_error:
+                            logger.warning(
+                                f"Error cancelling consumer for {queue_name}: {cancel_error}"
+                            )
+                        del self.consumer_tags[queue_name]
+            except Exception as e:
+                logger.warning(f"Error cancelling consumer for {queue_name}: {e}")
+
+        # Close channels
+        for queue_name, channel in self.channels.items():
+            try:
+                if not channel.is_closed:
+                    await channel.close()
+            except Exception as e:
+                logger.warning(f"Error closing channel for {queue_name}: {e}")
+
+        self.channels.clear()
+
+        # Close connection
+        if self.connection and not self.connection.is_closed:
+            try:
+                await self.connection.close()
+            except Exception as e:
+                logger.warning(f"Error closing connection: {e}")
+
+        self.connection = None
+        self.connection_healthy = False
+
+    async def _reestablish_consumers(self) -> None:
+        """
+        Re-establish all consumers after reconnection.
+        """
+        logger.info("Re-establishing consumers after reconnection")
+
+        # Re-establish message handlers
+        for handler in self.message_handler_set:
+            queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+            try:
+                await self._setup_message_handler_consumer(handler)
+                logger.info(f"Re-established consumer for {queue_name}")
+            except Exception as e:
+                logger.error(f"Failed to re-establish consumer for {queue_name}: {e}")
+
+        # Re-establish scheduled actions
+        for scheduled_action in self.scheduled_actions:
+            queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+            try:
+                await self._setup_scheduled_action_consumer(scheduled_action)
+                logger.info(f"Re-established scheduler consumer for {queue_name}")
+            except Exception as e:
+                logger.error(
+                    f"Failed to re-establish scheduler consumer for {queue_name}: {e}"
+                )


 def create_message_bus(
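The reconnection loop above retries indefinitely by default, doubling its delay after each failed attempt. A minimal sketch of the schedule it produces with the default reconnection settings (initial_delay=2.0, backoff_factor=2.0, max_delay=120.0; jitter, which perturbs each delay by up to ±25%, is left out here for clarity):

    # Delay schedule of _handle_reconnection, jitter omitted.
    initial_delay, backoff_factor, max_delay = 2.0, 2.0, 120.0
    for attempt in range(1, 8):
        delay = min(initial_delay * backoff_factor ** (attempt - 1), max_delay)
        print(attempt, delay)  # 1 -> 2.0, 2 -> 4.0, 3 -> 8.0, ..., 6 -> 64.0, 7 -> 120.0 (capped)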
@@ -646,6 +957,19 @@ def create_message_bus(
         max_retries=30, initial_delay=5, max_delay=60.0, backoff_factor=3.0
     )

+    # Parse optional reconnection configuration parameters
+    reconnection_backoff_config = RetryConfig(
+        max_retries=-1,  # Infinite retries for reconnection
+        initial_delay=2.0,
+        max_delay=120.0,
+        backoff_factor=2.0,
+        jitter=True,
+    )
+
+    # Parse heartbeat and health check intervals
+    connection_heartbeat_interval = 30.0
+    connection_health_check_interval = 10.0
+
     # Connection retry config parameters
     if (
         "connection_retry_max" in query_params
@@ -712,12 +1036,65 @@ def create_message_bus(
         except ValueError:
             pass

+    # Reconnection backoff config parameters
+    if (
+        "reconnection_retry_max" in query_params
+        and query_params["reconnection_retry_max"][0].isdigit()
+    ):
+        reconnection_backoff_config.max_retries = int(
+            query_params["reconnection_retry_max"][0]
+        )
+
+    if "reconnection_retry_delay" in query_params:
+        try:
+            reconnection_backoff_config.initial_delay = float(
+                query_params["reconnection_retry_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "reconnection_retry_max_delay" in query_params:
+        try:
+            reconnection_backoff_config.max_delay = float(
+                query_params["reconnection_retry_max_delay"][0]
+            )
+        except ValueError:
+            pass
+
+    if "reconnection_retry_backoff" in query_params:
+        try:
+            reconnection_backoff_config.backoff_factor = float(
+                query_params["reconnection_retry_backoff"][0]
+            )
+        except ValueError:
+            pass
+
+    # Heartbeat and health check intervals
+    if "connection_heartbeat_interval" in query_params:
+        try:
+            connection_heartbeat_interval = float(
+                query_params["connection_heartbeat_interval"][0]
+            )
+        except ValueError:
+            pass
+
+    if "connection_health_check_interval" in query_params:
+        try:
+            connection_health_check_interval = float(
+                query_params["connection_health_check_interval"][0]
+            )
+        except ValueError:
+            pass
+
     config = AioPikaWorkerConfig(
         url=broker_url,
         exchange=exchange,
         prefetch_count=prefetch_count,
         connection_retry_config=connection_retry_config,
         consumer_retry_config=consumer_retry_config,
+        connection_heartbeat_interval=connection_heartbeat_interval,
+        connection_health_check_interval=connection_health_check_interval,
+        reconnection_backoff_config=reconnection_backoff_config,
     )

     return AioPikaMicroserviceConsumer(
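Since these settings are parsed from the broker URL's query string, the new knobs can be tuned directly in the connection string. An illustrative URL (scheme, host, credentials and values are placeholders; the parameter names are the ones parsed above):

    amqp://guest:guest@localhost:5672/?reconnection_retry_max=20&reconnection_retry_delay=2.0&reconnection_retry_max_delay=120.0&reconnection_retry_backoff=2.0&connection_heartbeat_interval=30&connection_health_check_interval=10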
@@ -768,6 +1145,25 @@ class ScheduledMessageHandlerCallback:
            )
            return

+        # Check if connection is healthy before processing
+        if not self.consumer.connection_healthy:
+            logger.warning(
+                f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
+            )
+            try:
+                # Wait briefly for potential reconnection
+                await asyncio.sleep(0.1)
+                if not self.consumer.connection_healthy:
+                    # Still not healthy, requeue the message
+                    async with self.consumer.get_channel_ctx(self.queue_name):
+                        await aio_pika_message.reject(requeue=True)
+                    return
+            except Exception as e:
+                logger.error(
+                    f"Failed to requeue scheduled message due to connection issues: {e}"
+                )
+                return
+
         async with self.consumer.lock:
             task = asyncio.create_task(self.handle_message(aio_pika_message))
             self.consumer.tasks.add(task)
@@ -803,6 +1199,21 @@ class ScheduledMessageHandlerCallback:
                 logger.error(f"Failed to requeue message during shutdown: {e}")
             return

+        # Check connection health before processing
+        if not self.consumer.connection_healthy:
+            logger.warning(
+                f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
+            )
+            try:
+                async with self.consumer.get_channel_ctx(self.queue_name):
+                    await aio_pika_message.reject(requeue=True)
+                return
+            except Exception as e:
+                logger.error(
+                    f"Failed to requeue scheduled message due to connection issues: {e}"
+                )
+                return
+
         sig = inspect.signature(self.scheduled_action.callable)
         if len(sig.parameters) == 1:

@@ -895,6 +1306,23 @@ class MessageHandlerCallback:
                 logger.error(f"Failed to requeue message during shutdown: {e}")
             return

+        # Check if connection is healthy before processing
+        if not self.consumer.connection_healthy:
+            logger.warning(
+                f"Connection not healthy, requeuing message for {self.queue_name}"
+            )
+            try:
+                # Wait briefly for potential reconnection
+                await asyncio.sleep(0.1)
+                if not self.consumer.connection_healthy:
+                    # Still not healthy, requeue the message
+                    async with self.consumer.get_channel_ctx(self.queue_name):
+                        await aio_pika_message.reject(requeue=True)
+                    return
+            except Exception as e:
+                logger.error(f"Failed to requeue message due to connection issues: {e}")
+                return
+
         async with self.consumer.lock:
             task = asyncio.create_task(self.handle_message(aio_pika_message))
             self.consumer.tasks.add(task)
@@ -959,8 +1387,11 @@ class MessageHandlerCallback:
                     f"dead-lettering: {str(exception)}"
                 )
                 # Dead-letter the message after max retries
+                try:
+                    async with self.consumer.get_channel_ctx(self.queue_name):
+                        await aio_pika_message.reject(requeue=False)
+                except Exception as e:
+                    logger.error(f"Failed to dead-letter message {message_id}: {e}")
                 return

             # Calculate delay for this retry attempt
@@ -996,29 +1427,33 @@ class MessageHandlerCallback:
                 )

                 # Acknowledge the current message since we'll handle retry ourselves
+                try:
+                    async with self.consumer.get_channel_ctx(self.queue_name):
+                        await aio_pika_message.ack()
+                except Exception as e:
+                    logger.error(
+                        f"Failed to acknowledge message {message_id} for retry: {e}"
+                    )
                 return

             # Standard reject without retry or with immediate requeue
+            try:
+                async with self.consumer.get_channel_ctx(self.queue_name):
+                    await aio_pika_message.reject(requeue=requeue)
+                if requeue:
+                    logger.info(
+                        f"Message {message_id} ({self.queue_name}) requeued for immediate retry"
+                    )
+                else:
+                    logger.info(
+                        f"Message {message_id} ({self.queue_name}) rejected without requeue"
+                    )
+            except Exception as e:
+                logger.error(f"Failed to reject message {message_id}: {e}")

-        except RuntimeError as e:
-            logger.error(
-                f"Error rejecting message {message_id} ({self.queue_name}): {e}"
-            )
         except Exception as e:
             logger.exception(
-                f"Unexpected error
+                f"Unexpected error in handle_reject_message for {message_id} ({self.queue_name}): {e}"
             )

     async def _delayed_retry(
@@ -1033,7 +1468,7 @@ class MessageHandlerCallback:

         Args:
             aio_pika_message: The original message
-            delay: Delay in seconds before
+            delay: Delay in seconds before retrying
             retry_count: The current retry count (after increment)
             exception: The exception that caused the failure
         """
@@ -1058,28 +1493,46 @@ class MessageHandlerCallback:
            if message_id in self.retry_state:
                del self.retry_state[message_id]

-            # Republish the message to the same queue
+            # Republish the message to the same queue with retry logic
+            max_attempts = 3
+            for attempt in range(max_attempts):
+                try:
+                    async with self.consumer.get_channel_ctx(
+                        self.queue_name
+                    ) as channel:
+                        exchange = await RabbitmqUtils.get_main_exchange(
+                            channel=channel,
+                            exchange_name=self.consumer.config.exchange,
+                        )

+                        await exchange.publish(
+                            aio_pika.Message(
+                                body=message_body,
+                                headers=headers,
+                                message_id=message_id,
+                                content_type=aio_pika_message.content_type,
+                                content_encoding=aio_pika_message.content_encoding,
+                                delivery_mode=aio_pika_message.delivery_mode,
+                            ),
+                            routing_key=self.routing_key,
+                        )

+                    logger.info(
+                        f"Message {message_id} ({self.queue_name}) republished for retry {retry_count}"
+                    )
+                    return
+
+                except Exception as e:
+                    if attempt < max_attempts - 1:
+                        logger.warning(
+                            f"Failed to republish message {message_id} (attempt {attempt + 1}): {e}"
+                        )
+                        await asyncio.sleep(1.0 * (attempt + 1))  # Exponential backoff
+                    else:
+                        logger.error(
+                            f"Failed to republish message {message_id} after {max_attempts} attempts: {e}"
+                        )
+                        raise

         except Exception as e:
             logger.exception(
@@ -1171,11 +1624,17 @@ class MessageHandlerCallback:
                 await handler(builded_message)
                 if not incoming_message_spec.auto_ack:
                     with suppress(aio_pika.MessageProcessError):
-                        # Use channel context for acknowledgement
+                        # Use channel context for acknowledgement with retry
+                        try:
+                            async with self.consumer.get_channel_ctx(
+                                self.queue_name
+                            ):
+                                await aio_pika_message.ack()
+                        except Exception as ack_error:
+                            logger.warning(
+                                f"Failed to acknowledge message {aio_pika_message.message_id or 'unknown'}: {ack_error}"
+                            )
+                            # Message will be redelivered if ack fails, which is acceptable
             except BaseException as base_exc:
                 # Get message id for logging
                 message_id = aio_pika_message.message_id or str(uuid.uuid4())