jararaca 0.3.11a16__py3-none-any.whl → 0.4.0a19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. README.md +121 -0
  2. jararaca/__init__.py +189 -17
  3. jararaca/__main__.py +4 -0
  4. jararaca/broker_backend/__init__.py +4 -0
  5. jararaca/broker_backend/mapper.py +4 -0
  6. jararaca/broker_backend/redis_broker_backend.py +9 -3
  7. jararaca/cli.py +915 -51
  8. jararaca/common/__init__.py +3 -0
  9. jararaca/core/__init__.py +3 -0
  10. jararaca/core/providers.py +8 -0
  11. jararaca/core/uow.py +41 -7
  12. jararaca/di.py +4 -0
  13. jararaca/files/entity.py.mako +4 -0
  14. jararaca/helpers/__init__.py +3 -0
  15. jararaca/helpers/global_scheduler/__init__.py +3 -0
  16. jararaca/helpers/global_scheduler/config.py +21 -0
  17. jararaca/helpers/global_scheduler/controller.py +42 -0
  18. jararaca/helpers/global_scheduler/registry.py +32 -0
  19. jararaca/lifecycle.py +6 -2
  20. jararaca/messagebus/__init__.py +4 -0
  21. jararaca/messagebus/bus_message_controller.py +4 -0
  22. jararaca/messagebus/consumers/__init__.py +3 -0
  23. jararaca/messagebus/decorators.py +121 -61
  24. jararaca/messagebus/implicit_headers.py +49 -0
  25. jararaca/messagebus/interceptors/__init__.py +3 -0
  26. jararaca/messagebus/interceptors/aiopika_publisher_interceptor.py +62 -11
  27. jararaca/messagebus/interceptors/message_publisher_collector.py +62 -0
  28. jararaca/messagebus/interceptors/publisher_interceptor.py +29 -3
  29. jararaca/messagebus/message.py +4 -0
  30. jararaca/messagebus/publisher.py +6 -0
  31. jararaca/messagebus/worker.py +1002 -459
  32. jararaca/microservice.py +113 -2
  33. jararaca/observability/constants.py +7 -0
  34. jararaca/observability/decorators.py +170 -13
  35. jararaca/observability/fastapi_exception_handler.py +37 -0
  36. jararaca/observability/hooks.py +109 -0
  37. jararaca/observability/interceptor.py +4 -0
  38. jararaca/observability/providers/__init__.py +3 -0
  39. jararaca/observability/providers/otel.py +225 -16
  40. jararaca/persistence/base.py +39 -3
  41. jararaca/persistence/exports.py +4 -0
  42. jararaca/persistence/interceptors/__init__.py +3 -0
  43. jararaca/persistence/interceptors/aiosqa_interceptor.py +86 -73
  44. jararaca/persistence/interceptors/constants.py +5 -0
  45. jararaca/persistence/interceptors/decorators.py +50 -0
  46. jararaca/persistence/session.py +3 -0
  47. jararaca/persistence/sort_filter.py +4 -0
  48. jararaca/persistence/utilities.py +73 -20
  49. jararaca/presentation/__init__.py +3 -0
  50. jararaca/presentation/decorators.py +88 -86
  51. jararaca/presentation/exceptions.py +23 -0
  52. jararaca/presentation/hooks.py +4 -0
  53. jararaca/presentation/http_microservice.py +4 -0
  54. jararaca/presentation/server.py +97 -45
  55. jararaca/presentation/websocket/__init__.py +3 -0
  56. jararaca/presentation/websocket/base_types.py +4 -0
  57. jararaca/presentation/websocket/context.py +4 -0
  58. jararaca/presentation/websocket/decorators.py +8 -41
  59. jararaca/presentation/websocket/redis.py +280 -53
  60. jararaca/presentation/websocket/types.py +4 -0
  61. jararaca/presentation/websocket/websocket_interceptor.py +46 -19
  62. jararaca/reflect/__init__.py +3 -0
  63. jararaca/reflect/controller_inspect.py +16 -10
  64. jararaca/reflect/decorators.py +252 -0
  65. jararaca/reflect/helpers.py +18 -0
  66. jararaca/reflect/metadata.py +34 -25
  67. jararaca/rpc/__init__.py +3 -0
  68. jararaca/rpc/http/__init__.py +101 -0
  69. jararaca/rpc/http/backends/__init__.py +14 -0
  70. jararaca/rpc/http/backends/httpx.py +43 -9
  71. jararaca/rpc/http/backends/otel.py +4 -0
  72. jararaca/rpc/http/decorators.py +380 -115
  73. jararaca/rpc/http/httpx.py +3 -0
  74. jararaca/scheduler/__init__.py +3 -0
  75. jararaca/scheduler/beat_worker.py +521 -105
  76. jararaca/scheduler/decorators.py +15 -22
  77. jararaca/scheduler/types.py +4 -0
  78. jararaca/tools/app_config/__init__.py +3 -0
  79. jararaca/tools/app_config/decorators.py +7 -19
  80. jararaca/tools/app_config/interceptor.py +6 -2
  81. jararaca/tools/typescript/__init__.py +3 -0
  82. jararaca/tools/typescript/decorators.py +120 -0
  83. jararaca/tools/typescript/interface_parser.py +1077 -174
  84. jararaca/utils/__init__.py +3 -0
  85. jararaca/utils/env_parse_utils.py +133 -0
  86. jararaca/utils/rabbitmq_utils.py +112 -39
  87. jararaca/utils/retry.py +19 -14
  88. jararaca-0.4.0a19.dist-info/LICENSE +674 -0
  89. jararaca-0.4.0a19.dist-info/LICENSES/GPL-3.0-or-later.txt +232 -0
  90. {jararaca-0.3.11a16.dist-info → jararaca-0.4.0a19.dist-info}/METADATA +12 -7
  91. jararaca-0.4.0a19.dist-info/RECORD +96 -0
  92. {jararaca-0.3.11a16.dist-info → jararaca-0.4.0a19.dist-info}/WHEEL +1 -1
  93. pyproject.toml +132 -0
  94. jararaca-0.3.11a16.dist-info/RECORD +0 -74
  95. /jararaca-0.3.11a16.dist-info/LICENSE → /LICENSE +0 -0
  96. {jararaca-0.3.11a16.dist-info → jararaca-0.4.0a19.dist-info}/entry_points.txt +0 -0
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2025 Lucas S
2
+ #
3
+ # SPDX-License-Identifier: GPL-3.0-or-later
4
+
1
5
  import asyncio
2
6
  import inspect
3
7
  import logging
@@ -9,22 +13,21 @@ from abc import ABC
9
13
  from contextlib import asynccontextmanager, suppress
10
14
  from dataclasses import dataclass, field
11
15
  from datetime import UTC, datetime
12
- from typing import (
13
- Any,
14
- AsyncContextManager,
15
- AsyncGenerator,
16
- Awaitable,
17
- Optional,
18
- Type,
19
- get_origin,
20
- )
16
+ from typing import Any, AsyncContextManager, AsyncGenerator, Awaitable, Optional, Type
21
17
  from urllib.parse import parse_qs, urlparse
22
18
 
23
19
  import aio_pika
24
20
  import aio_pika.abc
25
21
  import uvloop
26
- from aio_pika.exceptions import AMQPError, ChannelClosed, ChannelNotFoundEntity
27
- from pydantic import BaseModel
22
+ from aio_pika.exceptions import (
23
+ AMQPChannelError,
24
+ AMQPConnectionError,
25
+ AMQPError,
26
+ ChannelClosed,
27
+ ChannelNotFoundEntity,
28
+ ConnectionClosed,
29
+ )
30
+ from pydantic import ValidationError
28
31
 
29
32
  from jararaca.broker_backend import MessageBrokerBackend
30
33
  from jararaca.broker_backend.mapper import get_message_broker_backend_from_url
@@ -43,16 +46,21 @@ from jararaca.messagebus.decorators import (
43
46
  MessageHandlerData,
44
47
  ScheduleDispatchData,
45
48
  )
49
+ from jararaca.messagebus.implicit_headers import provide_implicit_headers
46
50
  from jararaca.messagebus.message import Message, MessageOf
47
51
  from jararaca.microservice import (
48
52
  AppTransactionContext,
49
53
  MessageBusTransactionData,
50
54
  Microservice,
51
55
  SchedulerTransactionData,
56
+ ShutdownState,
57
+ provide_shutdown_state,
58
+ providing_app_type,
52
59
  )
60
+ from jararaca.observability.hooks import record_exception, set_span_status
53
61
  from jararaca.scheduler.decorators import ScheduledActionData
54
62
  from jararaca.utils.rabbitmq_utils import RabbitmqUtils
55
- from jararaca.utils.retry import RetryConfig, retry_with_backoff
63
+ from jararaca.utils.retry import RetryPolicy, retry_with_backoff
56
64
 
57
65
  logger = logging.getLogger(__name__)
58
66
 
@@ -62,22 +70,25 @@ class AioPikaWorkerConfig:
62
70
  url: str
63
71
  exchange: str
64
72
  prefetch_count: int
65
- connection_retry_config: RetryConfig = field(
66
- default_factory=lambda: RetryConfig(
73
+ connection_retry_config: RetryPolicy = field(
74
+ default_factory=lambda: RetryPolicy(
67
75
  max_retries=15,
68
76
  initial_delay=1.0,
69
77
  max_delay=60.0,
70
78
  backoff_factor=2.0,
71
79
  )
72
80
  )
73
- consumer_retry_config: RetryConfig = field(
74
- default_factory=lambda: RetryConfig(
81
+ consumer_retry_policy: RetryPolicy = field(
82
+ default_factory=lambda: RetryPolicy(
75
83
  max_retries=15,
76
84
  initial_delay=0.5,
77
85
  max_delay=40.0,
78
86
  backoff_factor=2.0,
79
87
  )
80
88
  )
89
+ # Connection health monitoring settings
90
+ connection_heartbeat_interval: float = 30.0 # seconds
91
+ connection_health_check_interval: float = 10.0 # seconds
81
92
 
82
93
 
83
94
  class AioPikaMessage(MessageOf[Message]):
@@ -129,6 +140,20 @@ class MessageBusConsumer(ABC):
129
140
  """Close all resources related to the consumer"""
130
141
 
131
142
 
143
+ class _WorkerShutdownState(ShutdownState):
144
+ def __init__(self, shutdown_event: asyncio.Event):
145
+ self.shutdown_event = shutdown_event
146
+
147
+ def request_shutdown(self) -> None:
148
+ self.shutdown_event.set()
149
+
150
+ def is_shutdown_requested(self) -> bool:
151
+ return self.shutdown_event.is_set()
152
+
153
+ async def wait_for_shutdown(self) -> None:
154
+ await self.shutdown_event.wait()
155
+
156
+
132
157
  class AioPikaMicroserviceConsumer(MessageBusConsumer):
133
158
  def __init__(
134
159
  self,
@@ -146,11 +171,18 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
146
171
  self.incoming_map: dict[str, MessageHandlerData] = {}
147
172
  self.uow_context_provider = uow_context_provider
148
173
  self.shutdown_event = asyncio.Event()
174
+ self.shutdown_state = _WorkerShutdownState(self.shutdown_event)
149
175
  self.lock = asyncio.Lock()
150
176
  self.tasks: set[asyncio.Task[Any]] = set()
151
177
  self.connection: aio_pika.abc.AbstractConnection | None = None
152
178
  self.channels: dict[str, aio_pika.abc.AbstractChannel] = {}
153
179
 
180
+ # Connection resilience attributes
181
+ self.connection_healthy = False
182
+ self.connection_lock = asyncio.Lock()
183
+ self.consumer_tags: dict[str, str] = {} # Track consumer tags for cleanup
184
+ self.health_check_task: asyncio.Task[Any] | None = None
185
+
154
186
  async def _verify_infrastructure(self) -> bool:
155
187
  """
156
188
  Verify that the required RabbitMQ infrastructure (exchanges, queues) exists.
@@ -188,31 +220,48 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
188
220
  async def setup_consumer() -> None:
189
221
  # Create a channel using the context manager
190
222
  async with self.create_channel(queue_name) as channel:
191
- queue = await RabbitmqUtils.get_queue(
223
+ queue: aio_pika.abc.AbstractQueue = await RabbitmqUtils.get_queue(
192
224
  channel=channel, queue_name=queue_name
193
225
  )
194
226
 
195
- # Configure consumer right away while in the context
196
- await queue.consume(
227
+ # Configure consumer and get the consumer tag
228
+ consumer_tag = await queue.consume(
197
229
  callback=MessageHandlerCallback(
198
230
  consumer=self,
199
231
  queue_name=queue_name,
200
232
  routing_key=routing_key,
201
233
  message_handler=handler,
202
234
  ),
203
- no_ack=handler.spec.auto_ack,
235
+ # no_ack=handler.spec.auto_ack,
204
236
  )
205
237
 
238
+ # Store consumer tag for cleanup
239
+ self.consumer_tags[queue_name] = consumer_tag
240
+
206
241
  logger.info(
207
- f"Consuming message handler {queue_name} on dedicated channel"
242
+ "Consuming message handler %s on dedicated channel", queue_name
243
+ )
244
+
245
+ await self.shutdown_event.wait()
246
+
247
+ logger.warning(
248
+ "Shutdown event received, stopping consumer for %s", queue_name
208
249
  )
250
+ await queue.cancel(consumer_tag)
209
251
 
210
252
  try:
211
253
  # Setup with retry
212
254
  await retry_with_backoff(
213
255
  setup_consumer,
214
- retry_config=self.config.consumer_retry_config,
215
- retry_exceptions=(ChannelNotFoundEntity, ChannelClosed, AMQPError),
256
+ retry_policy=self.config.consumer_retry_policy,
257
+ retry_exceptions=(
258
+ ChannelNotFoundEntity,
259
+ ChannelClosed,
260
+ AMQPError,
261
+ AMQPConnectionError,
262
+ AMQPChannelError,
263
+ ConnectionClosed,
264
+ ),
216
265
  )
217
266
  return True
218
267
  except Exception as e:
@@ -238,8 +287,8 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
238
287
  channel=channel, queue_name=queue_name
239
288
  )
240
289
 
241
- # Configure consumer right away while in the context
242
- await queue.consume(
290
+ # Configure consumer and get the consumer tag
291
+ consumer_tag = await queue.consume(
243
292
  callback=ScheduledMessageHandlerCallback(
244
293
  consumer=self,
245
294
  queue_name=queue_name,
@@ -249,14 +298,31 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
249
298
  no_ack=True,
250
299
  )
251
300
 
252
- logger.info(f"Consuming scheduler {queue_name} on dedicated channel")
301
+ # Store consumer tag for cleanup
302
+ self.consumer_tags[queue_name] = consumer_tag
303
+
304
+ logger.debug("Consuming scheduler %s on dedicated channel", queue_name)
305
+
306
+ await self.shutdown_event.wait()
307
+
308
+ logger.warning(
309
+ "Shutdown event received, stopping consumer for %s", queue_name
310
+ )
311
+ await queue.cancel(consumer_tag)
253
312
 
254
313
  try:
255
314
  # Setup with retry
256
315
  await retry_with_backoff(
257
316
  setup_consumer,
258
- retry_config=self.config.consumer_retry_config,
259
- retry_exceptions=(ChannelNotFoundEntity, ChannelClosed, AMQPError),
317
+ retry_policy=self.config.consumer_retry_policy,
318
+ retry_exceptions=(
319
+ ChannelNotFoundEntity,
320
+ ChannelClosed,
321
+ AMQPError,
322
+ AMQPConnectionError,
323
+ AMQPChannelError,
324
+ ConnectionClosed,
325
+ ),
260
326
  )
261
327
  return True
262
328
  except Exception as e:
@@ -269,160 +335,195 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
269
335
  """
270
336
  Main consume method that sets up all message handlers and scheduled actions with retry mechanisms.
271
337
  """
272
- # Verify infrastructure with retry
273
- infra_check_success = await retry_with_backoff(
274
- self._verify_infrastructure,
275
- retry_config=self.config.connection_retry_config,
276
- retry_exceptions=(Exception,),
277
- )
278
-
279
- if not infra_check_success:
280
- logger.critical("Failed to verify RabbitMQ infrastructure. Shutting down.")
281
- self.shutdown_event.set()
282
- return
283
-
284
- async def wait_for(
285
- type: str, name: str, coroutine: Awaitable[bool]
286
- ) -> tuple[str, str, bool]:
287
- return type, name, await coroutine
338
+ # Establish initial connection
339
+ try:
340
+ async with self.connect() as connection:
341
+ self.connection_healthy = True
288
342
 
289
- tasks: set[asyncio.Task[tuple[str, str, bool]]] = set()
343
+ # Start connection health monitoring
344
+ self.health_check_task = asyncio.create_task(
345
+ self._monitor_connection_health(), name="ConnectionHealthMonitor"
346
+ )
290
347
 
291
- # Setup message handlers
292
- for handler in self.message_handler_set:
293
- queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
294
- self.incoming_map[queue_name] = handler
348
+ # Verify infrastructure with retry
349
+ infra_check_success = await retry_with_backoff(
350
+ self._verify_infrastructure,
351
+ retry_policy=self.config.connection_retry_config,
352
+ retry_exceptions=(Exception,),
353
+ )
295
354
 
296
- tasks.add(
297
- task := asyncio.create_task(
298
- wait_for(
299
- "message_handler",
300
- queue_name,
301
- self._setup_message_handler_consumer(handler),
355
+ if not infra_check_success:
356
+ logger.critical(
357
+ "Failed to verify RabbitMQ infrastructure. Shutting down."
302
358
  )
303
- )
304
- )
305
- # task.add_done_callback(tasks.discard)
306
- # success = await self._setup_message_handler_consumer(handler)
307
- # if not success:
308
- # logger.warning(
309
- # f"Failed to set up consumer for {queue_name}, will not process messages from this queue"
310
- # )
311
-
312
- # Setup scheduled actions
313
- for scheduled_action in self.scheduled_actions:
314
-
315
- queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
316
- tasks.add(
317
- task := asyncio.create_task(
318
- wait_for(
319
- "scheduled_action",
320
- queue_name,
321
- self._setup_scheduled_action_consumer(scheduled_action),
359
+ self.shutdown_event.set()
360
+ return
361
+
362
+ async def wait_for(
363
+ type: str, name: str, coroutine: Awaitable[bool]
364
+ ) -> tuple[str, str, bool]:
365
+ return type, name, await coroutine
366
+
367
+ tasks: set[asyncio.Task[tuple[str, str, bool]]] = set()
368
+
369
+ # Setup message handlers
370
+ for handler in self.message_handler_set:
371
+ queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
372
+ self.incoming_map[queue_name] = handler
373
+
374
+ tasks.add(
375
+ task := asyncio.create_task(
376
+ wait_for(
377
+ "message_handler",
378
+ queue_name,
379
+ self._setup_message_handler_consumer(handler),
380
+ ),
381
+ name=f"MessageHandler-{queue_name}-setup-consumer",
382
+ )
322
383
  )
323
- )
324
- )
325
- # task.add_done_callback(tasks.discard)
326
-
327
- # success = await self._setup_scheduled_action_consumer(scheduled_action)
328
- # if not success:
329
- # queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
330
- # logger.warning(
331
- # f"Failed to set up consumer for scheduled action {queue_name}, will not process scheduled tasks from this queue"
332
- # )
333
-
334
- async def handle_task_results() -> None:
335
- for task in asyncio.as_completed(tasks):
336
- type, name, success = await task
337
- if success:
338
- logger.info(f"Successfully set up {type} consumer for {name}")
339
- else:
340
- logger.warning(
341
- f"Failed to set up {type} consumer for {name}, will not process messages from this queue"
384
+
385
+ # Setup scheduled actions
386
+ for scheduled_action in self.scheduled_actions:
387
+ queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
388
+ tasks.add(
389
+ task := asyncio.create_task(
390
+ wait_for(
391
+ "scheduled_action",
392
+ queue_name,
393
+ self._setup_scheduled_action_consumer(scheduled_action),
394
+ ),
395
+ name=f"ScheduledAction-{queue_name}-setup-consumer",
396
+ )
342
397
  )
343
398
 
344
- handle_task_results_task = asyncio.create_task(handle_task_results())
399
+ async def handle_task_results() -> None:
400
+ for task in asyncio.as_completed(tasks):
401
+ type, name, success = await task
402
+ if success:
403
+ logger.debug(
404
+ "Successfully set up %s consumer for %s", type, name
405
+ )
406
+ else:
407
+ logger.warning(
408
+ "Failed to set up %s consumer for %s, will not process messages from this queue",
409
+ type,
410
+ name,
411
+ )
345
412
 
346
- # Wait for shutdown signal
347
- await self.shutdown_event.wait()
348
- logger.info("Shutdown event received, stopping consumers")
349
- handle_task_results_task.cancel()
350
- with suppress(asyncio.CancelledError):
351
- await handle_task_results_task
352
- for task in tasks:
353
- if not task.done():
354
- task.cancel()
355
- with suppress(asyncio.CancelledError):
356
- await task
357
- logger.info("Worker shutting down")
413
+ handle_task_results_task = asyncio.create_task(
414
+ handle_task_results(), name="HandleSetupTaskResults"
415
+ )
358
416
 
359
- # Wait for all tasks to complete
360
- await self.wait_all_tasks_done()
417
+ # Wait for shutdown signal
418
+ await self.shutdown_event.wait()
419
+ logger.debug("Shutdown event received, stopping consumers")
361
420
 
362
- # Close all channels and the connection
363
- await self.close_channels_and_connection()
421
+ await self.cancel_queue_consumers()
364
422
 
365
- async def wait_all_tasks_done(self) -> None:
366
- if not self.tasks:
367
- return
423
+ # Cancel health monitoring
424
+ if self.health_check_task:
425
+ self.health_check_task.cancel()
426
+ with suppress(asyncio.CancelledError):
427
+ await self.health_check_task
368
428
 
369
- logger.info(f"Waiting for {len(self.tasks)} in-flight tasks to complete")
370
- async with self.lock:
371
- # Use gather with return_exceptions=True to ensure all tasks are awaited
372
- # even if some raise exceptions
373
- results = await asyncio.gather(*self.tasks, return_exceptions=True)
429
+ handle_task_results_task.cancel()
430
+ with suppress(asyncio.CancelledError):
431
+ await handle_task_results_task
432
+ for task in tasks:
433
+ if not task.done():
434
+ task.cancel()
435
+ with suppress(asyncio.CancelledError):
436
+ await task
437
+ logger.debug("Worker shutting down")
438
+ # Wait for all tasks to complete
439
+ await self.wait_all_tasks_done()
440
+
441
+ # Close all channels and the connection
442
+ await self.close_channels_and_connection()
374
443
 
375
- # Log any exceptions that occurred
376
- for result in results:
377
- if isinstance(result, Exception):
378
- logger.error(f"Task raised an exception during shutdown: {result}")
444
+ except Exception as e:
445
+ logger.critical("Failed to establish initial connection to RabbitMQ: %s", e)
446
+ # Re-raise the exception so it can be caught by the caller
447
+ raise
379
448
 
380
- async def close_channels_and_connection(self) -> None:
381
- """Close all channels and then the connection"""
382
- # Close all channels
383
- channel_close_tasks = []
449
+ async def cancel_queue_consumers(self) -> None:
450
+ """
451
+ Cancel all active queue consumers.
452
+ """
453
+ logger.debug("Cancelling all active queue consumers...")
384
454
  for queue_name, channel in self.channels.items():
385
455
  try:
386
456
  if not channel.is_closed:
387
- logger.info(f"Closing channel for queue {queue_name}")
388
- channel_close_tasks.append(channel.close())
389
- else:
390
- logger.info(f"Channel for queue {queue_name} already closed")
457
+ # Cancel consumer if we have its tag
458
+ if queue_name in self.consumer_tags:
459
+ try:
460
+ queue = await channel.get_queue(queue_name, ensure=False)
461
+ if queue:
462
+ await queue.cancel(self.consumer_tags[queue_name])
463
+ except Exception as cancel_error:
464
+ logger.warning(
465
+ "Error cancelling consumer for %s: %s",
466
+ queue_name,
467
+ cancel_error,
468
+ )
469
+ del self.consumer_tags[queue_name]
391
470
  except Exception as e:
392
- logger.error(
393
- f"Error preparing to close channel for queue {queue_name}: {e}"
394
- )
471
+ logger.warning("Error cancelling consumer for %s: %s", queue_name, e)
395
472
 
396
- # Wait for all channels to close (if any)
397
- if channel_close_tasks:
398
- try:
399
- await asyncio.gather(*channel_close_tasks, return_exceptions=True)
400
- except Exception as e:
401
- logger.error(f"Error during channel closures: {e}")
473
+ async def wait_all_tasks_done(self) -> None:
474
+ if not self.tasks:
475
+ return
402
476
 
403
- # Clear channels dictionary
404
- self.channels.clear()
477
+ logger.warning(
478
+ "Waiting for (%s) in-flight tasks to complete: %s",
479
+ len(self.tasks),
480
+ ", ".join((task.get_name()) for task in self.tasks),
481
+ )
482
+ # async with self.lock:
483
+ # Use gather with return_exceptions=True to ensure all tasks are awaited
484
+ # even if some raise exceptions
485
+ # results = await asyncio.gather(*self.tasks, return_exceptions=True)
486
+ pending_tasks = [task for task in self.tasks if not task.done()]
487
+ while len(pending_tasks) > 0:
488
+ if not pending_tasks:
489
+ break
490
+ await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED)
491
+
492
+ pending_tasks = [task for task in pending_tasks if not task.done()]
493
+ if len(pending_tasks) > 0:
494
+ logger.warning(
495
+ "Waiting for (%s) in-flight tasks to complete: %s",
496
+ len(pending_tasks),
497
+ ", ".join((task.get_name()) for task in pending_tasks),
498
+ )
405
499
 
406
- # Close the connection
407
- if self.connection:
408
- try:
409
- if not self.connection.is_closed:
410
- logger.info("Closing RabbitMQ connection")
411
- await self.connection.close()
412
- else:
413
- logger.info("RabbitMQ connection already closed")
414
- except Exception as e:
415
- logger.error(f"Error closing RabbitMQ connection: {e}")
416
- self.connection = None
500
+ logger.warning("All in-flight tasks have completed.")
501
+ # Log any exceptions that occurred
502
+ # for result in results:
503
+ # if isinstance(result, Exception):
504
+ # logger.error("Task raised an exception during shutdown: %s", result)
505
+
506
+ async def close_channels_and_connection(self) -> None:
507
+ """Close all channels and then the connection"""
508
+ logger.warning("Closing channels and connection...")
509
+ await self._cleanup_connection()
417
510
 
418
511
  def shutdown(self) -> None:
419
512
  """Signal for shutdown"""
420
- logger.info("Initiating graceful shutdown")
513
+ logger.warning("Initiating graceful shutdown")
421
514
  self.shutdown_event.set()
422
515
 
423
516
  async def close(self) -> None:
424
517
  """Implement MessageBusConsumer.close for cleanup"""
518
+ logger.warning("Closing consumer...")
425
519
  self.shutdown()
520
+
521
+ # Cancel health monitoring
522
+ if self.health_check_task:
523
+ self.health_check_task.cancel()
524
+ with suppress(asyncio.CancelledError):
525
+ await self.health_check_task
526
+
426
527
  await self.wait_all_tasks_done()
427
528
  await self.close_channels_and_connection()
428
529
 
@@ -432,25 +533,45 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
432
533
  This helps with error handling when a channel might have been closed.
433
534
  """
434
535
  if queue_name not in self.channels:
435
- logger.warning(f"No channel found for queue {queue_name}")
536
+ logger.warning("No channel found for queue %s", queue_name)
436
537
  return None
437
538
 
438
539
  try:
439
540
  channel = self.channels[queue_name]
440
541
  if channel.is_closed:
441
- logger.warning(f"Channel for queue {queue_name} is closed")
442
- # Attempt to recreate the channel if needed
443
- if self.connection and not self.connection.is_closed:
444
- logger.info(f"Creating new channel for {queue_name}")
445
- self.channels[queue_name] = await self.connection.channel()
446
- await self.channels[queue_name].set_qos(
447
- prefetch_count=self.config.prefetch_count
448
- )
449
- return self.channels[queue_name]
450
- return None
542
+ logger.warning("Channel for queue %s is closed", queue_name)
543
+ # Remove the closed channel
544
+ del self.channels[queue_name]
545
+
546
+ # Attempt to recreate the channel if connection is healthy
547
+ if (
548
+ self.connection
549
+ and not self.connection.is_closed
550
+ and self.connection_healthy
551
+ ):
552
+ try:
553
+ logger.debug("Creating new channel for %s", queue_name)
554
+ self.channels[queue_name] = await self.connection.channel()
555
+ await self.channels[queue_name].set_qos(
556
+ prefetch_count=self.config.prefetch_count
557
+ )
558
+ return self.channels[queue_name]
559
+ except Exception as e:
560
+ logger.error(
561
+ "Failed to recreate channel for %s: %s", queue_name, e
562
+ )
563
+ # Trigger shutdown if channel creation fails
564
+ self._trigger_shutdown()
565
+ return None
566
+ else:
567
+ # Connection is not healthy, trigger shutdown
568
+ self._trigger_shutdown()
569
+ return None
451
570
  return channel
452
571
  except Exception as e:
453
- logger.error(f"Error accessing channel for queue {queue_name}: {e}")
572
+ logger.error("Error accessing channel for queue %s: %s", queue_name, e)
573
+ # Trigger shutdown on any channel access error
574
+ self._trigger_shutdown()
454
575
  return None
455
576
 
456
577
  async def _establish_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel:
@@ -459,14 +580,14 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
459
580
  """
460
581
  if self.connection is None or self.connection.is_closed:
461
582
  logger.warning(
462
- f"Cannot create channel for {queue_name}: connection is not available"
583
+ "Cannot create channel for %s: connection is not available", queue_name
463
584
  )
464
585
  raise RuntimeError("Connection is not available")
465
586
 
466
- logger.debug(f"Creating channel for queue {queue_name}")
587
+ logger.debug("Creating channel for queue %s", queue_name)
467
588
  channel = await self.connection.channel()
468
589
  await channel.set_qos(prefetch_count=self.config.prefetch_count)
469
- logger.debug(f"Created channel for queue {queue_name}")
590
+ logger.debug("Created channel for queue %s", queue_name)
470
591
  return channel
471
592
 
472
593
  @asynccontextmanager
@@ -481,17 +602,17 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
481
602
  # Create a new channel with retry
482
603
  channel = await retry_with_backoff(
483
604
  fn=lambda: self._establish_channel(queue_name),
484
- retry_config=self.config.consumer_retry_config,
605
+ retry_policy=self.config.consumer_retry_policy,
485
606
  retry_exceptions=(
486
- aio_pika.exceptions.AMQPConnectionError,
487
- aio_pika.exceptions.AMQPChannelError,
607
+ AMQPConnectionError,
608
+ AMQPChannelError,
488
609
  ConnectionError,
489
610
  ),
490
611
  )
491
612
 
492
613
  # Save in the channels dict for tracking
493
614
  self.channels[queue_name] = channel
494
- logger.debug(f"Created new channel for queue {queue_name}")
615
+ logger.debug("Created new channel for queue %s", queue_name)
495
616
 
496
617
  try:
497
618
  yield channel
@@ -501,7 +622,7 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
501
622
  pass
502
623
  except aio_pika.exceptions.AMQPError as e:
503
624
  logger.error(
504
- f"Error creating channel for queue {queue_name} after retries: {e}"
625
+ "Error creating channel for queue %s after retries: %s", queue_name, e
505
626
  )
506
627
  raise
507
628
 
@@ -510,12 +631,15 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
510
631
  Creates a new RabbitMQ connection with retry logic.
511
632
  """
512
633
  try:
513
- logger.info("Establishing connection to RabbitMQ")
514
- connection = await aio_pika.connect(self.config.url)
515
- logger.info("Connected to RabbitMQ successfully")
634
+ logger.debug("Establishing connection to RabbitMQ")
635
+ connection = await aio_pika.connect(
636
+ self.config.url,
637
+ heartbeat=self.config.connection_heartbeat_interval,
638
+ )
639
+ logger.debug("Connected to RabbitMQ successfully")
516
640
  return connection
517
641
  except Exception as e:
518
- logger.error(f"Failed to connect to RabbitMQ: {e}")
642
+ logger.error("Failed to connect to RabbitMQ: %s", e)
519
643
  raise
520
644
 
521
645
  @asynccontextmanager
@@ -536,9 +660,9 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
536
660
  # Create a new connection with retry
537
661
  self.connection = await retry_with_backoff(
538
662
  self._establish_connection,
539
- retry_config=self.config.connection_retry_config,
663
+ retry_policy=self.config.connection_retry_config,
540
664
  retry_exceptions=(
541
- aio_pika.exceptions.AMQPConnectionError,
665
+ AMQPConnectionError,
542
666
  ConnectionError,
543
667
  OSError,
544
668
  TimeoutError,
@@ -552,14 +676,15 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
552
676
  pass
553
677
  except Exception as e:
554
678
  logger.error(
555
- f"Failed to establish connection to RabbitMQ after retries: {e}"
679
+ "Failed to establish connection to RabbitMQ after retries: %s", e
556
680
  )
557
681
  if self.connection:
558
682
  try:
559
683
  await self.connection.close()
560
684
  except Exception as close_error:
561
685
  logger.error(
562
- f"Error closing connection after connect failure: {close_error}"
686
+ "Error closing connection after connect failure: %s",
687
+ close_error,
563
688
  )
564
689
  self.connection = None
565
690
  raise
@@ -572,22 +697,173 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
572
697
  Get a channel for a specific queue as a context manager.
573
698
  This is safer than using get_channel directly as it ensures proper error handling.
574
699
  """
575
- channel = await self.get_channel(queue_name)
576
- if channel is None:
577
- if self.connection and not self.connection.is_closed:
578
- # Try to create a new channel
579
- async with self.create_channel(queue_name) as new_channel:
580
- yield new_channel
581
- else:
582
- raise RuntimeError(
583
- f"Cannot get channel for queue {queue_name}: no connection available"
700
+ max_retries = 3
701
+ retry_delay = 1.0
702
+
703
+ for attempt in range(max_retries):
704
+ try:
705
+ channel = await self.get_channel(queue_name)
706
+ if channel is not None:
707
+ try:
708
+ yield channel
709
+ return
710
+ finally:
711
+ # We don't close the channel here as it's managed by the consumer
712
+ pass
713
+
714
+ # No channel available, check connection state
715
+ if (
716
+ self.connection
717
+ and not self.connection.is_closed
718
+ and self.connection_healthy
719
+ ):
720
+ # Try to create a new channel
721
+ async with self.create_channel(queue_name) as new_channel:
722
+ yield new_channel
723
+ return
724
+ else:
725
+ # Connection is not healthy, trigger shutdown
726
+ logger.error(
727
+ "Connection not healthy while getting channel for %s, triggering shutdown",
728
+ queue_name,
729
+ )
730
+ self._trigger_shutdown()
731
+ raise RuntimeError(
732
+ f"Cannot get channel for queue {queue_name}: connection is not healthy"
733
+ )
734
+
735
+ except Exception as e:
736
+ if attempt < max_retries - 1:
737
+ logger.warning(
738
+ "Error getting channel for %s, retrying: %s", queue_name, e
739
+ )
740
+ await self._wait_delay_or_shutdown(retry_delay)
741
+ retry_delay *= 2
742
+ else:
743
+ logger.error(
744
+ "Failed to get channel for %s after %s attempts: %s",
745
+ queue_name,
746
+ max_retries,
747
+ e,
748
+ )
749
+ raise
750
+
751
+ async def _wait_delay_or_shutdown(self, delay: float) -> None:
752
+ """
753
+ Wait for the specified delay or exit early if shutdown is initiated.
754
+
755
+ Args:
756
+ delay: Delay in seconds to wait
757
+ """
758
+
759
+ wait_cor = asyncio.create_task(asyncio.sleep(delay), name="delayed-retry-wait")
760
+ wait_shutdown_cor = asyncio.create_task(
761
+ self.shutdown_event.wait(), name="delayed-retry-shutdown-wait"
762
+ )
763
+
764
+ await asyncio.wait(
765
+ [wait_cor, wait_shutdown_cor],
766
+ return_when=asyncio.FIRST_COMPLETED,
767
+ )
768
+
769
+ async def _monitor_connection_health(self) -> None:
770
+ """
771
+ Monitor connection health and trigger shutdown if connection is lost.
772
+ This runs as a background task.
773
+ """
774
+ while not self.shutdown_event.is_set():
775
+ try:
776
+ await self._wait_delay_or_shutdown(
777
+ self.config.connection_health_check_interval
584
778
  )
585
- else:
779
+
780
+ if self.shutdown_event.is_set():
781
+ break
782
+
783
+ # Check connection health
784
+ if not await self._is_connection_healthy():
785
+ logger.error(
786
+ "Connection health check failed, initiating worker shutdown"
787
+ )
788
+ self.shutdown()
789
+ break
790
+
791
+ except asyncio.CancelledError:
792
+ logger.debug("Connection health monitoring cancelled")
793
+ break
794
+ except Exception as e:
795
+ logger.error("Error in connection health monitoring: %s", e)
796
+ await self._wait_delay_or_shutdown(5) # Wait before retrying
797
+
798
+ async def _is_connection_healthy(self) -> bool:
799
+ """
800
+ Check if the connection is healthy.
801
+ """
802
+ try:
803
+ if self.connection is None or self.connection.is_closed:
804
+ return False
805
+
806
+ # Try to create a temporary channel to test connection
807
+ async with self.connection.channel() as test_channel:
808
+ # If we can create a channel, connection is healthy
809
+ return True
810
+
811
+ except Exception as e:
812
+ logger.debug("Connection health check failed: %s", e)
813
+ return False
814
+
815
+ def _trigger_shutdown(self) -> None:
816
+ """
817
+ Trigger worker shutdown due to connection loss.
818
+ """
819
+ if not self.shutdown_event.is_set():
820
+ logger.error("Connection lost, initiating worker shutdown")
821
+ self.connection_healthy = False
822
+ self.shutdown()
823
+
824
+ async def _cleanup_connection(self) -> None:
825
+ """
826
+ Clean up existing connection and channels.
827
+ """
828
+ # Cancel existing consumers
829
+ for queue_name, channel in self.channels.items():
586
830
  try:
587
- yield channel
588
- finally:
589
- # We don't close the channel here as it's managed by the consumer
590
- pass
831
+ if not channel.is_closed:
832
+ # Cancel consumer if we have its tag
833
+ if queue_name in self.consumer_tags:
834
+ try:
835
+ queue = await channel.get_queue(queue_name, ensure=False)
836
+ if queue:
837
+ await queue.cancel(self.consumer_tags[queue_name])
838
+ except Exception as cancel_error:
839
+ logger.warning(
840
+ "Error cancelling consumer for %s: %s",
841
+ queue_name,
842
+ cancel_error,
843
+ )
844
+ del self.consumer_tags[queue_name]
845
+ except Exception as e:
846
+ logger.warning("Error cancelling consumer for %s: %s", queue_name, e)
847
+
848
+ # Close channels
849
+ for queue_name, channel in self.channels.items():
850
+ try:
851
+ if not channel.is_closed:
852
+ await channel.close()
853
+ except Exception as e:
854
+ logger.warning("Error closing channel for %s: %s", queue_name, e)
855
+
856
+ self.channels.clear()
857
+
858
+ # Close connection
859
+ if self.connection and not self.connection.is_closed:
860
+ try:
861
+ await self.connection.close()
862
+ except Exception as e:
863
+ logger.warning("Error closing connection: %s", e)
864
+
865
+ self.connection = None
866
+ self.connection_healthy = False
591
867
 
592
868
 
593
869
  def create_message_bus(
@@ -627,11 +903,15 @@ def create_message_bus(
627
903
  prefetch_count = int(query_params["prefetch_count"][0])
628
904
 
629
905
  # Parse optional retry configuration parameters
630
- connection_retry_config = RetryConfig()
631
- consumer_retry_config = RetryConfig(
632
- max_retries=30, initial_delay=5, max_delay=60.0, backoff_factor=3.0
906
+ connection_retry_config = RetryPolicy()
907
+ consumer_retry_config = RetryPolicy(
908
+ max_retries=5, initial_delay=5, max_delay=60.0, backoff_factor=3.0
633
909
  )
634
910
 
911
+ # Parse heartbeat and health check intervals
912
+ connection_heartbeat_interval = 30.0
913
+ connection_health_check_interval = 10.0
914
+
635
915
  # Connection retry config parameters
636
916
  if (
637
917
  "connection_retry_max" in query_params
@@ -698,12 +978,31 @@ def create_message_bus(
698
978
  except ValueError:
699
979
  pass
700
980
 
981
+ # Heartbeat and health check intervals
982
+ if "connection_heartbeat_interval" in query_params:
983
+ try:
984
+ connection_heartbeat_interval = float(
985
+ query_params["connection_heartbeat_interval"][0]
986
+ )
987
+ except ValueError:
988
+ pass
989
+
990
+ if "connection_health_check_interval" in query_params:
991
+ try:
992
+ connection_health_check_interval = float(
993
+ query_params["connection_health_check_interval"][0]
994
+ )
995
+ except ValueError:
996
+ pass
997
+
701
998
  config = AioPikaWorkerConfig(
702
999
  url=broker_url,
703
1000
  exchange=exchange,
704
1001
  prefetch_count=prefetch_count,
705
1002
  connection_retry_config=connection_retry_config,
706
- consumer_retry_config=consumer_retry_config,
1003
+ consumer_retry_policy=consumer_retry_config,
1004
+ connection_heartbeat_interval=connection_heartbeat_interval,
1005
+ connection_health_check_interval=connection_health_check_interval,
707
1006
  )
708
1007
 
709
1008
  return AioPikaMicroserviceConsumer(
@@ -737,37 +1036,59 @@ class ScheduledMessageHandlerCallback:
737
1036
  ) -> None:
738
1037
 
739
1038
  if self.consumer.shutdown_event.is_set():
740
- logger.info(
741
- f"Shutdown in progress. Requeuing scheduled message for {self.queue_name}"
1039
+ logger.debug(
1040
+ "Shutdown in progress. Requeuing scheduled message for %s",
1041
+ self.queue_name,
742
1042
  )
743
1043
  try:
744
1044
  # Use channel context for requeuing
745
- async with self.consumer.get_channel_ctx(self.queue_name):
746
- await aio_pika_message.reject(requeue=True)
1045
+ await aio_pika_message.reject(requeue=True)
747
1046
  except RuntimeError:
748
1047
  logger.warning(
749
- f"Could not requeue scheduled message during shutdown - channel not available"
1048
+ "Could not requeue scheduled message during shutdown - channel not available"
750
1049
  )
751
1050
  except Exception as e:
752
1051
  logger.error(
753
- f"Failed to requeue scheduled message during shutdown: {e}"
1052
+ "Failed to requeue scheduled message during shutdown: %s", e
754
1053
  )
755
1054
  return
756
1055
 
1056
+ # Check if connection is healthy before processing
1057
+ if not self.consumer.connection_healthy:
1058
+ logger.warning(
1059
+ "Connection not healthy, requeuing scheduled message for %s",
1060
+ self.queue_name,
1061
+ )
1062
+ try:
1063
+ if not self.consumer.connection_healthy:
1064
+ # Still not healthy, requeue the message
1065
+
1066
+ await aio_pika_message.reject(requeue=True)
1067
+ return
1068
+ except Exception as e:
1069
+ logger.error(
1070
+ "Failed to requeue scheduled message due to connection issues: %s",
1071
+ e,
1072
+ )
1073
+ return
1074
+
757
1075
  async with self.consumer.lock:
758
- task = asyncio.create_task(self.handle_message(aio_pika_message))
1076
+ task = asyncio.create_task(
1077
+ self.handle_message(aio_pika_message),
1078
+ name=f"ScheduledAction-{self.queue_name}-handle-message-{aio_pika_message.message_id}",
1079
+ )
759
1080
  self.consumer.tasks.add(task)
760
1081
  task.add_done_callback(self.handle_message_consume_done)
761
1082
 
762
1083
  def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
763
1084
  self.consumer.tasks.discard(task)
764
1085
  if task.cancelled():
765
- logger.warning(f"Scheduled task for {self.queue_name} was cancelled")
1086
+ logger.warning("Scheduled task for %s was cancelled", self.queue_name)
766
1087
  return
767
1088
 
768
1089
  if (error := task.exception()) is not None:
769
1090
  logger.exception(
770
- f"Error processing scheduled action {self.queue_name}", exc_info=error
1091
+ "Error processing scheduled action %s", self.queue_name, exc_info=error
771
1092
  )
772
1093
 
773
1094
  async def handle_message(
@@ -775,18 +1096,37 @@ class ScheduledMessageHandlerCallback:
775
1096
  ) -> None:
776
1097
 
777
1098
  if self.consumer.shutdown_event.is_set():
778
- logger.info(f"Shutdown event set. Requeuing message for {self.queue_name}")
1099
+ logger.debug(
1100
+ "Shutdown event set. Requeuing message for %s", self.queue_name
1101
+ )
779
1102
  try:
780
1103
  # Use channel context for requeuing
781
- async with self.consumer.get_channel_ctx(self.queue_name):
782
- await aio_pika_message.reject(requeue=True)
1104
+
1105
+ await aio_pika_message.reject(requeue=True)
783
1106
  return
784
1107
  except RuntimeError:
785
1108
  logger.warning(
786
- f"Could not requeue message during shutdown - channel not available"
1109
+ "Could not requeue message during shutdown - channel not available"
787
1110
  )
788
1111
  except Exception as e:
789
- logger.error(f"Failed to requeue message during shutdown: {e}")
1112
+ logger.error("Failed to requeue message during shutdown: %s", e)
1113
+ return
1114
+
1115
+ # Check connection health before processing
1116
+ if not self.consumer.connection_healthy:
1117
+ logger.warning(
1118
+ "Connection not healthy, requeuing scheduled message for %s",
1119
+ self.queue_name,
1120
+ )
1121
+ try:
1122
+
1123
+ await aio_pika_message.reject(requeue=True)
1124
+ return
1125
+ except Exception as e:
1126
+ logger.error(
1127
+ "Failed to requeue scheduled message due to connection issues: %s",
1128
+ e,
1129
+ )
790
1130
  return
791
1131
 
792
1132
  sig = inspect.signature(self.scheduled_action.callable)
@@ -797,7 +1137,8 @@ class ScheduledMessageHandlerCallback:
797
1137
  self.scheduled_action,
798
1138
  (ScheduleDispatchData(int(aio_pika_message.body.decode("utf-8"))),),
799
1139
  {},
800
- )
1140
+ ),
1141
+ name=f"ScheduledAction-{self.queue_name}-handle-message-{aio_pika_message.message_id}",
801
1142
  )
802
1143
 
803
1144
  elif len(sig.parameters) == 0:
@@ -806,7 +1147,8 @@ class ScheduledMessageHandlerCallback:
806
1147
  self.scheduled_action,
807
1148
  (),
808
1149
  {},
809
- )
1150
+ ),
1151
+ name=f"ScheduledAction-{self.queue_name}-handle-message-{aio_pika_message.message_id}",
810
1152
  )
811
1153
  else:
812
1154
  logger.warning(
@@ -823,7 +1165,7 @@ class ScheduledMessageHandlerCallback:
823
1165
  except Exception as e:
824
1166
 
825
1167
  logger.exception(
826
- f"Error processing scheduled action {self.queue_name}: {e}"
1168
+ "Error processing scheduled action %s: %s", self.queue_name, e
827
1169
  )
828
1170
 
829
1171
  async def run_with_context(
@@ -832,18 +1174,22 @@ class ScheduledMessageHandlerCallback:
832
1174
  args: tuple[Any, ...],
833
1175
  kwargs: dict[str, Any],
834
1176
  ) -> None:
835
- async with self.consumer.uow_context_provider(
836
- AppTransactionContext(
837
- controller_member_reflect=scheduled_action.controller_member,
838
- transaction_data=SchedulerTransactionData(
839
- scheduled_to=datetime.now(UTC),
840
- cron_expression=scheduled_action.spec.cron,
841
- triggered_at=datetime.now(UTC),
842
- ),
843
- )
844
- ):
845
1177
 
846
- await scheduled_action.callable(*args, **kwargs)
1178
+ with provide_shutdown_state(self.consumer.shutdown_state):
1179
+ async with self.consumer.uow_context_provider(
1180
+ AppTransactionContext(
1181
+ controller_member_reflect=scheduled_action.controller_member,
1182
+ transaction_data=SchedulerTransactionData(
1183
+ task_name=scheduled_action.spec.name
1184
+ or scheduled_action.callable.__qualname__,
1185
+ scheduled_to=datetime.now(UTC),
1186
+ cron_expression=scheduled_action.spec.cron,
1187
+ triggered_at=datetime.now(UTC),
1188
+ ),
1189
+ )
1190
+ ):
1191
+
1192
+ await scheduled_action.callable(*args, **kwargs)
847
1193
 
848
1194
 
849
1195
  class MessageHandlerCallback:
@@ -865,36 +1211,60 @@ class MessageHandlerCallback:
865
1211
  self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
866
1212
  ) -> None:
867
1213
  if self.consumer.shutdown_event.is_set():
868
- logger.info(
869
- f"Shutdown in progress. Requeuing message for {self.queue_name}"
1214
+ logger.debug(
1215
+ "Shutdown in progress. Requeuing message for %s", self.queue_name
870
1216
  )
871
1217
  try:
872
1218
  # Use channel context for requeuing
873
- async with self.consumer.get_channel_ctx(self.queue_name):
874
- await aio_pika_message.reject(requeue=True)
1219
+
1220
+ await aio_pika_message.reject(requeue=True)
875
1221
  except RuntimeError:
876
1222
  logger.warning(
877
- f"Could not requeue message during shutdown - channel not available"
1223
+ "Could not requeue message during shutdown - channel not available"
878
1224
  )
879
1225
  except Exception as e:
880
- logger.error(f"Failed to requeue message during shutdown: {e}")
1226
+ logger.error("Failed to requeue message during shutdown: %s", e)
881
1227
  return
882
1228
 
1229
+ # Check if connection is healthy before processing
1230
+ if not self.consumer.connection_healthy:
1231
+ logger.warning(
1232
+ "Connection not healthy, requeuing message for %s", self.queue_name
1233
+ )
1234
+ try:
1235
+ if not self.consumer.connection_healthy:
1236
+ # Still not healthy, requeue the message
1237
+
1238
+ await aio_pika_message.reject(requeue=True)
1239
+ return
1240
+ except Exception as e:
1241
+ logger.error(
1242
+ "Failed to requeue message due to connection issues: %s", e
1243
+ )
1244
+ return
1245
+
883
1246
  async with self.consumer.lock:
884
- task = asyncio.create_task(self.handle_message(aio_pika_message))
1247
+ task = asyncio.create_task(
1248
+ self.handle_message(aio_pika_message),
1249
+ name=f"MessageHandler-{self.queue_name}-handle-message-{aio_pika_message.message_id}",
1250
+ )
885
1251
  self.consumer.tasks.add(task)
886
- task.add_done_callback(self.handle_message_consume_done)
887
1252
 
888
- def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
889
- self.consumer.tasks.discard(task)
890
- if task.cancelled():
891
- logger.warning(f"Task for queue {self.queue_name} was cancelled")
892
- return
1253
+ def handle_message_consume_done(task: asyncio.Task[Any]) -> None:
1254
+ self.consumer.tasks.discard(task)
1255
+ if task.cancelled():
1256
+ logger.warning("Task for queue %s was cancelled", self.queue_name)
1257
+ return
893
1258
 
894
- if (error := task.exception()) is not None:
895
- logger.exception(
896
- f"Error processing message for queue {self.queue_name}", exc_info=error
897
- )
1259
+ if (error := task.exception()) is not None:
1260
+ logger.exception(
1261
+ "Error processing message id %s for queue %s",
1262
+ aio_pika_message.message_id,
1263
+ self.queue_name,
1264
+ exc_info=error,
1265
+ )
1266
+
1267
+ task.add_done_callback(handle_message_consume_done)
898
1268
 
899
1269
  async def __call__(
900
1270
  self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
@@ -904,6 +1274,8 @@ class MessageHandlerCallback:
904
1274
  async def handle_reject_message(
905
1275
  self,
906
1276
  aio_pika_message: aio_pika.abc.AbstractIncomingMessage,
1277
+ *,
1278
+ requeue_timeout: float = 0,
907
1279
  requeue: bool = False,
908
1280
  retry_count: int = 0,
909
1281
  exception: Optional[BaseException] = None,
@@ -919,33 +1291,35 @@ class MessageHandlerCallback:
919
1291
  """
920
1292
  message_id = aio_pika_message.message_id or str(uuid.uuid4())
921
1293
 
922
- # If auto_ack is enabled, we cannot retry the message through RabbitMQ reject mechanism
923
- if self.message_handler.spec.auto_ack:
924
- if requeue:
925
- logger.warning(
926
- f"Message {message_id} ({self.queue_name}) cannot be requeued because auto_ack is enabled"
927
- )
928
- return
929
-
930
1294
  try:
931
1295
  # Check if we should retry with backoff
932
1296
  if (
933
1297
  not requeue
934
- and self.message_handler.spec.requeue_on_exception
1298
+ and self.message_handler.spec.nack_on_exception
935
1299
  and exception is not None
936
1300
  ):
937
1301
  # Get retry config from consumer
938
- retry_config = self.consumer.config.consumer_retry_config
1302
+ retry_config = (
1303
+ self.message_handler.spec.retry_config
1304
+ or self.consumer.config.consumer_retry_policy
1305
+ )
939
1306
 
940
1307
  # Check if we reached max retries
941
1308
  if retry_count >= retry_config.max_retries:
942
1309
  logger.warning(
943
- f"Message {message_id} ({self.queue_name}) failed after {retry_count} retries, "
944
- f"dead-lettering: {str(exception)}"
1310
+ "Message %s (%s) failed after %s retries, dead-lettering: %s",
1311
+ message_id,
1312
+ self.queue_name,
1313
+ retry_count,
1314
+ str(exception),
945
1315
  )
946
1316
  # Dead-letter the message after max retries
947
- async with self.consumer.get_channel_ctx(self.queue_name):
1317
+ try:
948
1318
  await aio_pika_message.reject(requeue=False)
1319
+ except Exception as e:
1320
+ logger.error(
1321
+ "Failed to dead-letter message %s: %s", message_id, e
1322
+ )
949
1323
  return
950
1324
 
951
1325
  # Calculate delay for this retry attempt
@@ -961,9 +1335,14 @@ class MessageHandlerCallback:
961
1335
 
962
1336
  delay = min(delay, retry_config.max_delay)
963
1337
 
964
- logger.info(
965
- f"Message {message_id} ({self.queue_name}) failed with {str(exception)}, "
966
- f"retry {retry_count+1}/{retry_config.max_retries} scheduled in {delay:.2f}s"
1338
+ logger.warning(
1339
+ "Message %s (%s) failed with %s, retry %s/%s scheduled in %.2fs",
1340
+ message_id,
1341
+ self.queue_name,
1342
+ str(exception),
1343
+ retry_count + 1,
1344
+ retry_config.max_retries,
1345
+ delay,
967
1346
  )
968
1347
 
969
1348
  # Store retry state for this message
@@ -974,36 +1353,50 @@ class MessageHandlerCallback:
974
1353
  }
975
1354
 
976
1355
  # Schedule retry after delay
977
- asyncio.create_task(
1356
+ task = asyncio.create_task(
978
1357
  self._delayed_retry(
979
1358
  aio_pika_message, delay, retry_count + 1, exception
980
- )
1359
+ ),
1360
+ name=f"MessageHandler-{self.queue_name}-delayed-retry-{message_id}",
981
1361
  )
1362
+ self.consumer.tasks.add(task)
982
1363
 
983
1364
  # Acknowledge the current message since we'll handle retry ourselves
984
- async with self.consumer.get_channel_ctx(self.queue_name):
1365
+ try:
985
1366
  await aio_pika_message.ack()
1367
+ except Exception as e:
1368
+ logger.error(
1369
+ "Failed to acknowledge message %s for retry: %s", message_id, e
1370
+ )
986
1371
  return
987
1372
 
988
1373
  # Standard reject without retry or with immediate requeue
989
- async with self.consumer.get_channel_ctx(self.queue_name):
1374
+ try:
1375
+ await self._wait_delay_or_shutdown(
1376
+ requeue_timeout
1377
+ ) # Optional delay before requeueing
990
1378
  await aio_pika_message.reject(requeue=requeue)
991
1379
  if requeue:
992
- logger.info(
993
- f"Message {message_id} ({self.queue_name}) requeued for immediate retry"
1380
+ logger.warning(
1381
+ "Message %s (%s) requeued for immediate retry",
1382
+ message_id,
1383
+ self.queue_name,
994
1384
  )
995
1385
  else:
996
- logger.info(
997
- f"Message {message_id} ({self.queue_name}) rejected without requeue"
1386
+ logger.warning(
1387
+ "Message %s (%s) rejected without requeue",
1388
+ message_id,
1389
+ self.queue_name,
998
1390
  )
1391
+ except Exception as e:
1392
+ logger.error("Failed to reject message %s: %s", message_id, e)
999
1393
 
1000
- except RuntimeError as e:
1001
- logger.error(
1002
- f"Error rejecting message {message_id} ({self.queue_name}): {e}"
1003
- )
1004
1394
  except Exception as e:
1005
1395
  logger.exception(
1006
- f"Unexpected error rejecting message {message_id} ({self.queue_name}): {e}"
1396
+ "Unexpected error in handle_reject_message for %s (%s): %s",
1397
+ message_id,
1398
+ self.queue_name,
1399
+ e,
1007
1400
  )
1008
1401
 
1009
1402
  async def _delayed_retry(
@@ -1018,7 +1411,7 @@ class MessageHandlerCallback:
1018
1411
 
1019
1412
  Args:
1020
1413
  aio_pika_message: The original message
1021
- delay: Delay in seconds before retry
1414
+ delay: Delay in seconds before retrying
1022
1415
  retry_count: The current retry count (after increment)
1023
1416
  exception: The exception that caused the failure
1024
1417
  """
@@ -1026,7 +1419,7 @@ class MessageHandlerCallback:
1026
1419
 
1027
1420
  try:
1028
1421
  # Wait for the backoff delay
1029
- await asyncio.sleep(delay)
1422
+ await self._wait_delay_or_shutdown(delay)
1030
1423
 
1031
1424
  # Get message body and properties for republishing
1032
1425
  message_body = aio_pika_message.body
@@ -1043,32 +1436,64 @@ class MessageHandlerCallback:
1043
1436
  if message_id in self.retry_state:
1044
1437
  del self.retry_state[message_id]
1045
1438
 
1046
- # Republish the message to the same queue
1047
- async with self.consumer.get_channel_ctx(self.queue_name) as channel:
1048
- exchange = await RabbitmqUtils.get_main_exchange(
1049
- channel=channel,
1050
- exchange_name=self.consumer.config.exchange,
1051
- )
1439
+ # Republish the message to the same queue with retry logic
1440
+ max_attempts = 3
1441
+ for attempt in range(max_attempts):
1442
+ try:
1443
+ async with self.consumer.get_channel_ctx(
1444
+ self.queue_name
1445
+ ) as channel:
1446
+ exchange = await RabbitmqUtils.get_main_exchange(
1447
+ channel=channel,
1448
+ exchange_name=self.consumer.config.exchange,
1449
+ )
1052
1450
 
1053
- await exchange.publish(
1054
- aio_pika.Message(
1055
- body=message_body,
1056
- headers=headers,
1057
- message_id=message_id,
1058
- content_type=aio_pika_message.content_type,
1059
- content_encoding=aio_pika_message.content_encoding,
1060
- delivery_mode=aio_pika_message.delivery_mode,
1061
- ),
1062
- routing_key=self.routing_key,
1063
- )
1451
+ await exchange.publish(
1452
+ aio_pika.Message(
1453
+ body=message_body,
1454
+ headers=headers,
1455
+ message_id=message_id,
1456
+ content_type=aio_pika_message.content_type,
1457
+ content_encoding=aio_pika_message.content_encoding,
1458
+ delivery_mode=aio_pika_message.delivery_mode,
1459
+ ),
1460
+ routing_key=self.routing_key,
1461
+ )
1064
1462
 
1065
- logger.info(
1066
- f"Message {message_id} ({self.queue_name}) republished for retry {retry_count}"
1067
- )
1463
+ logger.warning(
1464
+ "Message %s (%s) republished for retry %s",
1465
+ message_id,
1466
+ self.queue_name,
1467
+ retry_count,
1468
+ )
1469
+ return
1470
+
1471
+ except Exception as e:
1472
+ if attempt < max_attempts - 1:
1473
+ logger.warning(
1474
+ "Failed to republish message %s (attempt %s): %s",
1475
+ message_id,
1476
+ attempt + 1,
1477
+ e,
1478
+ )
1479
+ await self._wait_delay_or_shutdown(
1480
+ (1.0 * (attempt + 1))
1481
+ ) # Exponential backoff
1482
+ else:
1483
+ logger.error(
1484
+ "Failed to republish message %s after %s attempts: %s",
1485
+ message_id,
1486
+ max_attempts,
1487
+ e,
1488
+ )
1489
+ raise
1068
1490
 
1069
1491
  except Exception as e:
1070
1492
  logger.exception(
1071
- f"Failed to execute delayed retry for message {message_id} ({self.queue_name}): {e}"
1493
+ "Failed to execute delayed retry for message %s (%s): %s",
1494
+ message_id,
1495
+ self.queue_name,
1496
+ e,
1072
1497
  )
1073
1498
  # If we fail to republish, try to dead-letter the original message
1074
1499
  try:
@@ -1077,6 +1502,9 @@ class MessageHandlerCallback:
1077
1502
  except Exception:
1078
1503
  pass
1079
1504
 
1505
+ async def _wait_delay_or_shutdown(self, delay: float) -> None:
1506
+ await self.consumer._wait_delay_or_shutdown(delay)
1507
+
1080
1508
  async def handle_message(
1081
1509
  self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
1082
1510
  ) -> None:
@@ -1091,124 +1519,224 @@ class MessageHandlerCallback:
1091
1519
  handler_data = self.message_handler
1092
1520
 
1093
1521
  handler = handler_data.instance_callable
1522
+ handler_method = handler_data.controller_member.member_function
1094
1523
 
1095
- sig = inspect.signature(handler)
1524
+ # sig = inspect.signature(handler)
1096
1525
 
1097
- if len(sig.parameters) != 1:
1098
- logger.warning(
1099
- "Handler for topic '%s' must have exactly one parameter which is MessageOf[T extends Message]"
1100
- % routing_key
1101
- )
1102
- return
1526
+ # if len(sig.parameters) != 1:
1527
+ # logger.warning(
1528
+ # "Handler for topic '%s' must have exactly one parameter which is MessageOf[T extends Message]"
1529
+ # % routing_key
1530
+ # )
1531
+ # return
1103
1532
 
1104
- parameter = list(sig.parameters.values())[0]
1533
+ # parameter = list(sig.parameters.values())[0]
1105
1534
 
1106
- param_origin = get_origin(parameter.annotation)
1535
+ # param_origin = get_origin(parameter.annotation)
1107
1536
 
1108
- if param_origin is not MessageOf:
1109
- logger.warning(
1110
- "Handler for topic '%s' must have exactly one parameter of type Message"
1111
- % routing_key
1112
- )
1113
- return
1537
+ # if param_origin is not MessageOf:
1538
+ # logger.warning(
1539
+ # "Handler for topic '%s' must have exactly one parameter of type Message"
1540
+ # % routing_key
1541
+ # )
1542
+ # return
1114
1543
 
1115
- if len(parameter.annotation.__args__) != 1:
1116
- logger.warning(
1117
- "Handler for topic '%s' must have exactly one parameter of type Message"
1118
- % routing_key
1119
- )
1120
- return
1544
+ # if len(parameter.annotation.__args__) != 1:
1545
+ # logger.warning(
1546
+ # "Handler for topic '%s' must have exactly one parameter of type Message"
1547
+ # % routing_key
1548
+ # )
1549
+ # return
1121
1550
 
1122
- message_type = parameter.annotation.__args__[0]
1551
+ # message_type = parameter.annotation.__args__[0]
1123
1552
 
1124
- if not issubclass(message_type, BaseModel):
1125
- logger.warning(
1126
- "Handler for topic '%s' must have exactly one parameter of type MessageOf[BaseModel]"
1127
- % routing_key
1128
- )
1129
- return
1553
+ # if not issubclass(message_type, BaseModel):
1554
+ # logger.warning(
1555
+ # "Handler for topic '%s' must have exactly one parameter of type MessageOf[BaseModel]"
1556
+ # % routing_key
1557
+ # )
1558
+ # return
1130
1559
 
1131
- builded_message = AioPikaMessage(aio_pika_message, message_type)
1560
+ mode, message_type = MessageHandler.validate_decorated_fn(handler_method)
1132
1561
 
1133
- incoming_message_spec = MessageHandler.get_message_incoming(handler)
1134
- assert incoming_message_spec is not None
1562
+ built_message = AioPikaMessage(aio_pika_message, message_type)
1135
1563
 
1136
- async with self.consumer.uow_context_provider(
1137
- AppTransactionContext(
1138
- controller_member_reflect=handler_data.controller_member,
1139
- transaction_data=MessageBusTransactionData(
1140
- message=builded_message,
1141
- topic=routing_key,
1142
- ),
1143
- )
1564
+ incoming_message_spec = MessageHandler.get_last(handler)
1565
+ assert incoming_message_spec is not None, "Incoming message spec must be set"
1566
+ # Extract retry count from headers if available
1567
+ headers = aio_pika_message.headers or {}
1568
+ retry_count = int(str(headers.get("x-retry-count", 0)))
1569
+
1570
+ with provide_implicit_headers(aio_pika_message.headers), provide_shutdown_state(
1571
+ self.consumer.shutdown_state
1144
1572
  ):
1145
- ctx: AsyncContextManager[Any]
1146
- if incoming_message_spec.timeout is not None:
1147
- ctx = asyncio.timeout(incoming_message_spec.timeout)
1148
- else:
1149
- ctx = none_context()
1150
- async with ctx:
1151
- try:
1152
- with provide_bus_message_controller(
1153
- AioPikaMessageBusController(aio_pika_message)
1154
- ):
1155
- await handler(builded_message)
1156
- if not incoming_message_spec.auto_ack:
1157
- with suppress(aio_pika.MessageProcessError):
1158
- # Use channel context for acknowledgement
1159
- async with self.consumer.get_channel_ctx(self.queue_name):
1160
- await aio_pika_message.ack()
1161
- except BaseException as base_exc:
1162
- # Get message id for logging
1163
- message_id = aio_pika_message.message_id or str(uuid.uuid4())
1164
-
1165
- # Extract retry count from headers if available
1166
- headers = aio_pika_message.headers or {}
1167
- retry_count = int(str(headers.get("x-retry-count", 0)))
1168
-
1169
- # Process exception handler if configured
1170
- if incoming_message_spec.exception_handler is not None:
1171
- try:
1172
- incoming_message_spec.exception_handler(base_exc)
1173
- except Exception as nested_exc:
1174
- logger.exception(
1175
- f"Error processing exception handler for message {message_id}: {base_exc} | {nested_exc}"
1573
+ async with self.consumer.uow_context_provider(
1574
+ AppTransactionContext(
1575
+ controller_member_reflect=handler_data.controller_member,
1576
+ transaction_data=MessageBusTransactionData(
1577
+ message_id=aio_pika_message.message_id,
1578
+ processing_attempt=retry_count + 1,
1579
+ message_type=message_type,
1580
+ message=built_message,
1581
+ topic=routing_key,
1582
+ ),
1583
+ )
1584
+ ):
1585
+ maybe_timeout_ctx: AsyncContextManager[Any]
1586
+ if incoming_message_spec.timeout is not None:
1587
+ maybe_timeout_ctx = asyncio.timeout(incoming_message_spec.timeout)
1588
+ else:
1589
+ maybe_timeout_ctx = none_context()
1590
+
1591
+ start_time = time.perf_counter()
1592
+ async with maybe_timeout_ctx:
1593
+ try:
1594
+ with provide_bus_message_controller(
1595
+ AioPikaMessageBusController(aio_pika_message)
1596
+ ):
1597
+ try:
1598
+ if mode == "WRAPPED":
1599
+ future = handler(built_message)
1600
+ else:
1601
+ try:
1602
+
1603
+ payload = built_message.payload()
1604
+ except ValidationError as exc:
1605
+ logger.exception(
1606
+ "Validation error parsing message %s on topic %s",
1607
+ aio_pika_message.message_id or "unknown",
1608
+ routing_key,
1609
+ )
1610
+ aio_pika_message.headers["x-last-error"] = (
1611
+ "Validation error parsing message payload"
1612
+ )
1613
+ await aio_pika_message.reject(requeue=False)
1614
+ record_exception(
1615
+ exc,
1616
+ )
1617
+ set_span_status("ERROR")
1618
+ return
1619
+ future = handler(payload)
1620
+
1621
+ await future
1622
+
1623
+ with suppress(aio_pika.MessageProcessError):
1624
+ # Use channel context for acknowledgement with retry
1625
+ try:
1626
+ await aio_pika_message.ack()
1627
+ set_span_status("OK")
1628
+ except Exception as ack_error:
1629
+ logger.warning(
1630
+ "Failed to acknowledge message %s: %s",
1631
+ aio_pika_message.message_id or "unknown",
1632
+ ack_error,
1633
+ )
1634
+ successfully = True
1635
+ except Exception as base_exc:
1636
+ set_span_status("ERROR")
1637
+ record_exception(
1638
+ base_exc,
1639
+ {
1640
+ "message_id": aio_pika_message.message_id
1641
+ or "unknown",
1642
+ "routing_key": routing_key,
1643
+ },
1644
+ )
1645
+ successfully = False
1646
+ # Get message id for logging
1647
+ message_id = aio_pika_message.message_id or "unknown"
1648
+
1649
+ # Process exception handler if configured
1650
+ if incoming_message_spec.exception_handler is not None:
1651
+ try:
1652
+ incoming_message_spec.exception_handler(
1653
+ base_exc
1654
+ )
1655
+ except Exception as nested_exc:
1656
+ logger.exception(
1657
+ "Error processing exception handler for message %s: %s | %s",
1658
+ message_id,
1659
+ base_exc,
1660
+ nested_exc,
1661
+ )
1662
+ else:
1663
+ logger.exception(
1664
+ "Error processing message %s on topic %s: %s",
1665
+ message_id,
1666
+ routing_key,
1667
+ str(base_exc),
1668
+ )
1669
+
1670
+ # Handle rejection with retry logic
1671
+ if incoming_message_spec.nack_on_exception:
1672
+ await self.handle_reject_message(
1673
+ aio_pika_message,
1674
+ requeue_timeout=incoming_message_spec.nack_delay_on_exception,
1675
+ requeue=False, # Don't requeue directly, use our backoff mechanism
1676
+ retry_count=retry_count,
1677
+ exception=base_exc,
1678
+ )
1679
+ else:
1680
+ # Message shouldn't be retried, reject it
1681
+ await self.handle_reject_message(
1682
+ aio_pika_message,
1683
+ requeue=False,
1684
+ requeue_timeout=incoming_message_spec.nack_delay_on_exception,
1685
+ exception=base_exc,
1686
+ )
1687
+
1688
+ elapsed_time = time.perf_counter() - start_time
1689
+ # Message processed successfully, log and clean up any retry state
1690
+ message_id = aio_pika_message.message_id or str(
1691
+ uuid.uuid4()
1692
+ )
1693
+ if message_id in self.retry_state:
1694
+ del self.retry_state[message_id]
1695
+
1696
+ # Log success with retry information if applicable
1697
+ headers = aio_pika_message.headers or {}
1698
+ traceparent = headers.get("traceparent")
1699
+ trace_info = (
1700
+ f" [traceparent={str(traceparent)}]"
1701
+ if traceparent
1702
+ else ""
1176
1703
  )
1177
- else:
1178
- logger.exception(
1179
- f"Error processing message {message_id} on topic {routing_key}: {str(base_exc)}"
1180
- )
1181
1704
 
1182
- # Handle rejection with retry logic
1183
- if incoming_message_spec.requeue_on_exception:
1184
- # Use our retry with backoff mechanism
1185
- await self.handle_reject_message(
1186
- aio_pika_message,
1187
- requeue=False, # Don't requeue directly, use our backoff mechanism
1188
- retry_count=retry_count,
1189
- exception=base_exc,
1190
- )
1191
- else:
1192
- # Message shouldn't be retried, reject it
1193
- await self.handle_reject_message(
1194
- aio_pika_message, requeue=False, exception=base_exc
1195
- )
1196
- else:
1197
- # Message processed successfully, log and clean up any retry state
1198
- message_id = aio_pika_message.message_id or str(uuid.uuid4())
1199
- if message_id in self.retry_state:
1200
- del self.retry_state[message_id]
1201
-
1202
- # Log success with retry information if applicable
1203
- headers = aio_pika_message.headers or {}
1204
- if "x-retry-count" in headers:
1205
- retry_count = int(str(headers.get("x-retry-count", 0)))
1206
- logger.info(
1207
- f"Message {message_id}#{self.queue_name} processed successfully after {retry_count} retries"
1208
- )
1209
- else:
1210
- logger.info(
1211
- f"Message {message_id}#{self.queue_name} processed successfully"
1705
+ if "x-retry-count" in headers:
1706
+ retry_count = int(str(headers.get("x-retry-count", 0)))
1707
+ logger.debug(
1708
+ "Message %s#%s processed "
1709
+ + (
1710
+ "successfully"
1711
+ if successfully
1712
+ else "with errors"
1713
+ )
1714
+ + " after %s retries in %.4fs%s",
1715
+ message_id,
1716
+ self.queue_name,
1717
+ retry_count,
1718
+ elapsed_time,
1719
+ trace_info,
1720
+ )
1721
+ else:
1722
+ logger.debug(
1723
+ "Message %s#%s processed "
1724
+ + (
1725
+ "successfully"
1726
+ if successfully
1727
+ else "with errors"
1728
+ )
1729
+ + " in %.4fs%s",
1730
+ message_id,
1731
+ self.queue_name,
1732
+ elapsed_time,
1733
+ trace_info,
1734
+ )
1735
+ ...
1736
+
1737
+ except Exception as base_exc:
1738
+ logger.critical(
1739
+ f"Critical error processing message {aio_pika_message.message_id} when providing bus message controller: {base_exc}"
1212
1740
  )
1213
1741
 
1214
1742
 
@@ -1248,68 +1776,73 @@ class MessageBusWorker:
1248
1776
  async def start_async(self) -> None:
1249
1777
  all_message_handlers_set: MESSAGE_HANDLER_DATA_SET = set()
1250
1778
  all_scheduled_actions_set: SCHEDULED_ACTION_DATA_SET = set()
1251
- async with self.lifecycle():
1252
- for instance_class in self.app.controllers:
1253
- controller = MessageBusController.get_messagebus(instance_class)
1254
-
1255
- if controller is None:
1256
- continue
1257
-
1258
- instance: Any = self.container.get_by_type(instance_class)
1259
-
1260
- factory = controller.get_messagebus_factory()
1261
- handlers, schedulers = factory(instance)
1262
-
1263
- message_handler_data_map: dict[str, MessageHandlerData] = {}
1264
- all_scheduled_actions_set.update(schedulers)
1265
- for handler_data in handlers:
1266
- message_type = handler_data.spec.message_type
1267
- topic = message_type.MESSAGE_TOPIC
1268
-
1269
- # Filter handlers by name if specified
1270
- if (
1271
- self.handler_names is not None
1272
- and handler_data.spec.name is not None
1273
- ):
1274
- if handler_data.spec.name not in self.handler_names:
1275
- continue
1276
- elif (
1277
- self.handler_names is not None
1278
- and handler_data.spec.name is None
1279
- ):
1280
- # Skip handlers without names when filtering is requested
1281
- continue
1779
+ with providing_app_type("worker"):
1780
+ async with self.lifecycle():
1781
+ for instance_class in self.app.controllers:
1782
+ controller = MessageBusController.get_last(instance_class)
1282
1783
 
1283
- if (
1284
- topic in message_handler_data_map
1285
- and message_type.MESSAGE_TYPE == "task"
1286
- ):
1287
- logger.warning(
1288
- "Task handler for topic '%s' already registered. Skipping"
1289
- % topic
1290
- )
1784
+ if controller is None:
1291
1785
  continue
1292
- message_handler_data_map[topic] = handler_data
1293
- all_message_handlers_set.add(handler_data)
1294
1786
 
1295
- broker_backend = get_message_broker_backend_from_url(url=self.backend_url)
1787
+ instance: Any = self.container.get_by_type(instance_class)
1788
+
1789
+ factory = controller.get_messagebus_factory()
1790
+ handlers, schedulers = factory(instance)
1791
+
1792
+ message_handler_data_map: dict[str, MessageHandlerData] = {}
1793
+ all_scheduled_actions_set.update(schedulers)
1794
+ for handler_data in handlers:
1795
+ message_type = handler_data.spec.message_type
1796
+ topic = message_type.MESSAGE_TOPIC
1797
+
1798
+ # Filter handlers by name if specified
1799
+ if (
1800
+ self.handler_names is not None
1801
+ and handler_data.spec.name is not None
1802
+ ):
1803
+ if handler_data.spec.name not in self.handler_names:
1804
+ continue
1805
+ elif (
1806
+ self.handler_names is not None
1807
+ and handler_data.spec.name is None
1808
+ ):
1809
+ # Skip handlers without names when filtering is requested
1810
+ continue
1811
+
1812
+ if (
1813
+ topic in message_handler_data_map
1814
+ and message_type.MESSAGE_TYPE == "task"
1815
+ ):
1816
+ logger.warning(
1817
+ "Task handler for topic '%s' already registered. Skipping"
1818
+ % topic
1819
+ )
1820
+ continue
1821
+ message_handler_data_map[topic] = handler_data
1822
+ all_message_handlers_set.add(handler_data)
1296
1823
 
1297
- consumer = self._consumer = create_message_bus(
1298
- broker_url=self.broker_url,
1299
- broker_backend=broker_backend,
1300
- scheduled_actions=all_scheduled_actions_set,
1301
- message_handler_set=all_message_handlers_set,
1302
- uow_context_provider=self.uow_context_provider,
1303
- )
1824
+ broker_backend = get_message_broker_backend_from_url(
1825
+ url=self.backend_url
1826
+ )
1304
1827
 
1305
- await consumer.consume()
1828
+ consumer = self._consumer = create_message_bus(
1829
+ broker_url=self.broker_url,
1830
+ broker_backend=broker_backend,
1831
+ scheduled_actions=all_scheduled_actions_set,
1832
+ message_handler_set=all_message_handlers_set,
1833
+ uow_context_provider=self.uow_context_provider,
1834
+ )
1835
+
1836
+ await consumer.consume()
1306
1837
 
1307
1838
  def start_sync(self) -> None:
1308
1839
 
1309
1840
  def on_shutdown(loop: asyncio.AbstractEventLoop) -> None:
1310
- logger.info("Shutting down - signal received")
1841
+ logger.warning("Shutting down - signal received")
1311
1842
  # Schedule the shutdown to run in the event loop
1312
- asyncio.create_task(self._graceful_shutdown())
1843
+ asyncio.create_task(
1844
+ self._graceful_shutdown(), name="Worker-Graceful-Shutdown"
1845
+ )
1313
1846
  # wait until the shutdown is complete
1314
1847
 
1315
1848
  with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
@@ -1317,15 +1850,22 @@ class MessageBusWorker:
1317
1850
  loop.add_signal_handler(signal.SIGINT, on_shutdown, loop)
1318
1851
  # Add graceful shutdown handler for SIGTERM as well
1319
1852
  loop.add_signal_handler(signal.SIGTERM, on_shutdown, loop)
1320
- runner.run(self.start_async())
1853
+ try:
1854
+ runner.run(self.start_async())
1855
+ except Exception as e:
1856
+ logger.critical("Worker failed to start due to connection error: %s", e)
1857
+ # Exit with error code 1 to indicate startup failure
1858
+ import sys
1859
+
1860
+ sys.exit(1)
1321
1861
 
1322
1862
  async def _graceful_shutdown(self) -> None:
1323
1863
  """Handles graceful shutdown process"""
1324
- logger.info("Initiating graceful shutdown sequence")
1864
+ logger.warning("Initiating graceful shutdown sequence")
1325
1865
  # Use the comprehensive close method that handles shutdown, task waiting and connection cleanup
1326
1866
 
1327
1867
  self.consumer.shutdown()
1328
- logger.info("Graceful shutdown completed")
1868
+ logger.warning("Graceful shutdown completed")
1329
1869
 
1330
1870
 
1331
1871
  class AioPikaMessageBusController(BusMessageController):
@@ -1375,13 +1915,15 @@ class AioPikaMessageBusController(BusMessageController):
1375
1915
  async def reject(self) -> None:
1376
1916
  await self.aio_pika_message.reject()
1377
1917
 
1378
- async def retry(self) -> None:
1918
+ async def retry(self, delay: float = 5) -> None:
1379
1919
  """
1380
1920
  Retry the message immediately by rejecting with requeue flag.
1381
1921
  This doesn't use the exponential backoff mechanism.
1382
1922
  """
1383
1923
  callback = self._get_callback()
1384
- await callback.handle_reject_message(self.aio_pika_message, requeue=True)
1924
+ await callback.handle_reject_message(
1925
+ self.aio_pika_message, requeue=True, requeue_timeout=delay
1926
+ )
1385
1927
 
1386
1928
  async def retry_later(self, delay: int) -> None:
1387
1929
  """
@@ -1404,13 +1946,14 @@ class AioPikaMessageBusController(BusMessageController):
1404
1946
  float(delay),
1405
1947
  retry_count + 1,
1406
1948
  None, # No specific exception
1407
- )
1949
+ ),
1950
+ name=f"MessageHandler-{callback.queue_name}-delayed-retry-{self.aio_pika_message.message_id or 'unknown'}-{int(time.time())}",
1408
1951
  )
1409
1952
 
1410
1953
  # Acknowledge the current message since we'll republish
1411
1954
  await self.aio_pika_message.ack()
1412
1955
 
1413
1956
  except Exception as e:
1414
- logger.exception(f"Failed to schedule retry_later: {e}")
1957
+ logger.exception("Failed to schedule retry_later: %s", e)
1415
1958
  # Fall back to immediate retry
1416
1959
  await self.aio_pika_message.reject(requeue=True)