jararaca 0.3.11a15__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of jararaca might be problematic.

@@ -1,18 +1,36 @@
  import asyncio
  import inspect
  import logging
+ import random
  import signal
+ import time
+ import uuid
  from abc import ABC
  from contextlib import asynccontextmanager, suppress
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from datetime import UTC, datetime
- from typing import Any, AsyncContextManager, AsyncGenerator, Type, get_origin
+ from typing import (
+     Any,
+     AsyncContextManager,
+     AsyncGenerator,
+     Awaitable,
+     Optional,
+     Type,
+     get_origin,
+ )
  from urllib.parse import parse_qs, urlparse

  import aio_pika
  import aio_pika.abc
  import uvloop
- from aio_pika.exceptions import AMQPError, ChannelClosed, ChannelNotFoundEntity
+ from aio_pika.exceptions import (
+     AMQPChannelError,
+     AMQPConnectionError,
+     AMQPError,
+     ChannelClosed,
+     ChannelNotFoundEntity,
+     ConnectionClosed,
+ )
  from pydantic import BaseModel

  from jararaca.broker_backend import MessageBrokerBackend
@@ -38,9 +56,12 @@ from jararaca.microservice import (
      MessageBusTransactionData,
      Microservice,
      SchedulerTransactionData,
+     ShutdownState,
+     provide_shutdown_state,
  )
  from jararaca.scheduler.decorators import ScheduledActionData
  from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+ from jararaca.utils.retry import RetryConfig, retry_with_backoff

  logger = logging.getLogger(__name__)

@@ -50,6 +71,34 @@ class AioPikaWorkerConfig:
      url: str
      exchange: str
      prefetch_count: int
+     connection_retry_config: RetryConfig = field(
+         default_factory=lambda: RetryConfig(
+             max_retries=15,
+             initial_delay=1.0,
+             max_delay=60.0,
+             backoff_factor=2.0,
+         )
+     )
+     consumer_retry_config: RetryConfig = field(
+         default_factory=lambda: RetryConfig(
+             max_retries=15,
+             initial_delay=0.5,
+             max_delay=40.0,
+             backoff_factor=2.0,
+         )
+     )
+     # Connection health monitoring settings
+     connection_heartbeat_interval: float = 30.0  # seconds
+     connection_health_check_interval: float = 10.0  # seconds
+     reconnection_backoff_config: RetryConfig = field(
+         default_factory=lambda: RetryConfig(
+             max_retries=-1,  # Infinite retries for reconnection
+             initial_delay=2.0,
+             max_delay=120.0,
+             backoff_factor=2.0,
+             jitter=True,
+         )
+     )


  class AioPikaMessage(MessageOf[Message]):
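
Note: RetryConfig is imported from jararaca.utils.retry, which is outside this diff. A minimal sketch consistent with how it is constructed and read in the code above (field names inferred from the call sites; defaults are hypothetical, not the package's actual source):

    from dataclasses import dataclass

    @dataclass
    class RetryConfig:
        # Hypothetical reconstruction based only on usage in this diff.
        max_retries: int = 3          # -1 is treated as "retry forever" by the reconnection loop
        initial_delay: float = 1.0    # seconds before the first retry
        max_delay: float = 60.0       # cap applied after exponential growth
        backoff_factor: float = 2.0   # multiplier applied per attempt
        jitter: bool = False          # randomize each delay by +/-25%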
@@ -97,6 +146,20 @@ class MessageBusConsumer(ABC):

      def shutdown(self) -> None: ...

+     async def close(self) -> None:
+         """Close all resources related to the consumer"""
+
+
+ class _WorkerShutdownState(ShutdownState):
+     def __init__(self, shutdown_event: asyncio.Event):
+         self.shutdown_event = shutdown_event
+
+     def request_shutdown(self) -> None:
+         self.shutdown_event.set()
+
+     def is_shutdown_requested(self) -> bool:
+         return self.shutdown_event.is_set()
+

  class AioPikaMicroserviceConsumer(MessageBusConsumer):
      def __init__(
@@ -115,107 +178,741 @@ class AioPikaMicroserviceConsumer(MessageBusConsumer):
          self.incoming_map: dict[str, MessageHandlerData] = {}
          self.uow_context_provider = uow_context_provider
          self.shutdown_event = asyncio.Event()
+         self.shutdown_state = _WorkerShutdownState(self.shutdown_event)
          self.lock = asyncio.Lock()
          self.tasks: set[asyncio.Task[Any]] = set()
+         self.connection: aio_pika.abc.AbstractConnection | None = None
+         self.channels: dict[str, aio_pika.abc.AbstractChannel] = {}
+
+         # Connection resilience attributes
+         self.connection_healthy = False
+         self.connection_lock = asyncio.Lock()
+         self.reconnection_event = asyncio.Event()
+         self.reconnection_in_progress = False
+         self.consumer_tags: dict[str, str] = {}  # Track consumer tags for cleanup
+         self.health_check_task: asyncio.Task[Any] | None = None
+         self.reconnection_task: asyncio.Task[Any] | None = None
+
+     async def _verify_infrastructure(self) -> bool:
+         """
+         Verify that the required RabbitMQ infrastructure (exchanges, queues) exists.
+         Returns True if all required infrastructure is in place.
+         """
+         try:
+             async with self.connect() as connection:
+                 # Create a main channel just for checking infrastructure
+                 async with connection.channel() as main_channel:
+                     # Get existing exchange and queues to verify infrastructure is in place
+                     await RabbitmqUtils.get_main_exchange(
+                         channel=main_channel,
+                         exchange_name=self.config.exchange,
+                     )
+                     await RabbitmqUtils.get_dl_exchange(channel=main_channel)
+                     await RabbitmqUtils.get_dl_queue(channel=main_channel)
+                     return True
+         except (ChannelNotFoundEntity, ChannelClosed, AMQPError) as e:
+             logger.critical(
+                 f"Required exchange or queue infrastructure not found. "
+                 f"Please use the declare command first to create the required infrastructure. Error: {e}"
+             )
+             return False
+
+     async def _setup_message_handler_consumer(
+         self, handler: MessageHandlerData
+     ) -> bool:
+         """
+         Set up a consumer for a message handler with retry mechanism.
+         Returns True if successful, False otherwise.
+         """
+         queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+         routing_key = f"{handler.message_type.MESSAGE_TOPIC}.#"
+
+         async def setup_consumer() -> None:
+             # Wait for connection to be healthy if reconnection is in progress
+             if self.reconnection_in_progress:
+                 await self.reconnection_event.wait()
+
+             # Create a channel using the context manager
+             async with self.create_channel(queue_name) as channel:
+                 queue = await RabbitmqUtils.get_queue(
+                     channel=channel, queue_name=queue_name
+                 )

-     async def consume(self) -> None:
+                 # Configure consumer and get the consumer tag
+                 consumer_tag = await queue.consume(
+                     callback=MessageHandlerCallback(
+                         consumer=self,
+                         queue_name=queue_name,
+                         routing_key=routing_key,
+                         message_handler=handler,
+                     ),
+                     no_ack=handler.spec.auto_ack,
+                 )

-         connection = await aio_pika.connect(self.config.url)
+                 # Store consumer tag for cleanup
+                 self.consumer_tags[queue_name] = consumer_tag

-         channel = await connection.channel()
+                 logger.info(
+                     f"Consuming message handler {queue_name} on dedicated channel"
+                 )

-         await channel.set_qos(prefetch_count=self.config.prefetch_count)
+         try:
+             # Setup with retry
+             await retry_with_backoff(
+                 setup_consumer,
+                 retry_config=self.config.consumer_retry_config,
+                 retry_exceptions=(
+                     ChannelNotFoundEntity,
+                     ChannelClosed,
+                     AMQPError,
+                     AMQPConnectionError,
+                     AMQPChannelError,
+                     ConnectionClosed,
+                 ),
+             )
+             return True
+         except Exception as e:
+             logger.error(
+                 f"Failed to setup consumer for queue '{queue_name}' after retries: {e}"
+             )
+             return False
+
+     async def _setup_scheduled_action_consumer(
+         self, scheduled_action: ScheduledActionData
+     ) -> bool:
+         """
+         Set up a consumer for a scheduled action with retry mechanism.
+         Returns True if successful, False otherwise.
+         """
+         queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+         routing_key = queue_name
+
+         async def setup_consumer() -> None:
+             # Wait for connection to be healthy if reconnection is in progress
+             if self.reconnection_in_progress:
+                 await self.reconnection_event.wait()
+
+             # Create a channel using the context manager
+             async with self.create_channel(queue_name) as channel:
+                 queue = await RabbitmqUtils.get_queue(
+                     channel=channel, queue_name=queue_name
+                 )
+
+                 # Configure consumer and get the consumer tag
+                 consumer_tag = await queue.consume(
+                     callback=ScheduledMessageHandlerCallback(
+                         consumer=self,
+                         queue_name=queue_name,
+                         routing_key=routing_key,
+                         scheduled_action=scheduled_action,
+                     ),
+                     no_ack=True,
+                 )
+
+                 # Store consumer tag for cleanup
+                 self.consumer_tags[queue_name] = consumer_tag
+
+                 logger.info(f"Consuming scheduler {queue_name} on dedicated channel")

-         # Get existing exchange and queues
          try:
-             exchange = await RabbitmqUtils.get_main_exchange(
-                 channel=channel,
-                 exchange_name=self.config.exchange,
+             # Setup with retry
+             await retry_with_backoff(
+                 setup_consumer,
+                 retry_config=self.config.consumer_retry_config,
+                 retry_exceptions=(
+                     ChannelNotFoundEntity,
+                     ChannelClosed,
+                     AMQPError,
+                     AMQPConnectionError,
+                     AMQPChannelError,
+                     ConnectionClosed,
+                 ),
              )
+             return True
+         except Exception as e:
+             logger.error(
+                 f"Failed to setup consumer for scheduler queue '{queue_name}' after retries: {e}"
+             )
+             return False

-             dlx = await RabbitmqUtils.get_dl_exchange(channel=channel)
-             dlq = await RabbitmqUtils.get_dl_queue(channel=channel)
-         except (ChannelNotFoundEntity, ChannelClosed, AMQPError) as e:
-             logger.critical(
-                 f"Required exchange or queue infrastructure not found and passive mode is enabled. "
-                 f"Please use the declare command first to create the required infrastructure. Error: {e}"
+     async def consume(self) -> None:
+         """
+         Main consume method that sets up all message handlers and scheduled actions with retry mechanisms.
+         """
+         # Establish initial connection
+         async with self.connect() as connection:
+             self.connection_healthy = True
+
+             # Start connection health monitoring
+             self.health_check_task = asyncio.create_task(
+                 self._monitor_connection_health()
              )
-             self.shutdown_event.set()
+
+             # Verify infrastructure with retry
+             infra_check_success = await retry_with_backoff(
+                 self._verify_infrastructure,
+                 retry_config=self.config.connection_retry_config,
+                 retry_exceptions=(Exception,),
+             )
+
+             if not infra_check_success:
+                 logger.critical(
+                     "Failed to verify RabbitMQ infrastructure. Shutting down."
+                 )
+                 self.shutdown_event.set()
+                 return
+
+             async def wait_for(
+                 type: str, name: str, coroutine: Awaitable[bool]
+             ) -> tuple[str, str, bool]:
+                 return type, name, await coroutine
+
+             tasks: set[asyncio.Task[tuple[str, str, bool]]] = set()
+
+             # Setup message handlers
+             for handler in self.message_handler_set:
+                 queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+                 self.incoming_map[queue_name] = handler
+
+                 tasks.add(
+                     task := asyncio.create_task(
+                         wait_for(
+                             "message_handler",
+                             queue_name,
+                             self._setup_message_handler_consumer(handler),
+                         )
+                     )
+                 )
+
+             # Setup scheduled actions
+             for scheduled_action in self.scheduled_actions:
+                 queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+                 tasks.add(
+                     task := asyncio.create_task(
+                         wait_for(
+                             "scheduled_action",
+                             queue_name,
+                             self._setup_scheduled_action_consumer(scheduled_action),
+                         )
+                     )
+                 )
+
+             async def handle_task_results() -> None:
+                 for task in asyncio.as_completed(tasks):
+                     type, name, success = await task
+                     if success:
+                         logger.info(f"Successfully set up {type} consumer for {name}")
+                     else:
+                         logger.warning(
+                             f"Failed to set up {type} consumer for {name}, will not process messages from this queue"
+                         )
+
+             handle_task_results_task = asyncio.create_task(handle_task_results())
+
+             # Wait for shutdown signal
+             await self.shutdown_event.wait()
+             logger.info("Shutdown event received, stopping consumers")
+
+             # Cancel health monitoring
+             if self.health_check_task:
+                 self.health_check_task.cancel()
+                 with suppress(asyncio.CancelledError):
+                     await self.health_check_task
+
+             # Cancel reconnection task if running
+             if self.reconnection_task:
+                 self.reconnection_task.cancel()
+                 with suppress(asyncio.CancelledError):
+                     await self.reconnection_task
+
+             handle_task_results_task.cancel()
+             with suppress(asyncio.CancelledError):
+                 await handle_task_results_task
+             for task in tasks:
+                 if not task.done():
+                     task.cancel()
+                     with suppress(asyncio.CancelledError):
+                         await task
+             logger.info("Worker shutting down")
+
+             # Wait for all tasks to complete
+             await self.wait_all_tasks_done()
+
+             # Close all channels and the connection
+             await self.close_channels_and_connection()
+
+     async def wait_all_tasks_done(self) -> None:
+         if not self.tasks:
              return

-         for handler in self.message_handler_set:
+         logger.info(f"Waiting for {len(self.tasks)} in-flight tasks to complete")
+         async with self.lock:
+             # Use gather with return_exceptions=True to ensure all tasks are awaited
+             # even if some raise exceptions
+             results = await asyncio.gather(*self.tasks, return_exceptions=True)

-             queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
-             routing_key = f"{handler.message_type.MESSAGE_TOPIC}.#"
+             # Log any exceptions that occurred
+             for result in results:
+                 if isinstance(result, Exception):
+                     logger.error(f"Task raised an exception during shutdown: {result}")
+
+     async def close_channels_and_connection(self) -> None:
+         """Close all channels and then the connection"""
+         logger.info("Closing channels and connection...")
+         await self._cleanup_connection()

-             self.incoming_map[queue_name] = handler
+     def shutdown(self) -> None:
+         """Signal for shutdown"""
+         logger.info("Initiating graceful shutdown")
+         self.shutdown_event.set()

+     async def close(self) -> None:
+         """Implement MessageBusConsumer.close for cleanup"""
+         logger.info("Closing consumer...")
+         self.shutdown()
+
+         # Cancel health monitoring
+         if self.health_check_task:
+             self.health_check_task.cancel()
+             with suppress(asyncio.CancelledError):
+                 await self.health_check_task
+
+         # Cancel reconnection task if running
+         if self.reconnection_task:
+             self.reconnection_task.cancel()
+             with suppress(asyncio.CancelledError):
+                 await self.reconnection_task
+
+         await self.wait_all_tasks_done()
+         await self.close_channels_and_connection()
+
+     async def get_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel | None:
+         """
+         Get the channel for a specific queue, or None if not found.
+         This helps with error handling when a channel might have been closed.
+         """
+         # If reconnection is in progress, wait for it to complete
+         if self.reconnection_in_progress:
              try:
-                 queue = await RabbitmqUtils.get_queue(
-                     channel=channel, queue_name=queue_name
-                 )
-             except (ChannelNotFoundEntity, ChannelClosed, AMQPError) as e:
-                 logger.error(
-                     f"Queue '{queue_name}' not found and passive mode is enabled. "
-                     f"Please use the declare command first to create the queue. Error: {e}"
+                 await asyncio.wait_for(self.reconnection_event.wait(), timeout=30.0)
+             except asyncio.TimeoutError:
+                 logger.warning(
+                     f"Timeout waiting for reconnection when getting channel for {queue_name}"
                  )
-                 continue
+                 return None

-             await queue.consume(
-                 callback=MessageHandlerCallback(
-                     consumer=self,
-                     queue_name=queue_name,
-                     routing_key=routing_key,
-                     message_handler=handler,
-                 ),
-                 no_ack=handler.spec.auto_ack,
+         if queue_name not in self.channels:
+             logger.warning(f"No channel found for queue {queue_name}")
+             return None
+
+         try:
+             channel = self.channels[queue_name]
+             if channel.is_closed:
+                 logger.warning(f"Channel for queue {queue_name} is closed")
+                 # Remove the closed channel
+                 del self.channels[queue_name]
+
+                 # Attempt to recreate the channel if connection is healthy
+                 if (
+                     self.connection
+                     and not self.connection.is_closed
+                     and self.connection_healthy
+                 ):
+                     try:
+                         logger.info(f"Creating new channel for {queue_name}")
+                         self.channels[queue_name] = await self.connection.channel()
+                         await self.channels[queue_name].set_qos(
+                             prefetch_count=self.config.prefetch_count
+                         )
+                         return self.channels[queue_name]
+                     except Exception as e:
+                         logger.error(
+                             f"Failed to recreate channel for {queue_name}: {e}"
+                         )
+                         # Trigger reconnection if channel creation fails
+                         self._trigger_reconnection()
+                         return None
+                 else:
+                     # Connection is not healthy, trigger reconnection
+                     self._trigger_reconnection()
+                     return None
+             return channel
+         except Exception as e:
+             logger.error(f"Error accessing channel for queue {queue_name}: {e}")
+             # Trigger reconnection on any channel access error
+             self._trigger_reconnection()
+             return None
+
+     async def _establish_channel(self, queue_name: str) -> aio_pika.abc.AbstractChannel:
+         """
+         Creates a new channel for the specified queue with proper QoS settings.
+         """
+         if self.connection is None or self.connection.is_closed:
+             logger.warning(
+                 f"Cannot create channel for {queue_name}: connection is not available"
              )
+             raise RuntimeError("Connection is not available")

-             logger.info(f"Consuming message handler {queue_name}")
+         logger.debug(f"Creating channel for queue {queue_name}")
+         channel = await self.connection.channel()
+         await channel.set_qos(prefetch_count=self.config.prefetch_count)
+         logger.debug(f"Created channel for queue {queue_name}")
+         return channel

-         for scheduled_action in self.scheduled_actions:
+     @asynccontextmanager
+     async def create_channel(
+         self, queue_name: str
+     ) -> AsyncGenerator[aio_pika.abc.AbstractChannel, None]:
+         """
+         Create and yield a channel for the specified queue with retry mechanism.
+         This context manager ensures the channel is properly managed.
+         """
+         try:
+             # Create a new channel with retry
+             channel = await retry_with_backoff(
+                 fn=lambda: self._establish_channel(queue_name),
+                 retry_config=self.config.consumer_retry_config,
+                 retry_exceptions=(
+                     AMQPConnectionError,
+                     AMQPChannelError,
+                     ConnectionError,
+                 ),
+             )

-             queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+             # Save in the channels dict for tracking
+             self.channels[queue_name] = channel
+             logger.debug(f"Created new channel for queue {queue_name}")

-             routing_key = queue_name
+             try:
+                 yield channel
+             finally:
+                 # Don't close the channel here as it might be used later
+                 # It will be closed during shutdown
+                 pass
+         except aio_pika.exceptions.AMQPError as e:
+             logger.error(
+                 f"Error creating channel for queue {queue_name} after retries: {e}"
+             )
+             raise

+     async def _establish_connection(self) -> aio_pika.abc.AbstractConnection:
+         """
+         Creates a new RabbitMQ connection with retry logic.
+         """
+         try:
+             logger.info("Establishing connection to RabbitMQ")
+             connection = await aio_pika.connect(
+                 self.config.url,
+                 heartbeat=self.config.connection_heartbeat_interval,
+             )
+             logger.info("Connected to RabbitMQ successfully")
+             return connection
+         except Exception as e:
+             logger.error(f"Failed to connect to RabbitMQ: {e}")
+             raise
+
+     @asynccontextmanager
+     async def connect(self) -> AsyncGenerator[aio_pika.abc.AbstractConnection, None]:
+         """
+         Create and manage the main connection to RabbitMQ with automatic retry.
+         """
+         if self.connection is not None and not self.connection.is_closed:
+             logger.debug("Connection already exists, reusing existing connection")
              try:
-                 queue = await RabbitmqUtils.get_queue(
-                     channel=channel, queue_name=queue_name
-                 )
-             except (ChannelNotFoundEntity, ChannelClosed, AMQPError) as e:
-                 logger.error(
-                     f"Scheduler queue '{queue_name}' not found and passive mode is enabled. "
-                     f"Please use the declare command first to create the queue. Error: {e}"
-                 )
-                 continue
+                 yield self.connection
+             finally:
+                 # The existing connection will be handled by close_channels_and_connection
+                 pass
+             return

-             await queue.consume(
-                 callback=ScheduledMessageHandlerCallback(
-                     consumer=self,
-                     queue_name=queue_name,
-                     routing_key=routing_key,
-                     scheduled_action=scheduled_action,
+         try:
+             # Create a new connection with retry
+             self.connection = await retry_with_backoff(
+                 self._establish_connection,
+                 retry_config=self.config.connection_retry_config,
+                 retry_exceptions=(
+                     AMQPConnectionError,
+                     ConnectionError,
+                     OSError,
+                     TimeoutError,
                  ),
-                 no_ack=True,
              )

-             logger.info(f"Consuming scheduler {queue_name}")
+             try:
+                 yield self.connection
+             finally:
+                 # Don't close the connection here; it will be closed in close_channels_and_connection
+                 pass
+         except Exception as e:
+             logger.error(
+                 f"Failed to establish connection to RabbitMQ after retries: {e}"
+             )
+             if self.connection:
+                 try:
+                     await self.connection.close()
+                 except Exception as close_error:
+                     logger.error(
+                         f"Error closing connection after connect failure: {close_error}"
+                     )
+                 self.connection = None
+             raise

-         await self.shutdown_event.wait()
-         logger.info("Worker shutting down")
+     @asynccontextmanager
+     async def get_channel_ctx(
+         self, queue_name: str
+     ) -> AsyncGenerator[aio_pika.abc.AbstractChannel, None]:
+         """
+         Get a channel for a specific queue as a context manager.
+         This is safer than using get_channel directly as it ensures proper error handling.
+         """
+         max_retries = 3
+         retry_delay = 1.0
+
+         for attempt in range(max_retries):
+             try:
+                 channel = await self.get_channel(queue_name)
+                 if channel is not None:
+                     try:
+                         yield channel
+                         return
+                     finally:
+                         # We don't close the channel here as it's managed by the consumer
+                         pass
+
+                 # No channel available, check connection state
+                 if (
+                     self.connection
+                     and not self.connection.is_closed
+                     and self.connection_healthy
+                 ):
+                     # Try to create a new channel
+                     async with self.create_channel(queue_name) as new_channel:
+                         yield new_channel
+                         return
+                 else:
+                     # Connection is not healthy, wait for reconnection
+                     if self.reconnection_in_progress:
+                         try:
+                             await asyncio.wait_for(
+                                 self.reconnection_event.wait(), timeout=30.0
+                             )
+                             # Retry after reconnection
+                             continue
+                         except asyncio.TimeoutError:
+                             logger.warning(
+                                 f"Timeout waiting for reconnection for queue {queue_name}"
+                             )

-         await self.wait_all_tasks_done()
+                     # Still no connection, trigger reconnection
+                     if not self.reconnection_in_progress:
+                         self._trigger_reconnection()

-         await channel.close()
-         await connection.close()
+                 if attempt < max_retries - 1:
+                     logger.info(
+                         f"Retrying channel access for {queue_name} in {retry_delay}s"
+                     )
+                     await asyncio.sleep(retry_delay)
+                     retry_delay *= 2
+                 else:
+                     raise RuntimeError(
+                         f"Cannot get channel for queue {queue_name}: no connection available after {max_retries} attempts"
+                     )

-     async def wait_all_tasks_done(self) -> None:
-         async with self.lock:
-             await asyncio.gather(*self.tasks)
+             except Exception as e:
+                 if attempt < max_retries - 1:
+                     logger.warning(
+                         f"Error getting channel for {queue_name}, retrying: {e}"
+                     )
+                     await asyncio.sleep(retry_delay)
+                     retry_delay *= 2
+                 else:
+                     logger.error(
+                         f"Failed to get channel for {queue_name} after {max_retries} attempts: {e}"
+                     )
+                     raise
+
+     async def _monitor_connection_health(self) -> None:
+         """
+         Monitor connection health and trigger reconnection if needed.
+         This runs as a background task.
+         """
+         while not self.shutdown_event.is_set():
+             try:
+                 await asyncio.sleep(self.config.connection_health_check_interval)

-     def shutdown(self) -> None:
-         self.shutdown_event.set()
+                 if self.shutdown_event.is_set():
+                     break
+
+                 # Check connection health
+                 if not await self._is_connection_healthy():
+                     logger.warning(
+                         "Connection health check failed, triggering reconnection"
+                     )
+                     if not self.reconnection_in_progress:
+                         self._trigger_reconnection()
+
+             except asyncio.CancelledError:
+                 logger.info("Connection health monitoring cancelled")
+                 break
+             except Exception as e:
+                 logger.error(f"Error in connection health monitoring: {e}")
+                 await asyncio.sleep(5)  # Wait before retrying
+
+     async def _is_connection_healthy(self) -> bool:
+         """
+         Check if the connection is healthy.
+         """
+         try:
+             if self.connection is None or self.connection.is_closed:
+                 return False
+
+             # Try to create a temporary channel to test connection
+             async with self.connection.channel() as test_channel:
+                 # If we can create a channel, connection is healthy
+                 return True
+
+         except Exception as e:
+             logger.debug(f"Connection health check failed: {e}")
+             return False
+
+     def _trigger_reconnection(self) -> None:
+         """
+         Trigger reconnection process.
+         """
+         if not self.reconnection_in_progress and not self.shutdown_event.is_set():
+             self.reconnection_in_progress = True
+             self.connection_healthy = False
+             self.reconnection_event.clear()
+
+             # Start reconnection task
+             self.reconnection_task = asyncio.create_task(self._handle_reconnection())
+             self.reconnection_task.add_done_callback(self._on_reconnection_done)
+
+     def _on_reconnection_done(self, task: asyncio.Task[Any]) -> None:
+         """
+         Handle completion of reconnection task.
+         """
+         self.reconnection_in_progress = False
+         if task.exception():
+             logger.error(f"Reconnection task failed: {task.exception()}")
+         else:
+             logger.info("Reconnection completed successfully")
+
+     async def _handle_reconnection(self) -> None:
+         """
+         Handle the reconnection process with exponential backoff.
+         """
+         logger.info("Starting reconnection process")
+
+         # Close existing connection and channels
+         await self._cleanup_connection()
+
+         reconnection_config = self.config.reconnection_backoff_config
+         attempt = 0
+
+         while not self.shutdown_event.is_set():
+             try:
+                 attempt += 1
+                 logger.info(f"Reconnection attempt {attempt}")
+
+                 # Establish new connection
+                 self.connection = await self._establish_connection()
+                 self.connection_healthy = True
+
+                 # Re-establish all consumers
+                 await self._reestablish_consumers()
+
+                 logger.info("Reconnection successful")
+                 self.reconnection_event.set()
+                 return
+
+             except Exception as e:
+                 logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                 if self.shutdown_event.is_set():
+                     break
+
+                 # Calculate backoff delay
+                 delay = reconnection_config.initial_delay * (
+                     reconnection_config.backoff_factor ** (attempt - 1)
+                 )
+                 if reconnection_config.jitter:
+                     jitter_amount = delay * 0.25
+                     delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                     delay = max(delay, 0.1)
+
+                 delay = min(delay, reconnection_config.max_delay)
+
+                 logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                 await asyncio.sleep(delay)
+
+     async def _cleanup_connection(self) -> None:
+         """
+         Clean up existing connection and channels.
+         """
+         # Cancel existing consumers
+         for queue_name, channel in self.channels.items():
+             try:
+                 if not channel.is_closed:
+                     # Cancel consumer if we have its tag
+                     if queue_name in self.consumer_tags:
+                         try:
+                             queue = await channel.get_queue(queue_name, ensure=False)
+                             if queue:
+                                 await queue.cancel(self.consumer_tags[queue_name])
+                         except Exception as cancel_error:
+                             logger.warning(
+                                 f"Error cancelling consumer for {queue_name}: {cancel_error}"
+                             )
+                         del self.consumer_tags[queue_name]
+             except Exception as e:
+                 logger.warning(f"Error cancelling consumer for {queue_name}: {e}")
+
+         # Close channels
+         for queue_name, channel in self.channels.items():
+             try:
+                 if not channel.is_closed:
+                     await channel.close()
+             except Exception as e:
+                 logger.warning(f"Error closing channel for {queue_name}: {e}")
+
+         self.channels.clear()
+
+         # Close connection
+         if self.connection and not self.connection.is_closed:
+             try:
+                 await self.connection.close()
+             except Exception as e:
+                 logger.warning(f"Error closing connection: {e}")
+
+         self.connection = None
+         self.connection_healthy = False
+
+     async def _reestablish_consumers(self) -> None:
+         """
+         Re-establish all consumers after reconnection.
+         """
+         logger.info("Re-establishing consumers after reconnection")
+
+         # Re-establish message handlers
+         for handler in self.message_handler_set:
+             queue_name = f"{handler.message_type.MESSAGE_TOPIC}.{handler.instance_callable.__module__}.{handler.instance_callable.__qualname__}"
+             try:
+                 await self._setup_message_handler_consumer(handler)
+                 logger.info(f"Re-established consumer for {queue_name}")
+             except Exception as e:
+                 logger.error(f"Failed to re-establish consumer for {queue_name}: {e}")
+
+         # Re-establish scheduled actions
+         for scheduled_action in self.scheduled_actions:
+             queue_name = f"{scheduled_action.callable.__module__}.{scheduled_action.callable.__qualname__}"
+             try:
+                 await self._setup_scheduled_action_consumer(scheduled_action)
+                 logger.info(f"Re-established scheduler consumer for {queue_name}")
+             except Exception as e:
+                 logger.error(
+                     f"Failed to re-establish scheduler consumer for {queue_name}: {e}"
+                 )


  def create_message_bus(
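
The setup code above leans on retry_with_backoff from jararaca.utils.retry, which this diff does not show. Judging only from its call sites (a zero-argument async callable passed positionally or as fn=, plus retry_config= and retry_exceptions=, with the awaited result returned), a compatible sketch could be:

    import asyncio
    import random
    from typing import Awaitable, Callable, TypeVar

    T = TypeVar("T")

    async def retry_with_backoff(
        fn: Callable[[], Awaitable[T]],
        retry_config: RetryConfig,
        retry_exceptions: tuple[type[BaseException], ...] = (Exception,),
    ) -> T:
        # Hypothetical reconstruction; the real helper may differ.
        attempt = 0
        while True:
            try:
                return await fn()
            except retry_exceptions:
                attempt += 1
                # max_retries=-1 is used above to mean "retry indefinitely"
                if 0 <= retry_config.max_retries < attempt:
                    raise
                delay = min(
                    retry_config.initial_delay
                    * retry_config.backoff_factor ** (attempt - 1),
                    retry_config.max_delay,
                )
                if retry_config.jitter:
                    delay = max(0.1, delay + random.uniform(-0.25 * delay, 0.25 * delay))
                await asyncio.sleep(delay)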
@@ -254,10 +951,150 @@ def create_message_bus(
      exchange = query_params["exchange"][0]
      prefetch_count = int(query_params["prefetch_count"][0])

+     # Parse optional retry configuration parameters
+     connection_retry_config = RetryConfig()
+     consumer_retry_config = RetryConfig(
+         max_retries=30, initial_delay=5, max_delay=60.0, backoff_factor=3.0
+     )
+
+     # Parse optional reconnection configuration parameters
+     reconnection_backoff_config = RetryConfig(
+         max_retries=-1,  # Infinite retries for reconnection
+         initial_delay=2.0,
+         max_delay=120.0,
+         backoff_factor=2.0,
+         jitter=True,
+     )
+
+     # Parse heartbeat and health check intervals
+     connection_heartbeat_interval = 30.0
+     connection_health_check_interval = 10.0
+
+     # Connection retry config parameters
+     if (
+         "connection_retry_max" in query_params
+         and query_params["connection_retry_max"][0].isdigit()
+     ):
+         connection_retry_config.max_retries = int(
+             query_params["connection_retry_max"][0]
+         )
+
+     if "connection_retry_delay" in query_params:
+         try:
+             connection_retry_config.initial_delay = float(
+                 query_params["connection_retry_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "connection_retry_max_delay" in query_params:
+         try:
+             connection_retry_config.max_delay = float(
+                 query_params["connection_retry_max_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "connection_retry_backoff" in query_params:
+         try:
+             connection_retry_config.backoff_factor = float(
+                 query_params["connection_retry_backoff"][0]
+             )
+         except ValueError:
+             pass
+
+     # Consumer retry config parameters
+     if (
+         "consumer_retry_max" in query_params
+         and query_params["consumer_retry_max"][0].isdigit()
+     ):
+         consumer_retry_config.max_retries = int(
+             query_params["consumer_retry_max"][0]
+         )
+
+     if "consumer_retry_delay" in query_params:
+         try:
+             consumer_retry_config.initial_delay = float(
+                 query_params["consumer_retry_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "consumer_retry_max_delay" in query_params:
+         try:
+             consumer_retry_config.max_delay = float(
+                 query_params["consumer_retry_max_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "consumer_retry_backoff" in query_params:
+         try:
+             consumer_retry_config.backoff_factor = float(
+                 query_params["consumer_retry_backoff"][0]
+             )
+         except ValueError:
+             pass
+
+     # Reconnection backoff config parameters
+     if (
+         "reconnection_retry_max" in query_params
+         and query_params["reconnection_retry_max"][0].isdigit()
+     ):
+         reconnection_backoff_config.max_retries = int(
+             query_params["reconnection_retry_max"][0]
+         )
+
+     if "reconnection_retry_delay" in query_params:
+         try:
+             reconnection_backoff_config.initial_delay = float(
+                 query_params["reconnection_retry_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "reconnection_retry_max_delay" in query_params:
+         try:
+             reconnection_backoff_config.max_delay = float(
+                 query_params["reconnection_retry_max_delay"][0]
+             )
+         except ValueError:
+             pass
+
+     if "reconnection_retry_backoff" in query_params:
+         try:
+             reconnection_backoff_config.backoff_factor = float(
+                 query_params["reconnection_retry_backoff"][0]
+             )
+         except ValueError:
+             pass
+
+     # Heartbeat and health check intervals
+     if "connection_heartbeat_interval" in query_params:
+         try:
+             connection_heartbeat_interval = float(
+                 query_params["connection_heartbeat_interval"][0]
+             )
+         except ValueError:
+             pass
+
+     if "connection_health_check_interval" in query_params:
+         try:
+             connection_health_check_interval = float(
+                 query_params["connection_health_check_interval"][0]
+             )
+         except ValueError:
+             pass
+
      config = AioPikaWorkerConfig(
          url=broker_url,
          exchange=exchange,
          prefetch_count=prefetch_count,
+         connection_retry_config=connection_retry_config,
+         consumer_retry_config=consumer_retry_config,
+         connection_heartbeat_interval=connection_heartbeat_interval,
+         connection_health_check_interval=connection_health_check_interval,
+         reconnection_backoff_config=reconnection_backoff_config,
      )

      return AioPikaMicroserviceConsumer(
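
All of the new settings are driven by the broker URL's query string, so a deployment can tune retry behavior without code changes. A hypothetical URL exercising several of the parameters parsed above (host and credentials are placeholders):

    amqp://guest:guest@localhost:5672/?exchange=main&prefetch_count=10&connection_retry_max=20&connection_retry_delay=1.5&consumer_retry_backoff=2.0&reconnection_retry_max_delay=300&connection_heartbeat_interval=60&connection_health_check_interval=15

Note that malformed values are silently ignored (the except ValueError: pass branches), so a typo falls back to the built-in defaults rather than failing fast.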
@@ -291,8 +1128,42 @@ class ScheduledMessageHandlerCallback:
      ) -> None:

          if self.consumer.shutdown_event.is_set():
+             logger.info(
+                 f"Shutdown in progress. Requeuing scheduled message for {self.queue_name}"
+             )
+             try:
+                 # Use channel context for requeuing
+                 async with self.consumer.get_channel_ctx(self.queue_name):
+                     await aio_pika_message.reject(requeue=True)
+             except RuntimeError:
+                 logger.warning(
+                     f"Could not requeue scheduled message during shutdown - channel not available"
+                 )
+             except Exception as e:
+                 logger.error(
+                     f"Failed to requeue scheduled message during shutdown: {e}"
+                 )
              return

+         # Check if connection is healthy before processing
+         if not self.consumer.connection_healthy:
+             logger.warning(
+                 f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
+             )
+             try:
+                 # Wait briefly for potential reconnection
+                 await asyncio.sleep(0.1)
+                 if not self.consumer.connection_healthy:
+                     # Still not healthy, requeue the message
+                     async with self.consumer.get_channel_ctx(self.queue_name):
+                         await aio_pika_message.reject(requeue=True)
+                     return
+             except Exception as e:
+                 logger.error(
+                     f"Failed to requeue scheduled message due to connection issues: {e}"
+                 )
+                 return
+
          async with self.consumer.lock:
              task = asyncio.create_task(self.handle_message(aio_pika_message))
              self.consumer.tasks.add(task)
@@ -300,14 +1171,48 @@ class ScheduledMessageHandlerCallback:

      def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
          self.consumer.tasks.discard(task)
+         if task.cancelled():
+             logger.warning(f"Scheduled task for {self.queue_name} was cancelled")
+             return
+
+         if (error := task.exception()) is not None:
+             logger.exception(
+                 f"Error processing scheduled action {self.queue_name}", exc_info=error
+             )

      async def handle_message(
          self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
      ) -> None:

          if self.consumer.shutdown_event.is_set():
-             logger.info("Shutdown event set. Rqueuing message")
-             await aio_pika_message.reject(requeue=True)
+             logger.info(f"Shutdown event set. Requeuing message for {self.queue_name}")
+             try:
+                 # Use channel context for requeuing
+                 async with self.consumer.get_channel_ctx(self.queue_name):
+                     await aio_pika_message.reject(requeue=True)
+                     return
+             except RuntimeError:
+                 logger.warning(
+                     f"Could not requeue message during shutdown - channel not available"
+                 )
+             except Exception as e:
+                 logger.error(f"Failed to requeue message during shutdown: {e}")
+             return
+
+         # Check connection health before processing
+         if not self.consumer.connection_healthy:
+             logger.warning(
+                 f"Connection not healthy, requeuing scheduled message for {self.queue_name}"
+             )
+             try:
+                 async with self.consumer.get_channel_ctx(self.queue_name):
+                     await aio_pika_message.reject(requeue=True)
+                     return
+             except Exception as e:
+                 logger.error(
+                     f"Failed to requeue scheduled message due to connection issues: {e}"
+                 )
+                 return

          sig = inspect.signature(self.scheduled_action.callable)
          if len(sig.parameters) == 1:
@@ -352,18 +1257,19 @@ class ScheduledMessageHandlerCallback:
          args: tuple[Any, ...],
          kwargs: dict[str, Any],
      ) -> None:
-         async with self.consumer.uow_context_provider(
-             AppTransactionContext(
-                 controller_member_reflect=scheduled_action.controller_member,
-                 transaction_data=SchedulerTransactionData(
-                     scheduled_to=datetime.now(UTC),
-                     cron_expression=scheduled_action.spec.cron,
-                     triggered_at=datetime.now(UTC),
-                 ),
-             )
-         ):
+         with provide_shutdown_state(self.consumer.shutdown_state):
+             async with self.consumer.uow_context_provider(
+                 AppTransactionContext(
+                     controller_member_reflect=scheduled_action.controller_member,
+                     transaction_data=SchedulerTransactionData(
+                         scheduled_to=datetime.now(UTC),
+                         cron_expression=scheduled_action.spec.cron,
+                         triggered_at=datetime.now(UTC),
+                     ),
+                 )
+             ):

-             await scheduled_action.callable(*args, **kwargs)
+                 await scheduled_action.callable(*args, **kwargs)


  class MessageHandlerCallback:
@@ -379,13 +1285,44 @@ class MessageHandlerCallback:
          self.queue_name = queue_name
          self.routing_key = routing_key
          self.message_handler = message_handler
+         self.retry_state: dict[str, dict[str, Any]] = {}

      async def message_consumer(
          self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
      ) -> None:
          if self.consumer.shutdown_event.is_set():
+             logger.info(
+                 f"Shutdown in progress. Requeuing message for {self.queue_name}"
+             )
+             try:
+                 # Use channel context for requeuing
+                 async with self.consumer.get_channel_ctx(self.queue_name):
+                     await aio_pika_message.reject(requeue=True)
+             except RuntimeError:
+                 logger.warning(
+                     f"Could not requeue message during shutdown - channel not available"
+                 )
+             except Exception as e:
+                 logger.error(f"Failed to requeue message during shutdown: {e}")
              return

+         # Check if connection is healthy before processing
+         if not self.consumer.connection_healthy:
+             logger.warning(
+                 f"Connection not healthy, requeuing message for {self.queue_name}"
+             )
+             try:
+                 # Wait briefly for potential reconnection
+                 await asyncio.sleep(0.1)
+                 if not self.consumer.connection_healthy:
+                     # Still not healthy, requeue the message
+                     async with self.consumer.get_channel_ctx(self.queue_name):
+                         await aio_pika_message.reject(requeue=True)
+                     return
+             except Exception as e:
+                 logger.error(f"Failed to requeue message due to connection issues: {e}")
+                 return
+
          async with self.consumer.lock:
              task = asyncio.create_task(self.handle_message(aio_pika_message))
              self.consumer.tasks.add(task)
@@ -394,10 +1331,13 @@ class MessageHandlerCallback:
      def handle_message_consume_done(self, task: asyncio.Task[Any]) -> None:
          self.consumer.tasks.discard(task)
          if task.cancelled():
+             logger.warning(f"Task for queue {self.queue_name} was cancelled")
              return

          if (error := task.exception()) is not None:
-             logger.exception("Error processing message", exc_info=error)
+             logger.exception(
+                 f"Error processing message for queue {self.queue_name}", exc_info=error
+             )

      async def __call__(
          self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
@@ -408,14 +1348,203 @@ class MessageHandlerCallback:
          self,
          aio_pika_message: aio_pika.abc.AbstractIncomingMessage,
          requeue: bool = False,
+         retry_count: int = 0,
+         exception: Optional[BaseException] = None,
      ) -> None:
-         if self.message_handler.spec.auto_ack is False:
-             await aio_pika_message.reject(requeue=requeue)
-         elif requeue:
-             logger.warning(
-                 f"Message {aio_pika_message.message_id} ({self.queue_name}) cannot be requeued because auto_ack is enabled"
+         """
+         Handle rejecting a message, with support for retry with exponential backoff.
+
+         Args:
+             aio_pika_message: The message to reject
+             requeue: Whether to requeue the message directly (True) or handle with retry logic (False)
+             retry_count: The current retry count for this message
+             exception: The exception that caused the rejection, if any
+         """
+         message_id = aio_pika_message.message_id or str(uuid.uuid4())
+
+         # If auto_ack is enabled, we cannot retry the message through RabbitMQ reject mechanism
+         if self.message_handler.spec.auto_ack:
+             if requeue:
+                 logger.warning(
+                     f"Message {message_id} ({self.queue_name}) cannot be requeued because auto_ack is enabled"
+                 )
+             return
+
+         try:
+             # Check if we should retry with backoff
+             if (
+                 not requeue
+                 and self.message_handler.spec.requeue_on_exception
+                 and exception is not None
+             ):
+                 # Get retry config from consumer
+                 retry_config = self.consumer.config.consumer_retry_config
+
+                 # Check if we reached max retries
+                 if retry_count >= retry_config.max_retries:
+                     logger.warning(
+                         f"Message {message_id} ({self.queue_name}) failed after {retry_count} retries, "
+                         f"dead-lettering: {str(exception)}"
+                     )
+                     # Dead-letter the message after max retries
+                     try:
+                         async with self.consumer.get_channel_ctx(self.queue_name):
+                             await aio_pika_message.reject(requeue=False)
+                     except Exception as e:
+                         logger.error(f"Failed to dead-letter message {message_id}: {e}")
+                     return
+
+                 # Calculate delay for this retry attempt
+                 delay = retry_config.initial_delay * (
+                     retry_config.backoff_factor**retry_count
+                 )
+                 if retry_config.jitter:
+                     jitter_amount = delay * 0.25
+                     delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                     delay = max(
+                         delay, 0.1
+                     )  # Ensure delay doesn't go negative due to jitter
+
+                 delay = min(delay, retry_config.max_delay)
+
+                 logger.info(
+                     f"Message {message_id} ({self.queue_name}) failed with {str(exception)}, "
+                     f"retry {retry_count+1}/{retry_config.max_retries} scheduled in {delay:.2f}s"
+                 )
+
+                 # Store retry state for this message
+                 self.retry_state[message_id] = {
+                     "retry_count": retry_count + 1,
+                     "last_exception": exception,
+                     "next_retry": time.time() + delay,
+                 }
+
+                 # Schedule retry after delay
+                 asyncio.create_task(
+                     self._delayed_retry(
+                         aio_pika_message, delay, retry_count + 1, exception
+                     )
+                 )
+
+                 # Acknowledge the current message since we'll handle retry ourselves
+                 try:
+                     async with self.consumer.get_channel_ctx(self.queue_name):
+                         await aio_pika_message.ack()
+                 except Exception as e:
+                     logger.error(
+                         f"Failed to acknowledge message {message_id} for retry: {e}"
+                     )
+                 return
+
+             # Standard reject without retry or with immediate requeue
+             try:
+                 async with self.consumer.get_channel_ctx(self.queue_name):
+                     await aio_pika_message.reject(requeue=requeue)
+                     if requeue:
+                         logger.info(
+                             f"Message {message_id} ({self.queue_name}) requeued for immediate retry"
+                         )
+                     else:
+                         logger.info(
+                             f"Message {message_id} ({self.queue_name}) rejected without requeue"
+                         )
+             except Exception as e:
+                 logger.error(f"Failed to reject message {message_id}: {e}")
+
+         except Exception as e:
+             logger.exception(
+                 f"Unexpected error in handle_reject_message for {message_id} ({self.queue_name}): {e}"
              )

+     async def _delayed_retry(
+         self,
+         aio_pika_message: aio_pika.abc.AbstractIncomingMessage,
+         delay: float,
+         retry_count: int,
+         exception: Optional[BaseException],
+     ) -> None:
+         """
+         Handle delayed retry of a message after exponential backoff delay.
+
+         Args:
+             aio_pika_message: The original message
+             delay: Delay in seconds before retrying
+             retry_count: The current retry count (after increment)
+             exception: The exception that caused the failure
+         """
+         message_id = aio_pika_message.message_id or str(uuid.uuid4())
+
+         try:
+             # Wait for the backoff delay
+             await asyncio.sleep(delay)
+
+             # Get message body and properties for republishing
+             message_body = aio_pika_message.body
+             headers = (
+                 aio_pika_message.headers.copy() if aio_pika_message.headers else {}
+             )
+
+             # Add retry information to headers
+             headers["x-retry-count"] = retry_count
+             if exception:
+                 headers["x-last-error"] = str(exception)
+
+             # Clean up retry state
+             if message_id in self.retry_state:
+                 del self.retry_state[message_id]
+
+             # Republish the message to the same queue with retry logic
+             max_attempts = 3
+             for attempt in range(max_attempts):
+                 try:
+                     async with self.consumer.get_channel_ctx(
+                         self.queue_name
+                     ) as channel:
+                         exchange = await RabbitmqUtils.get_main_exchange(
+                             channel=channel,
+                             exchange_name=self.consumer.config.exchange,
+                         )
+
+                         await exchange.publish(
+                             aio_pika.Message(
+                                 body=message_body,
+                                 headers=headers,
+                                 message_id=message_id,
+                                 content_type=aio_pika_message.content_type,
+                                 content_encoding=aio_pika_message.content_encoding,
+                                 delivery_mode=aio_pika_message.delivery_mode,
+                             ),
+                             routing_key=self.routing_key,
+                         )
+
+                     logger.info(
+                         f"Message {message_id} ({self.queue_name}) republished for retry {retry_count}"
+                     )
+                     return
+
+                 except Exception as e:
+                     if attempt < max_attempts - 1:
+                         logger.warning(
+                             f"Failed to republish message {message_id} (attempt {attempt + 1}): {e}"
+                         )
+                         await asyncio.sleep(1.0 * (attempt + 1))  # Exponential backoff
+                     else:
+                         logger.error(
+                             f"Failed to republish message {message_id} after {max_attempts} attempts: {e}"
+                         )
+                         raise
+
+         except Exception as e:
+             logger.exception(
+                 f"Failed to execute delayed retry for message {message_id} ({self.queue_name}): {e}"
+             )
+             # If we fail to republish, try to dead-letter the original message
+             try:
+                 if message_id in self.retry_state:
+                     del self.retry_state[message_id]
+             except Exception:
+                 pass
+
      async def handle_message(
          self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage
      ) -> None:
@@ -472,51 +1601,92 @@ class MessageHandlerCallback:
          incoming_message_spec = MessageHandler.get_message_incoming(handler)
          assert incoming_message_spec is not None

-         async with self.consumer.uow_context_provider(
-             AppTransactionContext(
-                 controller_member_reflect=handler_data.controller_member,
-                 transaction_data=MessageBusTransactionData(
-                     message=builded_message,
-                     topic=routing_key,
-                 ),
-             )
-         ):
-             ctx: AsyncContextManager[Any]
-             if incoming_message_spec.timeout is not None:
-                 ctx = asyncio.timeout(incoming_message_spec.timeout)
-             else:
-                 ctx = none_context()
-             async with ctx:
-                 try:
-                     with provide_bus_message_controller(
-                         AioPikaMessageBusController(aio_pika_message)
-                     ):
-                         await handler(builded_message)
-                     if not incoming_message_spec.auto_ack:
-                         with suppress(aio_pika.MessageProcessError):
-                             await aio_pika_message.ack()
-                 except BaseException as base_exc:
-                     if incoming_message_spec.exception_handler is not None:
-                         try:
-                             incoming_message_spec.exception_handler(base_exc)
-                         except Exception as nested_exc:
+         with provide_shutdown_state(self.consumer.shutdown_state):
+             async with self.consumer.uow_context_provider(
+                 AppTransactionContext(
+                     controller_member_reflect=handler_data.controller_member,
+                     transaction_data=MessageBusTransactionData(
+                         message=builded_message,
+                         topic=routing_key,
+                     ),
+                 )
+             ):
+                 ctx: AsyncContextManager[Any]
+                 if incoming_message_spec.timeout is not None:
+                     ctx = asyncio.timeout(incoming_message_spec.timeout)
+                 else:
+                     ctx = none_context()
+                 async with ctx:
+                     try:
+                         with provide_bus_message_controller(
+                             AioPikaMessageBusController(aio_pika_message)
+                         ):
+                             await handler(builded_message)
+                         if not incoming_message_spec.auto_ack:
+                             with suppress(aio_pika.MessageProcessError):
+                                 # Use channel context for acknowledgement with retry
+                                 try:
+                                     async with self.consumer.get_channel_ctx(
+                                         self.queue_name
+                                     ):
+                                         await aio_pika_message.ack()
+                                 except Exception as ack_error:
+                                     logger.warning(
+                                         f"Failed to acknowledge message {aio_pika_message.message_id or 'unknown'}: {ack_error}"
+                                     )
+                                     # Message will be redelivered if ack fails, which is acceptable
+                     except BaseException as base_exc:
+                         # Get message id for logging
+                         message_id = aio_pika_message.message_id or str(uuid.uuid4())
+
+                         # Extract retry count from headers if available
+                         headers = aio_pika_message.headers or {}
+                         retry_count = int(str(headers.get("x-retry-count", 0)))
+
+                         # Process exception handler if configured
+                         if incoming_message_spec.exception_handler is not None:
+                             try:
+                                 incoming_message_spec.exception_handler(base_exc)
+                             except Exception as nested_exc:
+                                 logger.exception(
+                                     f"Error processing exception handler for message {message_id}: {base_exc} | {nested_exc}"
+                                 )
+                         else:
                              logger.exception(
-                                 f"Error processing exception handler: {base_exc} | {nested_exc}"
+                                 f"Error processing message {message_id} on topic {routing_key}: {str(base_exc)}"
+                             )
+
+                         # Handle rejection with retry logic
+                         if incoming_message_spec.requeue_on_exception:
+                             # Use our retry with backoff mechanism
+                             await self.handle_reject_message(
+                                 aio_pika_message,
+                                 requeue=False,  # Don't requeue directly, use our backoff mechanism
+                                 retry_count=retry_count,
+                                 exception=base_exc,
+                             )
+                         else:
+                             # Message shouldn't be retried, reject it
+                             await self.handle_reject_message(
+                                 aio_pika_message, requeue=False, exception=base_exc
                              )
                      else:
-                         logger.exception(
-                             f"Error processing message on topic {routing_key}"
-                         )
-                     if incoming_message_spec.requeue_on_exception:
-                         await self.handle_reject_message(aio_pika_message, requeue=True)
-                     else:
-                         await self.handle_reject_message(
-                             aio_pika_message, requeue=False
-                         )
-                 else:
-                     logger.info(
-                         f"Message {aio_pika_message.message_id}#{self.queue_name} processed successfully"
-                     )
+                         # Message processed successfully, log and clean up any retry state
+                         message_id = aio_pika_message.message_id or str(uuid.uuid4())
+                         if message_id in self.retry_state:
+                             del self.retry_state[message_id]
+
+                         # Log success with retry information if applicable
+                         headers = aio_pika_message.headers or {}
+                         if "x-retry-count" in headers:
+                             retry_count = int(str(headers.get("x-retry-count", 0)))
+                             logger.info(
+                                 f"Message {message_id}#{self.queue_name} processed successfully after {retry_count} retries"
+                             )
+                         else:
+                             logger.info(
+                                 f"Message {message_id}#{self.queue_name} processed successfully"
+                             )


  @asynccontextmanager
@@ -614,19 +1784,64 @@ class MessageBusWorker:
      def start_sync(self) -> None:

          def on_shutdown(loop: asyncio.AbstractEventLoop) -> None:
-             logger.info("Shutting down")
-             self.consumer.shutdown()
+             logger.info("Shutting down - signal received")
+             # Schedule the shutdown to run in the event loop
+             asyncio.create_task(self._graceful_shutdown())
+             # wait until the shutdown is complete

          with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
-             runner.get_loop().add_signal_handler(
-                 signal.SIGINT, on_shutdown, runner.get_loop()
-             )
+             loop = runner.get_loop()
+             loop.add_signal_handler(signal.SIGINT, on_shutdown, loop)
+             # Add graceful shutdown handler for SIGTERM as well
+             loop.add_signal_handler(signal.SIGTERM, on_shutdown, loop)
              runner.run(self.start_async())

+     async def _graceful_shutdown(self) -> None:
+         """Handles graceful shutdown process"""
+         logger.info("Initiating graceful shutdown sequence")
+         # Use the comprehensive close method that handles shutdown, task waiting and connection cleanup
+
+         self.consumer.shutdown()
+         logger.info("Graceful shutdown completed")
+

  class AioPikaMessageBusController(BusMessageController):
      def __init__(self, aio_pika_message: aio_pika.abc.AbstractIncomingMessage):
          self.aio_pika_message = aio_pika_message
+         # We access consumer callback through context if available
+         self._callback: Optional[MessageHandlerCallback] = None
+
+     def _get_callback(self) -> MessageHandlerCallback:
+         """
+         Find the callback associated with this message.
+         This allows us to access the retry mechanisms.
+         """
+         if self._callback is None:
+             # Get the context from current frame's locals
+             frame = inspect.currentframe()
+             if frame is not None:
+                 try:
+                     caller_frame = frame.f_back
+                     if caller_frame is not None:
+                         # Check for context with handler callback
+                         callback_ref = None
+                         # Look for handler_message call context
+                         while caller_frame is not None:
+                             if "self" in caller_frame.f_locals:
+                                 self_obj = caller_frame.f_locals["self"]
+                                 if isinstance(self_obj, MessageHandlerCallback):
+                                     callback_ref = self_obj
+                                     break
+                             caller_frame = caller_frame.f_back
+                         # Save callback reference if we found it
+                         self._callback = callback_ref
+                 finally:
+                     del frame  # Avoid reference cycles
+
+         if self._callback is None:
+             raise RuntimeError("Could not find callback context for message retry")
+
+         return self._callback

      async def ack(self) -> None:
          await self.aio_pika_message.ack()
@@ -638,7 +1853,41 @@ class AioPikaMessageBusController(BusMessageController):
          await self.aio_pika_message.reject()

      async def retry(self) -> None:
-         await self.aio_pika_message.reject(requeue=True)
+         """
+         Retry the message immediately by rejecting with requeue flag.
+         This doesn't use the exponential backoff mechanism.
+         """
+         callback = self._get_callback()
+         await callback.handle_reject_message(self.aio_pika_message, requeue=True)

      async def retry_later(self, delay: int) -> None:
-         raise NotImplementedError("Not implemented")
+         """
+         Retry the message after a specified delay using the exponential backoff mechanism.
+
+         Args:
+             delay: Minimum delay in seconds before retrying
+         """
+         try:
+             callback = self._get_callback()
+
+             # Get current retry count from message headers
+             headers = self.aio_pika_message.headers or {}
+             retry_count = int(str(headers.get("x-retry-count", 0)))
+
+             # Handle retry with explicit delay
+             asyncio.create_task(
+                 callback._delayed_retry(
+                     self.aio_pika_message,
+                     float(delay),
+                     retry_count + 1,
+                     None,  # No specific exception
+                 )
+             )
+
+             # Acknowledge the current message since we'll republish
+             await self.aio_pika_message.ack()
+
+         except Exception as e:
+             logger.exception(f"Failed to schedule retry_later: {e}")
+             # Fall back to immediate retry
+             await self.aio_pika_message.reject(requeue=True)
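
For reference, the per-message backoff in handle_reject_message follows delay = initial_delay * backoff_factor ** retry_count, clipped to max_delay, with optional +/-25% jitter. A small sketch of the schedule this yields for the consumer_retry_config defaults set in create_message_bus (initial_delay=5, backoff_factor=3.0, max_delay=60.0):

    def backoff_schedule(
        initial_delay: float, backoff_factor: float, max_delay: float, retries: int
    ) -> list[float]:
        # Mirrors the delay formula used in handle_reject_message (jitter omitted).
        return [min(initial_delay * backoff_factor**n, max_delay) for n in range(retries)]

    print(backoff_schedule(5, 3.0, 60.0, 5))  # [5.0, 15.0, 45.0, 60.0, 60.0]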