jararaca 0.3.12a12__py3-none-any.whl → 0.3.12a14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of jararaca might be problematic.



@@ -1,9 +1,11 @@
 import asyncio
 import contextlib
 import logging
+import random
 import signal
 import time
 from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from typing import Any
 from urllib.parse import parse_qs
@@ -15,6 +17,13 @@ import urllib3.util
 import uvloop
 from aio_pika import connect_robust
 from aio_pika.abc import AbstractChannel, AbstractRobustConnection
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ConnectionClosed,
+)
 from aio_pika.pool import Pool

 from jararaca.broker_backend import MessageBrokerBackend
@@ -30,6 +39,7 @@ from jararaca.scheduler.decorators import (
 )
 from jararaca.scheduler.types import DelayedMessageData
 from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+from jararaca.utils.retry import RetryConfig, retry_with_backoff

 logger = logging.getLogger(__name__)

@@ -100,17 +110,23 @@ class _MessageBrokerDispatcher(ABC):

 class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):

-    def __init__(self, url: str) -> None:
+    def __init__(self, url: str, config: "BeatWorkerConfig | None" = None) -> None:
         self.url = url
+        self.config = config or BeatWorkerConfig()
+        self.connection_healthy = False
+        self.reconnection_in_progress = False
+        self.shutdown_event = asyncio.Event()
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_lock = asyncio.Lock()

         self.conn_pool: "Pool[AbstractRobustConnection]" = Pool(
             self._create_connection,
-            max_size=10,
+            max_size=self.config.max_pool_size,
         )

         self.channel_pool: "Pool[AbstractChannel]" = Pool(
             self._create_channel,
-            max_size=10,
+            max_size=self.config.max_pool_size,
         )

         splitted = urllib3.util.parse_url(url)
@@ -131,86 +147,339 @@ class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):

     async def _create_connection(self) -> AbstractRobustConnection:
         """
-        Create a connection to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a robust connection to the RabbitMQ server with retry logic.
         """
-        connection = await connect_robust(self.url)
-        return connection
+
+        async def _establish_connection() -> AbstractRobustConnection:
+            logger.info("Establishing connection to RabbitMQ")
+            connection = await connect_robust(
+                self.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
+            logger.info("Connected to RabbitMQ successfully")
+            return connection
+
+        return await retry_with_backoff(
+            _establish_connection,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                ConnectionError,
+                OSError,
+                TimeoutError,
+            ),
+        )

     async def _create_channel(self) -> AbstractChannel:
         """
-        Create a channel to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a channel to the RabbitMQ server with retry logic.
         """
-        async with self.conn_pool.acquire() as connection:
-            channel = await connection.channel()
-            return channel
+
+        async def _establish_channel() -> AbstractChannel:
+            async with self.conn_pool.acquire() as connection:
+                channel = await connection.channel()
+                return channel
+
+        return await retry_with_backoff(
+            _establish_channel,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                AMQPChannelError,
+                ChannelClosed,
+                ConnectionError,
+            ),
+        )

     async def dispatch_scheduled_action(self, action_id: str, timestamp: int) -> None:
         """
-        Dispatch a message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a message to the RabbitMQ server with retry logic.
         """
+        if not self.connection_healthy:
+            await self._wait_for_connection()

-        logger.info(f"Dispatching message to {action_id} at {timestamp}")
-        async with self.channel_pool.acquire() as channel:
-            exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+        async def _dispatch() -> None:
+            logger.info(f"Dispatching message to {action_id} at {timestamp}")
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)

-            await exchange.publish(
-                aio_pika.Message(body=str(timestamp).encode()),
-                routing_key=action_id,
+                await exchange.publish(
+                    aio_pika.Message(body=str(timestamp).encode()),
+                    routing_key=action_id,
+                )
+                logger.info(f"Dispatched message to {action_id} at {timestamp}")
+
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to dispatch message to {action_id} after retries: {e}"
             )
-            logger.info(f"Dispatched message to {action_id} at {timestamp}")
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise

     async def dispatch_delayed_message(
         self, delayed_message: DelayedMessageData
     ) -> None:
         """
-        Dispatch a delayed message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a delayed message to the RabbitMQ server with retry logic.
         """
-        async with self.channel_pool.acquire() as channel:
+        if not self.connection_healthy:
+            await self._wait_for_connection()
+
+        async def _dispatch() -> None:
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+                await exchange.publish(
+                    aio_pika.Message(
+                        body=delayed_message.payload,
+                    ),
+                    routing_key=f"{delayed_message.message_topic}.",
+                )

-            exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
-            await exchange.publish(
-                aio_pika.Message(
-                    body=delayed_message.payload,
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
                 ),
-                routing_key=f"{delayed_message.message_topic}.",
             )
+        except Exception as e:
+            logger.error(f"Failed to dispatch delayed message after retries: {e}")
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise

     async def initialize(self, scheduled_actions: list[ScheduledActionData]) -> None:
         """
-        Initialize the RabbitMQ server.
-        This is used to create the exchange and queues for the scheduled actions.
+        Initialize the RabbitMQ server with retry logic.
         """

-        async with self.channel_pool.acquire() as channel:
-            await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+        async def _initialize() -> None:
+            async with self.channel_pool.acquire() as channel:
+                await RabbitmqUtils.get_main_exchange(channel, self.exchange)

-            for sched_act_data in scheduled_actions:
-                queue_name = ScheduledAction.get_function_id(sched_act_data.callable)
+                for sched_act_data in scheduled_actions:
+                    queue_name = ScheduledAction.get_function_id(
+                        sched_act_data.callable
+                    )
+
+                    # Try to get existing queue
+                    await RabbitmqUtils.get_scheduled_action_queue(
+                        channel=channel,
+                        queue_name=queue_name,
+                    )

-                # Try to get existing queue
-                await RabbitmqUtils.get_scheduled_action_queue(
-                    channel=channel,
-                    queue_name=queue_name,
+        try:
+            logger.info("Initializing RabbitMQ connection...")
+            await retry_with_backoff(
+                _initialize,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )
+
+            # Verify connection is actually healthy after initialization
+            if not await self._is_connection_healthy():
+                logger.warning(
+                    "Connection health check failed after initialization, retrying..."
                 )
+                # Wait a bit and try again
+                await asyncio.sleep(2.0)
+                if not await self._is_connection_healthy():
+                    raise ConnectionError("Connection not healthy after initialization")
+
+            self.connection_healthy = True
+            logger.info("RabbitMQ connection initialized successfully")
+
+            # Start health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to initialize RabbitMQ after retries: {e}")
+            raise

     async def dispose(self) -> None:
-        await self.channel_pool.close()
-        await self.conn_pool.close()
+        """Clean up resources"""
+        logger.info("Disposing RabbitMQ broker dispatcher")
+        self.shutdown_event.set()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            try:
+                await self.health_check_task
+            except asyncio.CancelledError:
+                pass
+
+        # Clean up pools
+        await self._cleanup_pools()
+
+    async def _monitor_connection_health(self) -> None:
+        """Monitor connection health and trigger reconnection if needed"""
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.health_check_interval)
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        asyncio.create_task(self._handle_reconnection())
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """Check if the connection is healthy"""
+        try:
+            # Try to acquire a connection from the pool
+            async with self.conn_pool.acquire() as connection:
+                if connection.is_closed:
+                    return False
+
+                # Try to create a channel to test connection
+                channel = await connection.channel()
+                await channel.close()
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    async def _handle_reconnection(self) -> None:
+        """Handle reconnection process with exponential backoff"""
+        async with self.reconnection_lock:
+            if self.reconnection_in_progress:
+                return
+
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+
+            logger.info("Starting reconnection process")
+
+            attempt = 0
+            while not self.shutdown_event.is_set():
+                try:
+                    attempt += 1
+                    logger.info(f"Reconnection attempt {attempt}")
+
+                    # Close existing pools
+                    await self._cleanup_pools()
+
+                    # Recreate pools
+                    self.conn_pool = Pool(
+                        self._create_connection,
+                        max_size=self.config.max_pool_size,
+                    )
+                    self.channel_pool = Pool(
+                        self._create_channel,
+                        max_size=self.config.max_pool_size,
+                    )
+
+                    # Test connection
+                    if await self._is_connection_healthy():
+                        self.connection_healthy = True
+                        logger.info("Reconnection successful")
+                        break
+                    else:
+                        raise ConnectionError(
+                            "Connection health check failed after reconnection"
+                        )
+
+                except Exception as e:
+                    logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                    if self.shutdown_event.is_set():
+                        break
+
+                    # Calculate backoff delay
+                    delay = self.config.reconnection_delay * (2 ** min(attempt - 1, 10))
+                    if self.config.connection_retry_config.jitter:
+                        jitter_amount = delay * 0.25
+                        delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                        delay = max(delay, 0.1)
+
+                    delay = min(delay, self.config.connection_retry_config.max_delay)
+
+                    logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                    await asyncio.sleep(delay)
+
+            self.reconnection_in_progress = False
+
+    async def _cleanup_pools(self) -> None:
+        """Clean up existing connection pools"""
+        try:
+            if hasattr(self, "channel_pool"):
+                await self.channel_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing channel pool: {e}")
+
+        try:
+            if hasattr(self, "conn_pool"):
+                await self.conn_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing connection pool: {e}")
+
+    async def _wait_for_connection(self) -> None:
+        """Wait for connection to be healthy"""
+        max_wait = 30.0  # Maximum wait time
+        wait_time = 0.0
+
+        while not self.connection_healthy and wait_time < max_wait:
+            if self.shutdown_event.is_set():
+                raise ConnectionError("Shutdown requested while waiting for connection")

+            await asyncio.sleep(0.5)
+            wait_time += 0.5

-def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatcher:
+        if not self.connection_healthy:
+            raise ConnectionError("Connection not healthy after maximum wait time")
+
+
+def _get_message_broker_dispatcher_from_url(
+    url: str, config: "BeatWorkerConfig | None" = None
+) -> _MessageBrokerDispatcher:
     """
     Factory function to create a message broker instance from a URL.
     Currently, only RabbitMQ is supported.
     """
     if url.startswith("amqp://") or url.startswith("amqps://"):
-        return _RabbitMQBrokerDispatcher(url=url)
+        return _RabbitMQBrokerDispatcher(url=url, config=config)
     else:
         raise ValueError(f"Unsupported message broker URL: {url}")

@@ -218,6 +487,42 @@ def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatche
 # endregion


+@dataclass
+class BeatWorkerConfig:
+    """Configuration for beat worker connection resilience"""
+
+    connection_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=10,
+            initial_delay=2.0,
+            max_delay=60.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    dispatch_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=3,
+            initial_delay=1.0,
+            max_delay=10.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    connection_heartbeat_interval: float = 30.0
+    health_check_interval: float = 15.0
+    max_reconnection_attempts: int = -1  # Infinite retries
+    reconnection_delay: float = 5.0
+
+    # Connection establishment timeouts
+    connection_wait_timeout: float = 300.0  # 5 minutes to wait for initial connection
+    reconnection_wait_timeout: float = 600.0  # 10 minutes to wait for reconnection
+
+    # Pool configuration
+    max_pool_size: int = 10
+    pool_recycle_time: float = 3600.0  # 1 hour
+
+
 class BeatWorker:

     def __init__(
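The `BeatWorkerConfig` dataclass added in this hunk is the single entry point for tuning the new resilience behaviour. A minimal sketch of overriding its defaults when building a worker follows; the module path used for `BeatWorker`/`BeatWorkerConfig` is an assumption (only `jararaca.utils.retry.RetryConfig` is named in the diff), and `app` stands in for whatever application object `BeatWorker` expects.

# Sketch: tuning connection resilience for a BeatWorker.
from jararaca.utils.retry import RetryConfig


def build_beat_worker(app, broker_url: str, backend_url: str):
    # The exact import path below is assumed, not confirmed by this diff.
    from jararaca.scheduler.beat_worker import BeatWorker, BeatWorkerConfig

    config = BeatWorkerConfig(
        connection_retry_config=RetryConfig(
            max_retries=20,      # retry the initial connection more aggressively
            initial_delay=1.0,
            max_delay=30.0,
            backoff_factor=2.0,
            jitter=True,
        ),
        health_check_interval=10.0,  # probe the RabbitMQ connection more often
        max_pool_size=20,            # larger connection/channel pools
    )
    return BeatWorker(
        app=app,
        broker_url=broker_url,
        backend_url=backend_url,
        config=config,
    )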
@@ -227,11 +532,13 @@ class BeatWorker:
         broker_url: str,
         backend_url: str,
         scheduled_action_names: set[str] | None = None,
+        config: "BeatWorkerConfig | None" = None,
     ) -> None:
         self.app = app
+        self.config = config or BeatWorkerConfig()

         self.broker: _MessageBrokerDispatcher = _get_message_broker_dispatcher_from_url(
-            broker_url
+            broker_url, self.config
         )
         self.backend: MessageBrokerBackend = get_message_broker_backend_from_url(
             backend_url
@@ -271,15 +578,45 @@ class BeatWorker:
             self.app, self.container, self.scheduler_names
         )

+        # Initialize and wait for connection to be established
+        logger.info("Initializing broker connection...")
         await self.broker.initialize(scheduled_actions)

+        # Wait for connection to be healthy before starting scheduler
+        logger.info("Waiting for connection to be established...")
+        await self._wait_for_broker_connection()
+
+        logger.info("Connection established, starting scheduler...")
         await self.run_scheduled_actions(scheduled_actions)

     async def run_scheduled_actions(
         self, scheduled_actions: list[ScheduledActionData]
     ) -> None:

+        logger.info("Starting scheduled actions processing loop")
+
+        # Ensure we have a healthy connection before starting the main loop
+        if (
+            hasattr(self.broker, "connection_healthy")
+            and not self.broker.connection_healthy
+        ):
+            logger.warning(
+                "Connection not healthy at start of processing loop, waiting..."
+            )
+            await self._wait_for_broker_reconnection()
+
         while not self.shutdown_event.is_set():
+            # Check connection health before processing scheduled actions
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and not self.broker.connection_healthy
+            ):
+                logger.warning(
+                    "Broker connection is not healthy, waiting for reconnection..."
+                )
+                await self._wait_for_broker_reconnection()
+                continue
+
             now = int(time.time())
             for sched_act_data in scheduled_actions:
                 func = sched_act_data.callable
@@ -287,64 +624,178 @@ class BeatWorker:
                 if self.shutdown_event.is_set():
                     break

-                async with self.backend.lock():
+                try:
+                    async with self.backend.lock():

-                    last_dispatch_time: int | None = (
-                        await self.backend.get_last_dispatch_time(
-                            ScheduledAction.get_function_id(func)
+                        last_dispatch_time: int | None = (
+                            await self.backend.get_last_dispatch_time(
+                                ScheduledAction.get_function_id(func)
+                            )
                         )
-                    )

-                    if last_dispatch_time is not None:
-                        cron = croniter.croniter(
-                            scheduled_action.cron, last_dispatch_time
-                        )
-                        next_run: datetime = cron.get_next(datetime).replace(tzinfo=UTC)
-                        if next_run > datetime.now(UTC):
-                            logger.info(
-                                f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                        if last_dispatch_time is not None:
+                            cron = croniter.croniter(
+                                scheduled_action.cron, last_dispatch_time
                             )
-                            continue
-
-                    if not scheduled_action.allow_overlap:
-                        if (
-                            await self.backend.get_in_execution_count(
-                                ScheduledAction.get_function_id(func)
+                            next_run: datetime = cron.get_next(datetime).replace(
+                                tzinfo=UTC
+                            )
+                            if next_run > datetime.now(UTC):
+                                logger.info(
+                                    f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                                )
+                                continue
+
+                        if not scheduled_action.allow_overlap:
+                            if (
+                                await self.backend.get_in_execution_count(
+                                    ScheduledAction.get_function_id(func)
+                                )
+                                > 0
+                            ):
+                                continue
+
+                        try:
+                            await self.broker.dispatch_scheduled_action(
+                                ScheduledAction.get_function_id(func),
+                                now,
                             )
-                            > 0
-                        ):
-                            continue

-                    await self.broker.dispatch_scheduled_action(
-                        ScheduledAction.get_function_id(func),
-                        now,
-                    )
+                            await self.backend.set_last_dispatch_time(
+                                ScheduledAction.get_function_id(func), now
+                            )

-                    await self.backend.set_last_dispatch_time(
-                        ScheduledAction.get_function_id(func), now
-                    )
+                            logger.info(
+                                f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Failed to dispatch scheduled action {func.__module__}.{func.__qualname__}: {e}"
+                            )
+                            # Continue with other scheduled actions even if one fails
+                            continue

-                    logger.info(
-                        f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
+                except Exception as e:
+                    logger.error(
+                        f"Error processing scheduled action {func.__module__}.{func.__qualname__}: {e}"
                     )
-
-                for (
-                    delayed_message_data
-                ) in await self.backend.dequeue_next_delayed_messages(now):
-                    await self.broker.dispatch_delayed_message(delayed_message_data)
+                    # Continue with other scheduled actions even if one fails
+                    continue
+
+            # Handle delayed messages
+            try:
+                delayed_messages = await self.backend.dequeue_next_delayed_messages(now)
+                for delayed_message_data in delayed_messages:
+                    try:
+                        await self.broker.dispatch_delayed_message(delayed_message_data)
+                    except Exception as e:
+                        logger.error(f"Failed to dispatch delayed message: {e}")
+                        # Continue with other delayed messages even if one fails
+                        continue
+            except Exception as e:
+                logger.error(f"Error processing delayed messages: {e}")

             with contextlib.suppress(asyncio.TimeoutError):
                 await asyncio.wait_for(self.shutdown_event.wait(), self.interval)

-            # await self.shutdown_event.wait(self.interval)
-
         logger.info("Scheduler stopped")

-        await self.backend.dispose()
-        await self.broker.dispose()
+        try:
+            await self.backend.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing backend: {e}")
+
+        try:
+            await self.broker.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing broker: {e}")

     async def _graceful_shutdown(self) -> None:
         """Handles graceful shutdown process"""
         logger.info("Initiating graceful shutdown sequence")
         self.shutdown_event.set()
         logger.info("Graceful shutdown completed")
+
+    async def _wait_for_broker_connection(self) -> None:
+        """
+        Wait for the broker connection to be established and healthy.
+        This ensures the scheduler doesn't start until RabbitMQ is ready.
+        """
+        max_wait_time = self.config.connection_wait_timeout
+        check_interval = 2.0  # Check every 2 seconds
+        elapsed_time = 0.0
+
+        logger.info(
+            f"Waiting for broker connection to be established (timeout: {max_wait_time}s)..."
+        )
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                raise ConnectionError(
+                    "Shutdown requested while waiting for broker connection"
+                )
+
+            # Check if broker connection is healthy
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection is healthy")
+                return
+
+            # If broker doesn't have health status, try a simple health check
+            if not hasattr(self.broker, "connection_healthy"):
+                try:
+                    # For non-RabbitMQ brokers, assume connection is ready after initialization
+                    logger.info("Broker connection assumed to be ready")
+                    return
+                except Exception as e:
+                    logger.debug(f"Broker connection check failed: {e}")
+
+            if elapsed_time % 10.0 == 0.0:  # Log every 10 seconds
+                logger.info(
+                    f"Still waiting for broker connection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        raise ConnectionError(
+            f"Broker connection not established after {max_wait_time} seconds"
+        )
+
+    async def _wait_for_broker_reconnection(self) -> None:
+        """
+        Wait for the broker to reconnect when connection is lost during operation.
+        This pauses the scheduler until the connection is restored.
+        """
+        max_wait_time = self.config.reconnection_wait_timeout
+        check_interval = 5.0  # Check every 5 seconds
+        elapsed_time = 0.0
+
+        logger.info(f"Waiting for broker reconnection (timeout: {max_wait_time}s)...")
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                logger.info("Shutdown requested while waiting for broker reconnection")
+                return
+
+            # Check if broker connection is healthy again
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection restored, resuming scheduler")
+                return
+
+            if elapsed_time % 30.0 == 0.0:  # Log every 30 seconds
+                logger.info(
+                    f"Still waiting for broker reconnection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        logger.error(f"Broker connection not restored after {max_wait_time} seconds")
+        # Don't raise an exception here, just continue and let the scheduler retry
+        # This allows the scheduler to be more resilient to long-term connection issues
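The retry helper itself is not included in this diff; only its call shape is visible above (`retry_with_backoff(coroutine_factory, retry_config=..., retry_exceptions=...)` together with the `RetryConfig` fields used by `BeatWorkerConfig`). A minimal sketch of a compatible helper, written purely as an illustration and not as the package's actual `jararaca.utils.retry` implementation:

# Illustration only: a retry helper compatible with the call sites above.
# The real implementation lives in jararaca.utils.retry and is not shown in this diff.
import asyncio
import random
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TypeVar

T = TypeVar("T")


@dataclass
class RetryConfig:
    # Field names mirror those passed by BeatWorkerConfig above.
    max_retries: int = 3
    initial_delay: float = 1.0
    max_delay: float = 10.0
    backoff_factor: float = 2.0
    jitter: bool = True


async def retry_with_backoff(
    func: Callable[[], Awaitable[T]],
    *,
    retry_config: RetryConfig,
    retry_exceptions: tuple[type[BaseException], ...],
) -> T:
    """Await func() until it succeeds, sleeping with exponential backoff between attempts."""
    delay = retry_config.initial_delay
    last_exc: BaseException | None = None
    for attempt in range(retry_config.max_retries + 1):
        try:
            return await func()
        except retry_exceptions as exc:
            last_exc = exc
            if attempt == retry_config.max_retries:
                break
            sleep_for = min(delay, retry_config.max_delay)
            if retry_config.jitter:
                sleep_for += random.uniform(0, sleep_for * 0.25)
            await asyncio.sleep(sleep_for)
            delay *= retry_config.backoff_factor
    assert last_exc is not None
    raise last_exc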