jararaca 0.3.11a16__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of jararaca might be problematic.

@@ -1,11 +1,12 @@
 import asyncio
 import contextlib
 import logging
+import random
 import signal
 import time
 from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
-from types import FrameType
 from typing import Any
 from urllib.parse import parse_qs
 
@@ -16,6 +17,13 @@ import urllib3.util
 import uvloop
 from aio_pika import connect_robust
 from aio_pika.abc import AbstractChannel, AbstractRobustConnection
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ConnectionClosed,
+)
 from aio_pika.pool import Pool
 
 from jararaca.broker_backend import MessageBrokerBackend
@@ -31,6 +39,7 @@ from jararaca.scheduler.decorators import (
 )
 from jararaca.scheduler.types import DelayedMessageData
 from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+from jararaca.utils.retry import RetryConfig, retry_with_backoff
 
 logger = logging.getLogger(__name__)
 
@@ -101,17 +110,23 @@ class _MessageBrokerDispatcher(ABC):
 
 class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):
 
-    def __init__(self, url: str) -> None:
+    def __init__(self, url: str, config: "BeatWorkerConfig | None" = None) -> None:
         self.url = url
+        self.config = config or BeatWorkerConfig()
+        self.connection_healthy = False
+        self.reconnection_in_progress = False
+        self.shutdown_event = asyncio.Event()
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_lock = asyncio.Lock()
 
         self.conn_pool: "Pool[AbstractRobustConnection]" = Pool(
             self._create_connection,
-            max_size=10,
+            max_size=self.config.max_pool_size,
         )
 
         self.channel_pool: "Pool[AbstractChannel]" = Pool(
             self._create_channel,
-            max_size=10,
+            max_size=self.config.max_pool_size,
         )
 
         splitted = urllib3.util.parse_url(url)
@@ -132,86 +147,339 @@ class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):
 
     async def _create_connection(self) -> AbstractRobustConnection:
         """
-        Create a connection to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a robust connection to the RabbitMQ server with retry logic.
         """
-        connection = await connect_robust(self.url)
-        return connection
+
+        async def _establish_connection() -> AbstractRobustConnection:
+            logger.info("Establishing connection to RabbitMQ")
+            connection = await connect_robust(
+                self.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
+            logger.info("Connected to RabbitMQ successfully")
+            return connection
+
+        return await retry_with_backoff(
+            _establish_connection,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                ConnectionError,
+                OSError,
+                TimeoutError,
+            ),
+        )
 
     async def _create_channel(self) -> AbstractChannel:
         """
-        Create a channel to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a channel to the RabbitMQ server with retry logic.
         """
-        async with self.conn_pool.acquire() as connection:
-            channel = await connection.channel()
-            return channel
+
+        async def _establish_channel() -> AbstractChannel:
+            async with self.conn_pool.acquire() as connection:
+                channel = await connection.channel()
+                return channel
+
+        return await retry_with_backoff(
+            _establish_channel,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                AMQPChannelError,
+                ChannelClosed,
+                ConnectionError,
+            ),
+        )
 
     async def dispatch_scheduled_action(self, action_id: str, timestamp: int) -> None:
         """
-        Dispatch a message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a message to the RabbitMQ server with retry logic.
         """
+        if not self.connection_healthy:
+            await self._wait_for_connection()
 
-        logger.info(f"Dispatching message to {action_id} at {timestamp}")
-        async with self.channel_pool.acquire() as channel:
-            exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+        async def _dispatch() -> None:
+            logger.info(f"Dispatching message to {action_id} at {timestamp}")
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
 
-            await exchange.publish(
-                aio_pika.Message(body=str(timestamp).encode()),
-                routing_key=action_id,
+                await exchange.publish(
+                    aio_pika.Message(body=str(timestamp).encode()),
+                    routing_key=action_id,
+                )
+                logger.info(f"Dispatched message to {action_id} at {timestamp}")
+
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
             )
-        logger.info(f"Dispatched message to {action_id} at {timestamp}")
+        except Exception as e:
+            logger.error(
+                f"Failed to dispatch message to {action_id} after retries: {e}"
+            )
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise
 
     async def dispatch_delayed_message(
         self, delayed_message: DelayedMessageData
     ) -> None:
         """
-        Dispatch a delayed message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a delayed message to the RabbitMQ server with retry logic.
         """
-        async with self.channel_pool.acquire() as channel:
+        if not self.connection_healthy:
+            await self._wait_for_connection()
+
+        async def _dispatch() -> None:
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+                await exchange.publish(
+                    aio_pika.Message(
+                        body=delayed_message.payload,
+                    ),
+                    routing_key=f"{delayed_message.message_topic}.",
+                )
 
-            exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
-            await exchange.publish(
-                aio_pika.Message(
-                    body=delayed_message.payload,
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
                 ),
-                routing_key=f"{delayed_message.message_topic}.",
             )
+        except Exception as e:
+            logger.error(f"Failed to dispatch delayed message after retries: {e}")
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise
 
     async def initialize(self, scheduled_actions: list[ScheduledActionData]) -> None:
         """
-        Initialize the RabbitMQ server.
-        This is used to create the exchange and queues for the scheduled actions.
+        Initialize the RabbitMQ server with retry logic.
        """
 
-        async with self.channel_pool.acquire() as channel:
-            await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+        async def _initialize() -> None:
+            async with self.channel_pool.acquire() as channel:
+                await RabbitmqUtils.get_main_exchange(channel, self.exchange)
 
-            for sched_act_data in scheduled_actions:
-                queue_name = ScheduledAction.get_function_id(sched_act_data.callable)
+                for sched_act_data in scheduled_actions:
+                    queue_name = ScheduledAction.get_function_id(
+                        sched_act_data.callable
+                    )
+
+                    # Try to get existing queue
+                    await RabbitmqUtils.get_scheduled_action_queue(
+                        channel=channel,
+                        queue_name=queue_name,
+                    )
+
+        try:
+            logger.info("Initializing RabbitMQ connection...")
+            await retry_with_backoff(
+                _initialize,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )
 
-                # Try to get existing queue
-                await RabbitmqUtils.get_scheduled_action_queue(
-                    channel=channel,
-                    queue_name=queue_name,
+            # Verify connection is actually healthy after initialization
+            if not await self._is_connection_healthy():
+                logger.warning(
+                    "Connection health check failed after initialization, retrying..."
                 )
+                # Wait a bit and try again
+                await asyncio.sleep(2.0)
+                if not await self._is_connection_healthy():
+                    raise ConnectionError("Connection not healthy after initialization")
+
+            self.connection_healthy = True
+            logger.info("RabbitMQ connection initialized successfully")
+
+            # Start health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to initialize RabbitMQ after retries: {e}")
+            raise
 
     async def dispose(self) -> None:
-        await self.channel_pool.close()
-        await self.conn_pool.close()
+        """Clean up resources"""
+        logger.info("Disposing RabbitMQ broker dispatcher")
+        self.shutdown_event.set()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            try:
+                await self.health_check_task
+            except asyncio.CancelledError:
+                pass
+
+        # Clean up pools
+        await self._cleanup_pools()
+
+    async def _monitor_connection_health(self) -> None:
+        """Monitor connection health and trigger reconnection if needed"""
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.health_check_interval)
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        asyncio.create_task(self._handle_reconnection())
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """Check if the connection is healthy"""
+        try:
+            # Try to acquire a connection from the pool
+            async with self.conn_pool.acquire() as connection:
+                if connection.is_closed:
+                    return False
+
+                # Try to create a channel to test connection
+                channel = await connection.channel()
+                await channel.close()
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    async def _handle_reconnection(self) -> None:
+        """Handle reconnection process with exponential backoff"""
+        async with self.reconnection_lock:
+            if self.reconnection_in_progress:
+                return
+
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+
+            logger.info("Starting reconnection process")
+
+            attempt = 0
+            while not self.shutdown_event.is_set():
+                try:
+                    attempt += 1
+                    logger.info(f"Reconnection attempt {attempt}")
+
+                    # Close existing pools
+                    await self._cleanup_pools()
+
+                    # Recreate pools
+                    self.conn_pool = Pool(
+                        self._create_connection,
+                        max_size=self.config.max_pool_size,
+                    )
+                    self.channel_pool = Pool(
+                        self._create_channel,
+                        max_size=self.config.max_pool_size,
+                    )
+
+                    # Test connection
+                    if await self._is_connection_healthy():
+                        self.connection_healthy = True
+                        logger.info("Reconnection successful")
+                        break
+                    else:
+                        raise ConnectionError(
+                            "Connection health check failed after reconnection"
+                        )
+
+                except Exception as e:
+                    logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                    if self.shutdown_event.is_set():
+                        break
+
+                    # Calculate backoff delay
+                    delay = self.config.reconnection_delay * (2 ** min(attempt - 1, 10))
+                    if self.config.connection_retry_config.jitter:
+                        jitter_amount = delay * 0.25
+                        delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                        delay = max(delay, 0.1)
+
+                    delay = min(delay, self.config.connection_retry_config.max_delay)
+
+                    logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                    await asyncio.sleep(delay)
+
+            self.reconnection_in_progress = False
+
+    async def _cleanup_pools(self) -> None:
+        """Clean up existing connection pools"""
+        try:
+            if hasattr(self, "channel_pool"):
+                await self.channel_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing channel pool: {e}")
+
+        try:
+            if hasattr(self, "conn_pool"):
+                await self.conn_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing connection pool: {e}")
+
+    async def _wait_for_connection(self) -> None:
+        """Wait for connection to be healthy"""
+        max_wait = 30.0  # Maximum wait time
+        wait_time = 0.0
+
+        while not self.connection_healthy and wait_time < max_wait:
+            if self.shutdown_event.is_set():
+                raise ConnectionError("Shutdown requested while waiting for connection")
+
+            await asyncio.sleep(0.5)
+            wait_time += 0.5
+
+        if not self.connection_healthy:
+            raise ConnectionError("Connection not healthy after maximum wait time")
 
 
-def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatcher:
+def _get_message_broker_dispatcher_from_url(
+    url: str, config: "BeatWorkerConfig | None" = None
+) -> _MessageBrokerDispatcher:
     """
     Factory function to create a message broker instance from a URL.
     Currently, only RabbitMQ is supported.
     """
     if url.startswith("amqp://") or url.startswith("amqps://"):
-        return _RabbitMQBrokerDispatcher(url=url)
+        return _RabbitMQBrokerDispatcher(url=url, config=config)
     else:
         raise ValueError(f"Unsupported message broker URL: {url}")
 
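The reconnection loop added in _handle_reconnection above doubles the base delay on every failed attempt (with the exponent capped at 10), applies roughly +/-25% jitter when the connection retry config enables it, and clamps the result to the configured maximum delay. A minimal standalone sketch of that delay calculation, using the BeatWorkerConfig defaults that appear further down in this diff (reconnection_delay=5.0, max_delay=60.0, jitter=True); the function name is illustrative and not part of the package:

import random


def reconnection_delay_sketch(
    attempt: int,
    base_delay: float = 5.0,   # BeatWorkerConfig.reconnection_delay default
    max_delay: float = 60.0,   # connection_retry_config.max_delay default
    jitter: bool = True,       # connection_retry_config.jitter default
) -> float:
    # Exponential growth, exponent capped at 10 as in _handle_reconnection.
    delay = base_delay * (2 ** min(attempt - 1, 10))
    if jitter:
        # +/- 25% jitter, never dropping below 0.1 seconds.
        jitter_amount = delay * 0.25
        delay = max(delay + random.uniform(-jitter_amount, jitter_amount), 0.1)
    # Clamp to the configured maximum.
    return min(delay, max_delay)


# Attempts 1..4 give roughly 5s, 10s, 20s, 40s (plus jitter); later attempts cap at 60s.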
@@ -219,6 +487,42 @@ def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatche
 # endregion
 
 
+@dataclass
+class BeatWorkerConfig:
+    """Configuration for beat worker connection resilience"""
+
+    connection_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=10,
+            initial_delay=2.0,
+            max_delay=60.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    dispatch_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=3,
+            initial_delay=1.0,
+            max_delay=10.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    connection_heartbeat_interval: float = 30.0
+    health_check_interval: float = 15.0
+    max_reconnection_attempts: int = -1  # Infinite retries
+    reconnection_delay: float = 5.0
+
+    # Connection establishment timeouts
+    connection_wait_timeout: float = 300.0  # 5 minutes to wait for initial connection
+    reconnection_wait_timeout: float = 600.0  # 10 minutes to wait for reconnection
+
+    # Pool configuration
+    max_pool_size: int = 10
+    pool_recycle_time: float = 3600.0  # 1 hour
+
+
 class BeatWorker:
 
     def __init__(
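The jararaca.utils.retry helpers imported above (RetryConfig, retry_with_backoff) are not part of this diff; only their call sites and the RetryConfig fields used by BeatWorkerConfig are visible. Below is a minimal sketch of one shape those helpers could take so that the calls shown here line up (a zero-argument coroutine factory passed positionally, retry_config and retry_exceptions keywords, and the fields max_retries, initial_delay, max_delay, backoff_factor, jitter). This is an assumption for illustration, not the package's actual implementation:

import asyncio
import random
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TypeVar

T = TypeVar("T")


@dataclass
class RetryConfig:
    # Field names inferred from the RetryConfig(...) calls in this diff;
    # the default values here are arbitrary placeholders.
    max_retries: int = 3
    initial_delay: float = 1.0
    max_delay: float = 10.0
    backoff_factor: float = 2.0
    jitter: bool = True


async def retry_with_backoff(
    func: Callable[[], Awaitable[T]],
    retry_config: RetryConfig,
    retry_exceptions: tuple[type[BaseException], ...] = (Exception,),
) -> T:
    """Call `func`, retrying on the given exceptions with exponential backoff."""
    delay = retry_config.initial_delay
    for attempt in range(retry_config.max_retries + 1):
        try:
            return await func()
        except retry_exceptions:
            if attempt == retry_config.max_retries:
                raise
            sleep_for = min(delay, retry_config.max_delay)
            if retry_config.jitter:
                sleep_for += random.uniform(0, sleep_for * 0.25)
            await asyncio.sleep(sleep_for)
            delay *= retry_config.backoff_factor
    raise AssertionError("unreachable")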
@@ -228,11 +532,13 @@ class BeatWorker:
         broker_url: str,
         backend_url: str,
         scheduled_action_names: set[str] | None = None,
+        config: "BeatWorkerConfig | None" = None,
     ) -> None:
         self.app = app
+        self.config = config or BeatWorkerConfig()
 
         self.broker: _MessageBrokerDispatcher = _get_message_broker_dispatcher_from_url(
-            broker_url
+            broker_url, self.config
         )
         self.backend: MessageBrokerBackend = get_message_broker_backend_from_url(
             backend_url
249
555
 
250
556
  def run(self) -> None:
251
557
 
252
- def on_signal_received(signal: int, frame_type: FrameType | None) -> None:
253
- logger.info("Received shutdown signal")
254
- self.shutdown_event.set()
255
-
256
- signal.signal(signal.SIGINT, on_signal_received)
558
+ def on_shutdown(loop: asyncio.AbstractEventLoop) -> None:
559
+ logger.info("Shutting down - signal received")
560
+ # Schedule the shutdown to run in the event loop
561
+ asyncio.create_task(self._graceful_shutdown())
257
562
 
258
563
  with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
564
+ loop = runner.get_loop()
565
+ loop.add_signal_handler(signal.SIGINT, on_shutdown, loop)
566
+ # Add graceful shutdown handler for SIGTERM as well
567
+ loop.add_signal_handler(signal.SIGTERM, on_shutdown, loop)
259
568
  runner.run(self.start_scheduler())
260
569
 
261
570
  async def start_scheduler(self) -> None:
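The run() change above swaps a process-wide signal.signal(SIGINT, ...) handler for loop-aware loop.add_signal_handler(...) registrations covering both SIGINT and SIGTERM, so shutdown work can be scheduled safely inside the running event loop. A self-contained sketch of that pattern (the names here are illustrative, not jararaca's):

import asyncio
import signal


async def main() -> None:
    shutdown_event = asyncio.Event()
    loop = asyncio.get_running_loop()

    def on_shutdown() -> None:
        # Runs inside the event loop, so touching asyncio objects here is safe.
        shutdown_event.set()

    # Unix-only: add_signal_handler raises NotImplementedError on Windows event loops.
    loop.add_signal_handler(signal.SIGINT, on_shutdown)
    loop.add_signal_handler(signal.SIGTERM, on_shutdown)

    print("Running; send SIGINT or SIGTERM to stop")
    await shutdown_event.wait()
    print("Shutting down cleanly")


if __name__ == "__main__":
    asyncio.run(main())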
@@ -269,15 +578,45 @@ class BeatWorker:
             self.app, self.container, self.scheduler_names
         )
 
+        # Initialize and wait for connection to be established
+        logger.info("Initializing broker connection...")
         await self.broker.initialize(scheduled_actions)
 
+        # Wait for connection to be healthy before starting scheduler
+        logger.info("Waiting for connection to be established...")
+        await self._wait_for_broker_connection()
+
+        logger.info("Connection established, starting scheduler...")
         await self.run_scheduled_actions(scheduled_actions)
 
     async def run_scheduled_actions(
         self, scheduled_actions: list[ScheduledActionData]
     ) -> None:
 
+        logger.info("Starting scheduled actions processing loop")
+
+        # Ensure we have a healthy connection before starting the main loop
+        if (
+            hasattr(self.broker, "connection_healthy")
+            and not self.broker.connection_healthy
+        ):
+            logger.warning(
+                "Connection not healthy at start of processing loop, waiting..."
+            )
+            await self._wait_for_broker_reconnection()
+
         while not self.shutdown_event.is_set():
+            # Check connection health before processing scheduled actions
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and not self.broker.connection_healthy
+            ):
+                logger.warning(
+                    "Broker connection is not healthy, waiting for reconnection..."
+                )
+                await self._wait_for_broker_reconnection()
+                continue
+
             now = int(time.time())
             for sched_act_data in scheduled_actions:
                 func = sched_act_data.callable
@@ -285,58 +624,178 @@ class BeatWorker:
                 if self.shutdown_event.is_set():
                     break
 
-                async with self.backend.lock():
+                try:
+                    async with self.backend.lock():
 
-                    last_dispatch_time: int | None = (
-                        await self.backend.get_last_dispatch_time(
-                            ScheduledAction.get_function_id(func)
+                        last_dispatch_time: int | None = (
+                            await self.backend.get_last_dispatch_time(
+                                ScheduledAction.get_function_id(func)
+                            )
                         )
-                    )
 
-                    if last_dispatch_time is not None:
-                        cron = croniter.croniter(
-                            scheduled_action.cron, last_dispatch_time
-                        )
-                        next_run: datetime = cron.get_next(datetime).replace(tzinfo=UTC)
-                        if next_run > datetime.now(UTC):
-                            logger.info(
-                                f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                        if last_dispatch_time is not None:
+                            cron = croniter.croniter(
+                                scheduled_action.cron, last_dispatch_time
+                            )
+                            next_run: datetime = cron.get_next(datetime).replace(
+                                tzinfo=UTC
+                            )
+                            if next_run > datetime.now(UTC):
+                                logger.info(
+                                    f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                                )
+                                continue
+
+                        if not scheduled_action.allow_overlap:
+                            if (
+                                await self.backend.get_in_execution_count(
+                                    ScheduledAction.get_function_id(func)
+                                )
+                                > 0
+                            ):
+                                continue
+
+                        try:
+                            await self.broker.dispatch_scheduled_action(
+                                ScheduledAction.get_function_id(func),
+                                now,
                             )
-                            continue
 
-                    if not scheduled_action.allow_overlap:
-                        if (
-                            await self.backend.get_in_execution_count(
-                                ScheduledAction.get_function_id(func)
+                            await self.backend.set_last_dispatch_time(
+                                ScheduledAction.get_function_id(func), now
+                            )
+
+                            logger.info(
+                                f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
                             )
-                            > 0
-                        ):
+                        except Exception as e:
+                            logger.error(
+                                f"Failed to dispatch scheduled action {func.__module__}.{func.__qualname__}: {e}"
+                            )
+                            # Continue with other scheduled actions even if one fails
                             continue
 
-                    await self.broker.dispatch_scheduled_action(
-                        ScheduledAction.get_function_id(func),
-                        now,
+                except Exception as e:
+                    logger.error(
+                        f"Error processing scheduled action {func.__module__}.{func.__qualname__}: {e}"
                     )
+                    # Continue with other scheduled actions even if one fails
+                    continue
+
+            # Handle delayed messages
+            try:
+                delayed_messages = await self.backend.dequeue_next_delayed_messages(now)
+                for delayed_message_data in delayed_messages:
+                    try:
+                        await self.broker.dispatch_delayed_message(delayed_message_data)
+                    except Exception as e:
+                        logger.error(f"Failed to dispatch delayed message: {e}")
+                        # Continue with other delayed messages even if one fails
+                        continue
+            except Exception as e:
+                logger.error(f"Error processing delayed messages: {e}")
 
-                    await self.backend.set_last_dispatch_time(
-                        ScheduledAction.get_function_id(func), now
-                    )
+            with contextlib.suppress(asyncio.TimeoutError):
+                await asyncio.wait_for(self.shutdown_event.wait(), self.interval)
 
-                    logger.info(
-                        f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
-                    )
+        logger.info("Scheduler stopped")
 
-            for (
-                delayed_message_data
-            ) in await self.backend.dequeue_next_delayed_messages(now):
-                await self.broker.dispatch_delayed_message(delayed_message_data)
+        try:
+            await self.backend.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing backend: {e}")
 
-            with contextlib.suppress(asyncio.TimeoutError):
-                await asyncio.wait_for(self.shutdown_event.wait(), self.interval)
+        try:
+            await self.broker.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing broker: {e}")
 
-            # await self.shutdown_event.wait(self.interval)
+    async def _graceful_shutdown(self) -> None:
+        """Handles graceful shutdown process"""
+        logger.info("Initiating graceful shutdown sequence")
+        self.shutdown_event.set()
+        logger.info("Graceful shutdown completed")
 
-        logger.info("Scheduler stopped")
+    async def _wait_for_broker_connection(self) -> None:
+        """
+        Wait for the broker connection to be established and healthy.
+        This ensures the scheduler doesn't start until RabbitMQ is ready.
+        """
+        max_wait_time = self.config.connection_wait_timeout
+        check_interval = 2.0  # Check every 2 seconds
+        elapsed_time = 0.0
+
+        logger.info(
+            f"Waiting for broker connection to be established (timeout: {max_wait_time}s)..."
+        )
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                raise ConnectionError(
+                    "Shutdown requested while waiting for broker connection"
+                )
+
+            # Check if broker connection is healthy
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection is healthy")
+                return
+
+            # If broker doesn't have health status, try a simple health check
+            if not hasattr(self.broker, "connection_healthy"):
+                try:
+                    # For non-RabbitMQ brokers, assume connection is ready after initialization
+                    logger.info("Broker connection assumed to be ready")
+                    return
+                except Exception as e:
+                    logger.debug(f"Broker connection check failed: {e}")
+
+            if elapsed_time % 10.0 == 0.0:  # Log every 10 seconds
+                logger.info(
+                    f"Still waiting for broker connection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        raise ConnectionError(
+            f"Broker connection not established after {max_wait_time} seconds"
+        )
+
+    async def _wait_for_broker_reconnection(self) -> None:
+        """
+        Wait for the broker to reconnect when connection is lost during operation.
+        This pauses the scheduler until the connection is restored.
+        """
+        max_wait_time = self.config.reconnection_wait_timeout
+        check_interval = 5.0  # Check every 5 seconds
+        elapsed_time = 0.0
+
+        logger.info(f"Waiting for broker reconnection (timeout: {max_wait_time}s)...")
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                logger.info("Shutdown requested while waiting for broker reconnection")
+                return
+
+            # Check if broker connection is healthy again
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection restored, resuming scheduler")
+                return
+
+            if elapsed_time % 30.0 == 0.0:  # Log every 30 seconds
+                logger.info(
+                    f"Still waiting for broker reconnection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
 
-        await self.backend.dispose()
-        await self.broker.dispose()
+        logger.error(f"Broker connection not restored after {max_wait_time} seconds")
+        # Don't raise an exception here, just continue and let the scheduler retry
+        # This allows the scheduler to be more resilient to long-term connection issues