jararaca 0.3.12a13__py3-none-any.whl → 0.3.12a15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jararaca might be problematic.
- jararaca/__init__.py +7 -1
- jararaca/messagebus/worker.py +652 -193
- jararaca/scheduler/beat_worker.py +537 -86
- jararaca/tools/typescript/decorators.py +33 -0
- jararaca/tools/typescript/interface_parser.py +383 -63
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a15.dist-info}/METADATA +1 -1
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a15.dist-info}/RECORD +11 -11
- pyproject.toml +1 -1
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a15.dist-info}/LICENSE +0 -0
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a15.dist-info}/WHEEL +0 -0
- {jararaca-0.3.12a13.dist-info → jararaca-0.3.12a15.dist-info}/entry_points.txt +0 -0
@@ -1,9 +1,11 @@
 import asyncio
 import contextlib
 import logging
+import random
 import signal
 import time
 from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from typing import Any
 from urllib.parse import parse_qs
@@ -15,6 +17,13 @@ import urllib3.util
 import uvloop
 from aio_pika import connect_robust
 from aio_pika.abc import AbstractChannel, AbstractRobustConnection
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ConnectionClosed,
+)
 from aio_pika.pool import Pool
 
 from jararaca.broker_backend import MessageBrokerBackend
@@ -30,6 +39,7 @@ from jararaca.scheduler.decorators import (
 )
 from jararaca.scheduler.types import DelayedMessageData
 from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+from jararaca.utils.retry import RetryConfig, retry_with_backoff
 
 logger = logging.getLogger(__name__)
 
@@ -100,17 +110,23 @@ class _MessageBrokerDispatcher(ABC):
 
 class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):
 
-    def __init__(self, url: str) -> None:
+    def __init__(self, url: str, config: "BeatWorkerConfig | None" = None) -> None:
         self.url = url
+        self.config = config or BeatWorkerConfig()
+        self.connection_healthy = False
+        self.reconnection_in_progress = False
+        self.shutdown_event = asyncio.Event()
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_lock = asyncio.Lock()
 
         self.conn_pool: "Pool[AbstractRobustConnection]" = Pool(
             self._create_connection,
-            max_size=
+            max_size=self.config.max_pool_size,
         )
 
         self.channel_pool: "Pool[AbstractChannel]" = Pool(
             self._create_channel,
-            max_size=
+            max_size=self.config.max_pool_size,
         )
 
         splitted = urllib3.util.parse_url(url)
@@ -131,86 +147,339 @@ class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):
 
     async def _create_connection(self) -> AbstractRobustConnection:
         """
-        Create a connection to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a robust connection to the RabbitMQ server with retry logic.
         """
-
-
+
+        async def _establish_connection() -> AbstractRobustConnection:
+            logger.info("Establishing connection to RabbitMQ")
+            connection = await connect_robust(
+                self.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
+            logger.info("Connected to RabbitMQ successfully")
+            return connection
+
+        return await retry_with_backoff(
+            _establish_connection,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                ConnectionError,
+                OSError,
+                TimeoutError,
+            ),
+        )
 
     async def _create_channel(self) -> AbstractChannel:
         """
-        Create a channel to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a channel to the RabbitMQ server with retry logic.
         """
-
-
-
+
+        async def _establish_channel() -> AbstractChannel:
+            async with self.conn_pool.acquire() as connection:
+                channel = await connection.channel()
+                return channel
+
+        return await retry_with_backoff(
+            _establish_channel,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                AMQPChannelError,
+                ChannelClosed,
+                ConnectionError,
+            ),
+        )
 
     async def dispatch_scheduled_action(self, action_id: str, timestamp: int) -> None:
         """
-        Dispatch a message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a message to the RabbitMQ server with retry logic.
         """
+        if not self.connection_healthy:
+            await self._wait_for_connection()
 
-
-
-
+        async def _dispatch() -> None:
+            logger.info(f"Dispatching message to {action_id} at {timestamp}")
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
 
-
-
-
+                await exchange.publish(
+                    aio_pika.Message(body=str(timestamp).encode()),
+                    routing_key=action_id,
+                )
+            logger.info(f"Dispatched message to {action_id} at {timestamp}")
+
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to dispatch message to {action_id} after retries: {e}"
            )
-
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise
 
     async def dispatch_delayed_message(
         self, delayed_message: DelayedMessageData
     ) -> None:
         """
-        Dispatch a delayed message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a delayed message to the RabbitMQ server with retry logic.
         """
-
+        if not self.connection_healthy:
+            await self._wait_for_connection()
+
+        async def _dispatch() -> None:
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+                await exchange.publish(
+                    aio_pika.Message(
+                        body=delayed_message.payload,
+                    ),
+                    routing_key=f"{delayed_message.message_topic}.",
+                )
 
-
-                await
-
-
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
                 ),
-                routing_key=f"{delayed_message.message_topic}.",
             )
+        except Exception as e:
+            logger.error(f"Failed to dispatch delayed message after retries: {e}")
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise
 
     async def initialize(self, scheduled_actions: list[ScheduledActionData]) -> None:
         """
-        Initialize the RabbitMQ server.
-        This is used to create the exchange and queues for the scheduled actions.
+        Initialize the RabbitMQ server with retry logic.
        """
 
-        async
-
+        async def _initialize() -> None:
+            async with self.channel_pool.acquire() as channel:
+                await RabbitmqUtils.get_main_exchange(channel, self.exchange)
 
-
-
+                for sched_act_data in scheduled_actions:
+                    queue_name = ScheduledAction.get_function_id(
+                        sched_act_data.callable
+                    )
+
+                    # Try to get existing queue
+                    await RabbitmqUtils.get_scheduled_action_queue(
+                        channel=channel,
+                        queue_name=queue_name,
+                    )
 
-
-
-
-
+        try:
+            logger.info("Initializing RabbitMQ connection...")
+            await retry_with_backoff(
+                _initialize,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )
+
+            # Verify connection is actually healthy after initialization
+            if not await self._is_connection_healthy():
+                logger.warning(
+                    "Connection health check failed after initialization, retrying..."
                )
+                # Wait a bit and try again
+                await asyncio.sleep(2.0)
+                if not await self._is_connection_healthy():
+                    raise ConnectionError("Connection not healthy after initialization")
+
+            self.connection_healthy = True
+            logger.info("RabbitMQ connection initialized successfully")
+
+            # Start health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to initialize RabbitMQ after retries: {e}")
+            raise
 
     async def dispose(self) -> None:
-
-
+        """Clean up resources"""
+        logger.info("Disposing RabbitMQ broker dispatcher")
+        self.shutdown_event.set()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            try:
+                await self.health_check_task
+            except asyncio.CancelledError:
+                pass
+
+        # Clean up pools
+        await self._cleanup_pools()
+
+    async def _monitor_connection_health(self) -> None:
+        """Monitor connection health and trigger reconnection if needed"""
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.health_check_interval)
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        asyncio.create_task(self._handle_reconnection())
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """Check if the connection is healthy"""
+        try:
+            # Try to acquire a connection from the pool
+            async with self.conn_pool.acquire() as connection:
+                if connection.is_closed:
+                    return False
+
+                # Try to create a channel to test connection
+                channel = await connection.channel()
+                await channel.close()
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    async def _handle_reconnection(self) -> None:
+        """Handle reconnection process with exponential backoff"""
+        async with self.reconnection_lock:
+            if self.reconnection_in_progress:
+                return
+
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+
+            logger.info("Starting reconnection process")
+
+            attempt = 0
+            while not self.shutdown_event.is_set():
+                try:
+                    attempt += 1
+                    logger.info(f"Reconnection attempt {attempt}")
+
+                    # Close existing pools
+                    await self._cleanup_pools()
+
+                    # Recreate pools
+                    self.conn_pool = Pool(
+                        self._create_connection,
+                        max_size=self.config.max_pool_size,
+                    )
+                    self.channel_pool = Pool(
+                        self._create_channel,
+                        max_size=self.config.max_pool_size,
+                    )
+
+                    # Test connection
+                    if await self._is_connection_healthy():
+                        self.connection_healthy = True
+                        logger.info("Reconnection successful")
+                        break
+                    else:
+                        raise ConnectionError(
+                            "Connection health check failed after reconnection"
+                        )
+
+                except Exception as e:
+                    logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                    if self.shutdown_event.is_set():
+                        break
+
+                    # Calculate backoff delay
+                    delay = self.config.reconnection_delay * (2 ** min(attempt - 1, 10))
+                    if self.config.connection_retry_config.jitter:
+                        jitter_amount = delay * 0.25
+                        delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                        delay = max(delay, 0.1)
+
+                    delay = min(delay, self.config.connection_retry_config.max_delay)
+
+                    logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                    await asyncio.sleep(delay)
+
+            self.reconnection_in_progress = False
+
+    async def _cleanup_pools(self) -> None:
+        """Clean up existing connection pools"""
+        try:
+            if hasattr(self, "channel_pool"):
+                await self.channel_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing channel pool: {e}")
+
+        try:
+            if hasattr(self, "conn_pool"):
+                await self.conn_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing connection pool: {e}")
+
+    async def _wait_for_connection(self) -> None:
+        """Wait for connection to be healthy"""
+        max_wait = 30.0  # Maximum wait time
+        wait_time = 0.0
+
+        while not self.connection_healthy and wait_time < max_wait:
+            if self.shutdown_event.is_set():
+                raise ConnectionError("Shutdown requested while waiting for connection")
 
+            await asyncio.sleep(0.5)
+            wait_time += 0.5
 
-
+        if not self.connection_healthy:
+            raise ConnectionError("Connection not healthy after maximum wait time")
+
+
+def _get_message_broker_dispatcher_from_url(
+    url: str, config: "BeatWorkerConfig | None" = None
+) -> _MessageBrokerDispatcher:
     """
     Factory function to create a message broker instance from a URL.
     Currently, only RabbitMQ is supported.
     """
     if url.startswith("amqp://") or url.startswith("amqps://"):
-        return _RabbitMQBrokerDispatcher(url=url)
+        return _RabbitMQBrokerDispatcher(url=url, config=config)
     else:
         raise ValueError(f"Unsupported message broker URL: {url}")
 
@@ -218,6 +487,42 @@ def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatche
 # endregion
 
 
+@dataclass
+class BeatWorkerConfig:
+    """Configuration for beat worker connection resilience"""
+
+    connection_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=10,
+            initial_delay=2.0,
+            max_delay=60.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    dispatch_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=3,
+            initial_delay=1.0,
+            max_delay=10.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    connection_heartbeat_interval: float = 30.0
+    health_check_interval: float = 15.0
+    max_reconnection_attempts: int = -1  # Infinite retries
+    reconnection_delay: float = 5.0
+
+    # Connection establishment timeouts
+    connection_wait_timeout: float = 300.0  # 5 minutes to wait for initial connection
+    reconnection_wait_timeout: float = 600.0  # 10 minutes to wait for reconnection
+
+    # Pool configuration
+    max_pool_size: int = 10
+    pool_recycle_time: float = 3600.0  # 1 hour
+
+
 class BeatWorker:
 
     def __init__(
@@ -227,11 +532,13 @@ class BeatWorker:
         broker_url: str,
         backend_url: str,
         scheduled_action_names: set[str] | None = None,
+        config: "BeatWorkerConfig | None" = None,
     ) -> None:
         self.app = app
+        self.config = config or BeatWorkerConfig()
 
         self.broker: _MessageBrokerDispatcher = _get_message_broker_dispatcher_from_url(
-            broker_url
+            broker_url, self.config
         )
         self.backend: MessageBrokerBackend = get_message_broker_backend_from_url(
             backend_url
@@ -271,15 +578,45 @@ class BeatWorker:
             self.app, self.container, self.scheduler_names
         )
 
+        # Initialize and wait for connection to be established
+        logger.info("Initializing broker connection...")
         await self.broker.initialize(scheduled_actions)
 
+        # Wait for connection to be healthy before starting scheduler
+        logger.info("Waiting for connection to be established...")
+        await self._wait_for_broker_connection()
+
+        logger.info("Connection established, starting scheduler...")
         await self.run_scheduled_actions(scheduled_actions)
 
     async def run_scheduled_actions(
         self, scheduled_actions: list[ScheduledActionData]
     ) -> None:
 
+        logger.info("Starting scheduled actions processing loop")
+
+        # Ensure we have a healthy connection before starting the main loop
+        if (
+            hasattr(self.broker, "connection_healthy")
+            and not self.broker.connection_healthy
+        ):
+            logger.warning(
+                "Connection not healthy at start of processing loop, waiting..."
+            )
+            await self._wait_for_broker_reconnection()
+
         while not self.shutdown_event.is_set():
+            # Check connection health before processing scheduled actions
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and not self.broker.connection_healthy
+            ):
+                logger.warning(
+                    "Broker connection is not healthy, waiting for reconnection..."
+                )
+                await self._wait_for_broker_reconnection()
+                continue
+
             now = int(time.time())
             for sched_act_data in scheduled_actions:
                 func = sched_act_data.callable
@@ -287,64 +624,178 @@ class BeatWorker:
                 if self.shutdown_event.is_set():
                     break
 
-
+                try:
+                    async with self.backend.lock():
 
-
-
-
+                        last_dispatch_time: int | None = (
+                            await self.backend.get_last_dispatch_time(
+                                ScheduledAction.get_function_id(func)
+                            )
                         )
-                    )
 
-
-
-
-                )
-                next_run: datetime = cron.get_next(datetime).replace(tzinfo=UTC)
-                if next_run > datetime.now(UTC):
-                    logger.info(
-                        f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                        if last_dispatch_time is not None:
+                            cron = croniter.croniter(
+                                scheduled_action.cron, last_dispatch_time
                            )
-
-
-
-
-
-
+                            next_run: datetime = cron.get_next(datetime).replace(
+                                tzinfo=UTC
+                            )
+                            if next_run > datetime.now(UTC):
+                                logger.info(
+                                    f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                                )
+                                continue
+
+                        if not scheduled_action.allow_overlap:
+                            if (
+                                await self.backend.get_in_execution_count(
+                                    ScheduledAction.get_function_id(func)
+                                )
+                                > 0
+                            ):
+                                continue
+
+                        try:
+                            await self.broker.dispatch_scheduled_action(
+                                ScheduledAction.get_function_id(func),
+                                now,
                            )
-                        > 0
-                    ):
-                        continue
 
-
-
-
-                )
+                            await self.backend.set_last_dispatch_time(
+                                ScheduledAction.get_function_id(func), now
+                            )
 
-
-
-
+                            logger.info(
+                                f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Failed to dispatch scheduled action {func.__module__}.{func.__qualname__}: {e}"
+                            )
+                            # Continue with other scheduled actions even if one fails
+                            continue
 
-
-
+                except Exception as e:
+                    logger.error(
+                        f"Error processing scheduled action {func.__module__}.{func.__qualname__}: {e}"
                    )
-
-
-
-
-
+                    # Continue with other scheduled actions even if one fails
+                    continue
+
+            # Handle delayed messages
+            try:
+                delayed_messages = await self.backend.dequeue_next_delayed_messages(now)
+                for delayed_message_data in delayed_messages:
+                    try:
+                        await self.broker.dispatch_delayed_message(delayed_message_data)
+                    except Exception as e:
+                        logger.error(f"Failed to dispatch delayed message: {e}")
+                        # Continue with other delayed messages even if one fails
+                        continue
+            except Exception as e:
+                logger.error(f"Error processing delayed messages: {e}")
 
             with contextlib.suppress(asyncio.TimeoutError):
                 await asyncio.wait_for(self.shutdown_event.wait(), self.interval)
 
-            # await self.shutdown_event.wait(self.interval)
-
         logger.info("Scheduler stopped")
 
-
-
+        try:
+            await self.backend.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing backend: {e}")
+
+        try:
+            await self.broker.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing broker: {e}")
 
     async def _graceful_shutdown(self) -> None:
         """Handles graceful shutdown process"""
         logger.info("Initiating graceful shutdown sequence")
         self.shutdown_event.set()
         logger.info("Graceful shutdown completed")
+
+    async def _wait_for_broker_connection(self) -> None:
+        """
+        Wait for the broker connection to be established and healthy.
+        This ensures the scheduler doesn't start until RabbitMQ is ready.
+        """
+        max_wait_time = self.config.connection_wait_timeout
+        check_interval = 2.0  # Check every 2 seconds
+        elapsed_time = 0.0
+
+        logger.info(
+            f"Waiting for broker connection to be established (timeout: {max_wait_time}s)..."
+        )
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                raise ConnectionError(
+                    "Shutdown requested while waiting for broker connection"
+                )
+
+            # Check if broker connection is healthy
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection is healthy")
+                return
+
+            # If broker doesn't have health status, try a simple health check
+            if not hasattr(self.broker, "connection_healthy"):
+                try:
+                    # For non-RabbitMQ brokers, assume connection is ready after initialization
+                    logger.info("Broker connection assumed to be ready")
+                    return
+                except Exception as e:
+                    logger.debug(f"Broker connection check failed: {e}")
+
+            if elapsed_time % 10.0 == 0.0:  # Log every 10 seconds
+                logger.info(
+                    f"Still waiting for broker connection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        raise ConnectionError(
+            f"Broker connection not established after {max_wait_time} seconds"
+        )
+
+    async def _wait_for_broker_reconnection(self) -> None:
+        """
+        Wait for the broker to reconnect when connection is lost during operation.
+        This pauses the scheduler until the connection is restored.
+        """
+        max_wait_time = self.config.reconnection_wait_timeout
+        check_interval = 5.0  # Check every 5 seconds
+        elapsed_time = 0.0
+
+        logger.info(f"Waiting for broker reconnection (timeout: {max_wait_time}s)...")
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                logger.info("Shutdown requested while waiting for broker reconnection")
+                return
+
+            # Check if broker connection is healthy again
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection restored, resuming scheduler")
+                return
+
+            if elapsed_time % 30.0 == 0.0:  # Log every 30 seconds
+                logger.info(
+                    f"Still waiting for broker reconnection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        logger.error(f"Broker connection not restored after {max_wait_time} seconds")
+        # Don't raise an exception here, just continue and let the scheduler retry
+        # This allows the scheduler to be more resilient to long-term connection issues