jararaca 0.3.11a16__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jararaca might be problematic.
- README.md +120 -0
- jararaca/__init__.py +106 -8
- jararaca/cli.py +216 -31
- jararaca/messagebus/worker.py +749 -272
- jararaca/microservice.py +42 -0
- jararaca/persistence/interceptors/aiosqa_interceptor.py +82 -73
- jararaca/persistence/interceptors/constants.py +1 -0
- jararaca/persistence/interceptors/decorators.py +45 -0
- jararaca/presentation/server.py +57 -11
- jararaca/presentation/websocket/redis.py +113 -7
- jararaca/reflect/metadata.py +1 -1
- jararaca/rpc/http/__init__.py +97 -0
- jararaca/rpc/http/backends/__init__.py +10 -0
- jararaca/rpc/http/backends/httpx.py +39 -9
- jararaca/rpc/http/decorators.py +302 -6
- jararaca/scheduler/beat_worker.py +550 -91
- jararaca/tools/typescript/__init__.py +0 -0
- jararaca/tools/typescript/decorators.py +95 -0
- jararaca/tools/typescript/interface_parser.py +699 -156
- jararaca-0.3.12.dist-info/LICENSE +674 -0
- {jararaca-0.3.11a16.dist-info → jararaca-0.3.12.dist-info}/METADATA +4 -3
- {jararaca-0.3.11a16.dist-info → jararaca-0.3.12.dist-info}/RECORD +26 -19
- {jararaca-0.3.11a16.dist-info → jararaca-0.3.12.dist-info}/WHEEL +1 -1
- pyproject.toml +86 -0
- /jararaca-0.3.11a16.dist-info/LICENSE → /LICENSE +0 -0
- {jararaca-0.3.11a16.dist-info → jararaca-0.3.12.dist-info}/entry_points.txt +0 -0
jararaca/scheduler/beat_worker.py

@@ -1,11 +1,12 @@
 import asyncio
 import contextlib
 import logging
+import random
 import signal
 import time
 from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
-from types import FrameType
 from typing import Any
 from urllib.parse import parse_qs

@@ -16,6 +17,13 @@ import urllib3.util
 import uvloop
 from aio_pika import connect_robust
 from aio_pika.abc import AbstractChannel, AbstractRobustConnection
+from aio_pika.exceptions import (
+    AMQPChannelError,
+    AMQPConnectionError,
+    AMQPError,
+    ChannelClosed,
+    ConnectionClosed,
+)
 from aio_pika.pool import Pool

 from jararaca.broker_backend import MessageBrokerBackend
@@ -31,6 +39,7 @@ from jararaca.scheduler.decorators import (
 )
 from jararaca.scheduler.types import DelayedMessageData
 from jararaca.utils.rabbitmq_utils import RabbitmqUtils
+from jararaca.utils.retry import RetryConfig, retry_with_backoff

 logger = logging.getLogger(__name__)

@@ -101,17 +110,23 @@ class _MessageBrokerDispatcher(ABC):

 class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):

-    def __init__(self, url: str) -> None:
+    def __init__(self, url: str, config: "BeatWorkerConfig | None" = None) -> None:
         self.url = url
+        self.config = config or BeatWorkerConfig()
+        self.connection_healthy = False
+        self.reconnection_in_progress = False
+        self.shutdown_event = asyncio.Event()
+        self.health_check_task: asyncio.Task[Any] | None = None
+        self.reconnection_lock = asyncio.Lock()

         self.conn_pool: "Pool[AbstractRobustConnection]" = Pool(
             self._create_connection,
-            max_size=
+            max_size=self.config.max_pool_size,
         )

         self.channel_pool: "Pool[AbstractChannel]" = Pool(
             self._create_channel,
-            max_size=
+            max_size=self.config.max_pool_size,
         )

         splitted = urllib3.util.parse_url(url)
@@ -132,86 +147,339 @@ class _RabbitMQBrokerDispatcher(_MessageBrokerDispatcher):

     async def _create_connection(self) -> AbstractRobustConnection:
         """
-        Create a connection to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a robust connection to the RabbitMQ server with retry logic.
         """
-
-
+
+        async def _establish_connection() -> AbstractRobustConnection:
+            logger.info("Establishing connection to RabbitMQ")
+            connection = await connect_robust(
+                self.url,
+                heartbeat=self.config.connection_heartbeat_interval,
+            )
+            logger.info("Connected to RabbitMQ successfully")
+            return connection
+
+        return await retry_with_backoff(
+            _establish_connection,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                ConnectionError,
+                OSError,
+                TimeoutError,
+            ),
+        )

     async def _create_channel(self) -> AbstractChannel:
         """
-        Create a channel to the RabbitMQ server.
-        This is used to send messages to the RabbitMQ server.
+        Create a channel to the RabbitMQ server with retry logic.
         """
-
-
-
+
+        async def _establish_channel() -> AbstractChannel:
+            async with self.conn_pool.acquire() as connection:
+                channel = await connection.channel()
+                return channel
+
+        return await retry_with_backoff(
+            _establish_channel,
+            retry_config=self.config.connection_retry_config,
+            retry_exceptions=(
+                AMQPConnectionError,
+                AMQPChannelError,
+                ChannelClosed,
+                ConnectionError,
+            ),
+        )

     async def dispatch_scheduled_action(self, action_id: str, timestamp: int) -> None:
         """
-        Dispatch a message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a message to the RabbitMQ server with retry logic.
         """
+        if not self.connection_healthy:
+            await self._wait_for_connection()

-
-
-
+        async def _dispatch() -> None:
+            logger.info(f"Dispatching message to {action_id} at {timestamp}")
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)

-
-
-
+                await exchange.publish(
+                    aio_pika.Message(body=str(timestamp).encode()),
+                    routing_key=action_id,
+                )
+            logger.info(f"Dispatched message to {action_id} at {timestamp}")
+
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
             )
-
+        except Exception as e:
+            logger.error(
+                f"Failed to dispatch message to {action_id} after retries: {e}"
+            )
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise

     async def dispatch_delayed_message(
         self, delayed_message: DelayedMessageData
     ) -> None:
         """
-        Dispatch a delayed message to the RabbitMQ server.
-        This is used to send a message to the RabbitMQ server
-        to trigger the scheduled action.
+        Dispatch a delayed message to the RabbitMQ server with retry logic.
         """
-
+        if not self.connection_healthy:
+            await self._wait_for_connection()
+
+        async def _dispatch() -> None:
+            async with self.channel_pool.acquire() as channel:
+                exchange = await RabbitmqUtils.get_main_exchange(channel, self.exchange)
+                await exchange.publish(
+                    aio_pika.Message(
+                        body=delayed_message.payload,
+                    ),
+                    routing_key=f"{delayed_message.message_topic}.",
+                )

-
-        await
-
-
+        try:
+            await retry_with_backoff(
+                _dispatch,
+                retry_config=self.config.dispatch_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
                 ),
-                    routing_key=f"{delayed_message.message_topic}.",
             )
+        except Exception as e:
+            logger.error(f"Failed to dispatch delayed message after retries: {e}")
+            # Trigger reconnection if dispatch fails
+            if not self.reconnection_in_progress:
+                asyncio.create_task(self._handle_reconnection())
+            raise

     async def initialize(self, scheduled_actions: list[ScheduledActionData]) -> None:
         """
-        Initialize the RabbitMQ server.
-        This is used to create the exchange and queues for the scheduled actions.
+        Initialize the RabbitMQ server with retry logic.
         """

-        async
-
+        async def _initialize() -> None:
+            async with self.channel_pool.acquire() as channel:
+                await RabbitmqUtils.get_main_exchange(channel, self.exchange)

-
-
+                for sched_act_data in scheduled_actions:
+                    queue_name = ScheduledAction.get_function_id(
+                        sched_act_data.callable
+                    )
+
+                    # Try to get existing queue
+                    await RabbitmqUtils.get_scheduled_action_queue(
+                        channel=channel,
+                        queue_name=queue_name,
+                    )
+
+        try:
+            logger.info("Initializing RabbitMQ connection...")
+            await retry_with_backoff(
+                _initialize,
+                retry_config=self.config.connection_retry_config,
+                retry_exceptions=(
+                    AMQPConnectionError,
+                    AMQPChannelError,
+                    ChannelClosed,
+                    ConnectionClosed,
+                    AMQPError,
+                ),
+            )

-
-
-
-
+            # Verify connection is actually healthy after initialization
+            if not await self._is_connection_healthy():
+                logger.warning(
+                    "Connection health check failed after initialization, retrying..."
                 )
+                # Wait a bit and try again
+                await asyncio.sleep(2.0)
+                if not await self._is_connection_healthy():
+                    raise ConnectionError("Connection not healthy after initialization")
+
+            self.connection_healthy = True
+            logger.info("RabbitMQ connection initialized successfully")
+
+            # Start health monitoring
+            self.health_check_task = asyncio.create_task(
+                self._monitor_connection_health()
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to initialize RabbitMQ after retries: {e}")
+            raise

     async def dispose(self) -> None:
-
-
+        """Clean up resources"""
+        logger.info("Disposing RabbitMQ broker dispatcher")
+        self.shutdown_event.set()
+
+        # Cancel health monitoring
+        if self.health_check_task:
+            self.health_check_task.cancel()
+            try:
+                await self.health_check_task
+            except asyncio.CancelledError:
+                pass
+
+        # Clean up pools
+        await self._cleanup_pools()
+
+    async def _monitor_connection_health(self) -> None:
+        """Monitor connection health and trigger reconnection if needed"""
+        while not self.shutdown_event.is_set():
+            try:
+                await asyncio.sleep(self.config.health_check_interval)
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Check connection health
+                if not await self._is_connection_healthy():
+                    logger.warning(
+                        "Connection health check failed, triggering reconnection"
+                    )
+                    if not self.reconnection_in_progress:
+                        asyncio.create_task(self._handle_reconnection())
+
+            except asyncio.CancelledError:
+                logger.info("Connection health monitoring cancelled")
+                break
+            except Exception as e:
+                logger.error(f"Error in connection health monitoring: {e}")
+                await asyncio.sleep(5)  # Wait before retrying
+
+    async def _is_connection_healthy(self) -> bool:
+        """Check if the connection is healthy"""
+        try:
+            # Try to acquire a connection from the pool
+            async with self.conn_pool.acquire() as connection:
+                if connection.is_closed:
+                    return False
+
+                # Try to create a channel to test connection
+                channel = await connection.channel()
+                await channel.close()
+                return True
+
+        except Exception as e:
+            logger.debug(f"Connection health check failed: {e}")
+            return False
+
+    async def _handle_reconnection(self) -> None:
+        """Handle reconnection process with exponential backoff"""
+        async with self.reconnection_lock:
+            if self.reconnection_in_progress:
+                return
+
+            self.reconnection_in_progress = True
+            self.connection_healthy = False
+
+        logger.info("Starting reconnection process")
+
+        attempt = 0
+        while not self.shutdown_event.is_set():
+            try:
+                attempt += 1
+                logger.info(f"Reconnection attempt {attempt}")
+
+                # Close existing pools
+                await self._cleanup_pools()
+
+                # Recreate pools
+                self.conn_pool = Pool(
+                    self._create_connection,
+                    max_size=self.config.max_pool_size,
+                )
+                self.channel_pool = Pool(
+                    self._create_channel,
+                    max_size=self.config.max_pool_size,
+                )
+
+                # Test connection
+                if await self._is_connection_healthy():
+                    self.connection_healthy = True
+                    logger.info("Reconnection successful")
+                    break
+                else:
+                    raise ConnectionError(
+                        "Connection health check failed after reconnection"
+                    )
+
+            except Exception as e:
+                logger.error(f"Reconnection attempt {attempt} failed: {e}")
+
+                if self.shutdown_event.is_set():
+                    break
+
+                # Calculate backoff delay
+                delay = self.config.reconnection_delay * (2 ** min(attempt - 1, 10))
+                if self.config.connection_retry_config.jitter:
+                    jitter_amount = delay * 0.25
+                    delay = delay + random.uniform(-jitter_amount, jitter_amount)
+                    delay = max(delay, 0.1)
+
+                delay = min(delay, self.config.connection_retry_config.max_delay)
+
+                logger.info(f"Retrying reconnection in {delay:.2f} seconds")
+                await asyncio.sleep(delay)
+
+        self.reconnection_in_progress = False
+
+    async def _cleanup_pools(self) -> None:
+        """Clean up existing connection pools"""
+        try:
+            if hasattr(self, "channel_pool"):
+                await self.channel_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing channel pool: {e}")
+
+        try:
+            if hasattr(self, "conn_pool"):
+                await self.conn_pool.close()
+        except Exception as e:
+            logger.warning(f"Error closing connection pool: {e}")
+
+    async def _wait_for_connection(self) -> None:
+        """Wait for connection to be healthy"""
+        max_wait = 30.0  # Maximum wait time
+        wait_time = 0.0
+
+        while not self.connection_healthy and wait_time < max_wait:
+            if self.shutdown_event.is_set():
+                raise ConnectionError("Shutdown requested while waiting for connection")
+
+            await asyncio.sleep(0.5)
+            wait_time += 0.5
+
+        if not self.connection_healthy:
+            raise ConnectionError("Connection not healthy after maximum wait time")


-def _get_message_broker_dispatcher_from_url(
+def _get_message_broker_dispatcher_from_url(
+    url: str, config: "BeatWorkerConfig | None" = None
+) -> _MessageBrokerDispatcher:
     """
     Factory function to create a message broker instance from a URL.
     Currently, only RabbitMQ is supported.
     """
     if url.startswith("amqp://") or url.startswith("amqps://"):
-        return _RabbitMQBrokerDispatcher(url=url)
+        return _RabbitMQBrokerDispatcher(url=url, config=config)
     else:
         raise ValueError(f"Unsupported message broker URL: {url}")

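Note on the retry helper: RetryConfig and retry_with_backoff are imported from jararaca.utils.retry, which is not part of this file's diff. A minimal sketch of what such a helper could look like, inferred only from how it is called above (the field names mirror the calls; the defaults and implementation details are assumptions, not the package's actual code):

import asyncio
import random
from dataclasses import dataclass
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")


@dataclass
class RetryConfig:
    # Field names mirror how the diff constructs RetryConfig; defaults are guesses.
    max_retries: int = 3
    initial_delay: float = 1.0
    max_delay: float = 10.0
    backoff_factor: float = 2.0
    jitter: bool = True


async def retry_with_backoff(
    func: Callable[[], Awaitable[T]],
    retry_config: RetryConfig,
    retry_exceptions: tuple[type[BaseException], ...] = (Exception,),
) -> T:
    # Call func, retrying on the listed exceptions with exponential backoff.
    delay = retry_config.initial_delay
    for attempt in range(retry_config.max_retries + 1):
        try:
            return await func()
        except retry_exceptions:
            if attempt == retry_config.max_retries:
                raise
            sleep_for = min(delay, retry_config.max_delay)
            if retry_config.jitter:
                sleep_for += random.uniform(0, sleep_for * 0.25)
            await asyncio.sleep(sleep_for)
            delay *= retry_config.backoff_factor
    raise AssertionError("unreachable")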
@@ -219,6 +487,42 @@ def _get_message_broker_dispatcher_from_url(url: str) -> _MessageBrokerDispatche
 # endregion


+@dataclass
+class BeatWorkerConfig:
+    """Configuration for beat worker connection resilience"""
+
+    connection_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=10,
+            initial_delay=2.0,
+            max_delay=60.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    dispatch_retry_config: RetryConfig = field(
+        default_factory=lambda: RetryConfig(
+            max_retries=3,
+            initial_delay=1.0,
+            max_delay=10.0,
+            backoff_factor=2.0,
+            jitter=True,
+        )
+    )
+    connection_heartbeat_interval: float = 30.0
+    health_check_interval: float = 15.0
+    max_reconnection_attempts: int = -1  # Infinite retries
+    reconnection_delay: float = 5.0
+
+    # Connection establishment timeouts
+    connection_wait_timeout: float = 300.0  # 5 minutes to wait for initial connection
+    reconnection_wait_timeout: float = 600.0  # 10 minutes to wait for reconnection
+
+    # Pool configuration
+    max_pool_size: int = 10
+    pool_recycle_time: float = 3600.0  # 1 hour
+
+
 class BeatWorker:

     def __init__(
@@ -228,11 +532,13 @@ class BeatWorker:
         broker_url: str,
         backend_url: str,
         scheduled_action_names: set[str] | None = None,
+        config: "BeatWorkerConfig | None" = None,
     ) -> None:
         self.app = app
+        self.config = config or BeatWorkerConfig()

         self.broker: _MessageBrokerDispatcher = _get_message_broker_dispatcher_from_url(
-            broker_url
+            broker_url, self.config
         )
         self.backend: MessageBrokerBackend = get_message_broker_backend_from_url(
             backend_url
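Together with the BeatWorkerConfig dataclass added above, this constructor change lets callers tune connection resilience per worker. An illustrative sketch of building such a config (the field values are placeholders, not recommended settings, and the commented-out constructor call stands in for the existing BeatWorker arguments, which this diff leaves unchanged apart from the new keyword):

# Illustrative only: field values are placeholders, not recommended settings.
from jararaca.scheduler.beat_worker import BeatWorkerConfig
from jararaca.utils.retry import RetryConfig

config = BeatWorkerConfig(
    connection_retry_config=RetryConfig(
        max_retries=5,
        initial_delay=1.0,
        max_delay=30.0,
        backoff_factor=2.0,
        jitter=True,
    ),
    health_check_interval=10.0,
    max_pool_size=5,
)

# Passed through the new keyword argument; the remaining BeatWorker arguments
# (app, broker_url, backend_url, ...) are as in the previous release.
# worker = BeatWorker(..., config=config)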
@@ -249,13 +555,16 @@ class BeatWorker:

     def run(self) -> None:

-        def
-            logger.info("
-
-
-        signal.signal(signal.SIGINT, on_signal_received)
+        def on_shutdown(loop: asyncio.AbstractEventLoop) -> None:
+            logger.info("Shutting down - signal received")
+            # Schedule the shutdown to run in the event loop
+            asyncio.create_task(self._graceful_shutdown())

         with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
+            loop = runner.get_loop()
+            loop.add_signal_handler(signal.SIGINT, on_shutdown, loop)
+            # Add graceful shutdown handler for SIGTERM as well
+            loop.add_signal_handler(signal.SIGTERM, on_shutdown, loop)
             runner.run(self.start_scheduler())

     async def start_scheduler(self) -> None:
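The run() hunk above replaces the module-level signal.signal() hook with handlers registered on the runner's event loop, and now covers SIGTERM as well as SIGINT. A standalone sketch of that general asyncio pattern, independent of jararaca (the stop event is a stand-in for the worker's shutdown logic; loop.add_signal_handler is Unix-only):

import asyncio
import signal


async def main() -> None:
    stop = asyncio.Event()

    def on_signal() -> None:
        # Runs inside the event loop, so it can safely touch asyncio objects.
        stop.set()

    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, on_signal)

    await stop.wait()  # a real worker would run its scheduler loop here


if __name__ == "__main__":
    with asyncio.Runner() as runner:  # Python 3.11+, mirroring the asyncio.Runner usage above
        runner.run(main())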
@@ -269,15 +578,45 @@ class BeatWorker:
             self.app, self.container, self.scheduler_names
         )

+        # Initialize and wait for connection to be established
+        logger.info("Initializing broker connection...")
         await self.broker.initialize(scheduled_actions)

+        # Wait for connection to be healthy before starting scheduler
+        logger.info("Waiting for connection to be established...")
+        await self._wait_for_broker_connection()
+
+        logger.info("Connection established, starting scheduler...")
         await self.run_scheduled_actions(scheduled_actions)

     async def run_scheduled_actions(
         self, scheduled_actions: list[ScheduledActionData]
     ) -> None:

+        logger.info("Starting scheduled actions processing loop")
+
+        # Ensure we have a healthy connection before starting the main loop
+        if (
+            hasattr(self.broker, "connection_healthy")
+            and not self.broker.connection_healthy
+        ):
+            logger.warning(
+                "Connection not healthy at start of processing loop, waiting..."
+            )
+            await self._wait_for_broker_reconnection()
+
         while not self.shutdown_event.is_set():
+            # Check connection health before processing scheduled actions
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and not self.broker.connection_healthy
+            ):
+                logger.warning(
+                    "Broker connection is not healthy, waiting for reconnection..."
+                )
+                await self._wait_for_broker_reconnection()
+                continue
+
             now = int(time.time())
             for sched_act_data in scheduled_actions:
                 func = sched_act_data.callable
@@ -285,58 +624,178 @@ class BeatWorker:
                 if self.shutdown_event.is_set():
                     break

-
+                try:
+                    async with self.backend.lock():

-
-
-
+                        last_dispatch_time: int | None = (
+                            await self.backend.get_last_dispatch_time(
+                                ScheduledAction.get_function_id(func)
+                            )
                         )
-                )

-
-
-
-
-
-
-
-
+                        if last_dispatch_time is not None:
+                            cron = croniter.croniter(
+                                scheduled_action.cron, last_dispatch_time
+                            )
+                            next_run: datetime = cron.get_next(datetime).replace(
+                                tzinfo=UTC
+                            )
+                            if next_run > datetime.now(UTC):
+                                logger.info(
+                                    f"Skipping {func.__module__}.{func.__qualname__} until {next_run}"
+                                )
+                                continue
+
+                        if not scheduled_action.allow_overlap:
+                            if (
+                                await self.backend.get_in_execution_count(
+                                    ScheduledAction.get_function_id(func)
+                                )
+                                > 0
+                            ):
+                                continue
+
+                        try:
+                            await self.broker.dispatch_scheduled_action(
+                                ScheduledAction.get_function_id(func),
+                                now,
                             )
-                    continue

-
-
-
-
+                            await self.backend.set_last_dispatch_time(
+                                ScheduledAction.get_function_id(func), now
+                            )
+
+                            logger.info(
+                                f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
                             )
-
-
+                        except Exception as e:
+                            logger.error(
+                                f"Failed to dispatch scheduled action {func.__module__}.{func.__qualname__}: {e}"
+                            )
+                            # Continue with other scheduled actions even if one fails
                             continue

-
-
-
+                except Exception as e:
+                    logger.error(
+                        f"Error processing scheduled action {func.__module__}.{func.__qualname__}: {e}"
                     )
+                    # Continue with other scheduled actions even if one fails
+                    continue
+
+            # Handle delayed messages
+            try:
+                delayed_messages = await self.backend.dequeue_next_delayed_messages(now)
+                for delayed_message_data in delayed_messages:
+                    try:
+                        await self.broker.dispatch_delayed_message(delayed_message_data)
+                    except Exception as e:
+                        logger.error(f"Failed to dispatch delayed message: {e}")
+                        # Continue with other delayed messages even if one fails
+                        continue
+            except Exception as e:
+                logger.error(f"Error processing delayed messages: {e}")

-
-
-            )
+            with contextlib.suppress(asyncio.TimeoutError):
+                await asyncio.wait_for(self.shutdown_event.wait(), self.interval)

-
-            f"Scheduled {func.__module__}.{func.__qualname__} at {now}"
-        )
+        logger.info("Scheduler stopped")

-
-
-
-
+        try:
+            await self.backend.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing backend: {e}")

-
-
+        try:
+            await self.broker.dispose()
+        except Exception as e:
+            logger.error(f"Error disposing broker: {e}")

-
+    async def _graceful_shutdown(self) -> None:
+        """Handles graceful shutdown process"""
+        logger.info("Initiating graceful shutdown sequence")
+        self.shutdown_event.set()
+        logger.info("Graceful shutdown completed")

-
+    async def _wait_for_broker_connection(self) -> None:
+        """
+        Wait for the broker connection to be established and healthy.
+        This ensures the scheduler doesn't start until RabbitMQ is ready.
+        """
+        max_wait_time = self.config.connection_wait_timeout
+        check_interval = 2.0  # Check every 2 seconds
+        elapsed_time = 0.0
+
+        logger.info(
+            f"Waiting for broker connection to be established (timeout: {max_wait_time}s)..."
+        )
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                raise ConnectionError(
+                    "Shutdown requested while waiting for broker connection"
+                )
+
+            # Check if broker connection is healthy
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection is healthy")
+                return
+
+            # If broker doesn't have health status, try a simple health check
+            if not hasattr(self.broker, "connection_healthy"):
+                try:
+                    # For non-RabbitMQ brokers, assume connection is ready after initialization
+                    logger.info("Broker connection assumed to be ready")
+                    return
+                except Exception as e:
+                    logger.debug(f"Broker connection check failed: {e}")
+
+            if elapsed_time % 10.0 == 0.0:  # Log every 10 seconds
+                logger.info(
+                    f"Still waiting for broker connection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval
+
+        raise ConnectionError(
+            f"Broker connection not established after {max_wait_time} seconds"
+        )
+
+    async def _wait_for_broker_reconnection(self) -> None:
+        """
+        Wait for the broker to reconnect when connection is lost during operation.
+        This pauses the scheduler until the connection is restored.
+        """
+        max_wait_time = self.config.reconnection_wait_timeout
+        check_interval = 5.0  # Check every 5 seconds
+        elapsed_time = 0.0
+
+        logger.info(f"Waiting for broker reconnection (timeout: {max_wait_time}s)...")
+
+        while elapsed_time < max_wait_time:
+            if self.shutdown_event.is_set():
+                logger.info("Shutdown requested while waiting for broker reconnection")
+                return
+
+            # Check if broker connection is healthy again
+            if (
+                hasattr(self.broker, "connection_healthy")
+                and self.broker.connection_healthy
+            ):
+                logger.info("Broker connection restored, resuming scheduler")
+                return
+
+            if elapsed_time % 30.0 == 0.0:  # Log every 30 seconds
+                logger.info(
+                    f"Still waiting for broker reconnection... ({elapsed_time:.1f}s elapsed)"
+                )
+
+            await asyncio.sleep(check_interval)
+            elapsed_time += check_interval

-
-
+        logger.error(f"Broker connection not restored after {max_wait_time} seconds")
+        # Don't raise an exception here, just continue and let the scheduler retry
+        # This allows the scheduler to be more resilient to long-term connection issues
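For reference, _handle_reconnection (earlier in this diff) computes its backoff as reconnection_delay * 2 ** min(attempt - 1, 10), applies optional ±25% jitter, and caps the result at connection_retry_config.max_delay. Ignoring jitter, the defaults (5.0 s base, 60.0 s cap) give the sequence below; this snippet only reproduces that arithmetic:

# Nominal (jitter-free) reconnection delays for the BeatWorkerConfig defaults:
# reconnection_delay = 5.0, connection_retry_config.max_delay = 60.0
for attempt in range(1, 7):
    delay = 5.0 * (2 ** min(attempt - 1, 10))
    print(attempt, min(delay, 60.0))
# attempts 1..6 -> 5.0, 10.0, 20.0, 40.0, 60.0, 60.0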