avtomatika 1.0b7.tar.gz → 1.0b8.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b7 → avtomatika-1.0b8}/PKG-INFO +50 -2
- avtomatika-1.0b7/src/avtomatika.egg-info/PKG-INFO → avtomatika-1.0b8/README.md +44 -47
- {avtomatika-1.0b7 → avtomatika-1.0b8}/pyproject.toml +5 -1
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/app_keys.py +1 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/config.py +10 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/data_types.py +2 -1
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/dispatcher.py +8 -26
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/engine.py +19 -1
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/executor.py +34 -6
- avtomatika-1.0b8/src/avtomatika/health_checker.py +57 -0
- avtomatika-1.0b8/src/avtomatika/history/base.py +105 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/history/noop.py +18 -7
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/history/postgres.py +8 -6
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/history/sqlite.py +7 -5
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/metrics.py +1 -1
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/reputation.py +46 -40
- avtomatika-1.0b8/src/avtomatika/s3.py +323 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/storage/base.py +45 -4
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/storage/memory.py +44 -6
- avtomatika-1.0b8/src/avtomatika/storage/redis.py +443 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/utils/webhook_sender.py +44 -2
- avtomatika-1.0b8/src/avtomatika/watcher.py +78 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/ws_manager.py +7 -6
- avtomatika-1.0b7/README.md → avtomatika-1.0b8/src/avtomatika.egg-info/PKG-INFO +95 -2
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika.egg-info/SOURCES.txt +2 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika.egg-info/requires.txt +7 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_dispatcher.py +31 -44
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_dispatcher_extended.py +7 -3
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_engine.py +38 -4
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_executor.py +3 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_handlers.py +8 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_history.py +11 -6
- avtomatika-1.0b8/tests/test_postgres_history.py +84 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_reputation.py +2 -2
- avtomatika-1.0b8/tests/test_s3.py +265 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_webhook_sender.py +23 -3
- avtomatika-1.0b7/src/avtomatika/health_checker.py +0 -39
- avtomatika-1.0b7/src/avtomatika/history/base.py +0 -51
- avtomatika-1.0b7/src/avtomatika/storage/redis.py +0 -510
- avtomatika-1.0b7/src/avtomatika/watcher.py +0 -80
- avtomatika-1.0b7/tests/test_postgres_history.py +0 -107
- {avtomatika-1.0b7 → avtomatika-1.0b8}/LICENSE +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/setup.cfg +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/__init__.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/api/handlers.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/api/routes.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/api.html +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/blueprint.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/constants.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/context.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/datastore.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/logging_config.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/scheduler.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/scheduler_config_loader.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/security.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/storage/__init__.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/telemetry.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/utils/__init__.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/worker_config_loader.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_blueprint_integrity.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_compression.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_context.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_error_handling.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_integration.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_logging_config.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_optimization.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_scheduler.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_worker_config_loader.py +0 -0
- {avtomatika-1.0b7 → avtomatika-1.0b8}/tests/test_ws_manager.py +0 -0
{avtomatika-1.0b7 → avtomatika-1.0b8}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b7
+Version: 1.0b8
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
@@ -20,6 +20,9 @@ Requires-Dist: msgpack~=1.1
 Requires-Dist: orjson~=3.11
 Provides-Extra: redis
 Requires-Dist: redis~=7.1; extra == "redis"
+Provides-Extra: s3
+Requires-Dist: obstore>=0.2; extra == "s3"
+Requires-Dist: aiofiles~=23.2; extra == "s3"
 Provides-Extra: history
 Requires-Dist: aiosqlite~=0.22; extra == "history"
 Requires-Dist: asyncpg~=0.30; extra == "history"
@@ -37,10 +40,13 @@ Requires-Dist: pytest-mock~=3.14; extra == "test"
 Requires-Dist: aioresponses~=0.7; extra == "test"
 Requires-Dist: backports.zstd~=1.2; extra == "test"
 Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
+Requires-Dist: obstore>=0.2; extra == "test"
+Requires-Dist: aiofiles~=23.2; extra == "test"
 Provides-Extra: all
 Requires-Dist: avtomatika[redis]; extra == "all"
 Requires-Dist: avtomatika[history]; extra == "all"
 Requires-Dist: avtomatika[telemetry]; extra == "all"
+Requires-Dist: avtomatika[s3]; extra == "all"
 Dynamic: license-file
 
 # Avtomatika Orchestrator
@@ -60,6 +66,7 @@ This document serves as a comprehensive guide for developers looking to build pi
 - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
 - [Dependency Injection (DataStore)](#dependency-injection-datastore)
 - [Native Scheduler](#native-scheduler)
+- [S3 Payload Offloading](#s3-payload-offloading)
 - [Webhook Notifications](#webhook-notifications)
 - [Production Configuration](#production-configuration)
 - [Fault Tolerance](#fault-tolerance)
@@ -107,6 +114,11 @@ Avtomatika is part of a larger ecosystem:
   pip install "avtomatika[telemetry]"
   ```
 
+* **Install with S3 support (Payload Offloading):**
+  ```bash
+  pip install "avtomatika[s3]"
+  ```
+
 * **Install all dependencies, including for testing:**
   ```bash
   pip install "avtomatika[all,test]"
@@ -250,6 +262,19 @@ async def publish_handler_old_style(context):
     print(f"Job {context.job_id}: Publishing video at {output_path} ({duration}s).")
     context.actions.transition_to("complete")
 ```
+## Key Concepts: JobContext and Actions
+
+### High Performance Architecture
+
+Avtomatika is engineered for high-load environments with thousands of concurrent workers.
+
+* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly, regardless of the cluster size. No O(N) scanning.
+* **Non-Blocking I/O**:
+  * **Webhooks**: Sent via a bounded background queue to prevent backpressure.
+  * **History Logging**: Writes to SQL databases are buffered and asynchronous, ensuring the main execution loop never blocks.
+* **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
+* **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
+
 ## Blueprint Cookbook: Key Features
 
 ### 1. Conditional Transitions (`.when()`)
@@ -365,7 +390,30 @@ daily_at = "02:00"
 
 The orchestrator can send asynchronous notifications to an external system when a job completes, fails, or is quarantined. This eliminates the need for clients to constantly poll the API for status updates.
 
-
+### 7. S3 Payload Offloading
+
+Orchestrator provides first-class support for handling large files via S3-compatible storage, powered by the high-performance `obstore` library (Rust bindings).
+
+* **Memory Safe (Streaming)**: Uses streaming for uploads and downloads, allowing processing of files larger than available RAM without OOM errors.
+* **Managed Mode**: The Orchestrator manages file lifecycle (automatic cleanup of S3 objects and local temporary files on job completion).
+* **Dependency Injection**: Use the `task_files` argument in your handlers to easily read/write data.
+* **Directory Support**: Supports recursive download and upload of entire directories.
+
+```python
+@bp.handler_for("process_data")
+async def process_data(task_files, actions):
+    # Streaming download of a large file
+    local_path = await task_files.download("large_dataset.csv")
+
+    # ... process data ...
+
+    # Upload results
+    await task_files.write_json("results.json", {"status": "done"})
+
+    actions.transition_to("finished")
+```
+
+## Production Configuration
 * **Events:**
   * `job_finished`: The job reached a final success state.
   * `job_failed`: The job failed (e.g., due to an error or invalid input).
````

avtomatika-1.0b7/src/avtomatika.egg-info/PKG-INFO → avtomatika-1.0b8/README.md

````diff
@@ -1,48 +1,3 @@
-Metadata-Version: 2.4
-Name: avtomatika
-Version: 1.0b7
-Summary: A state-machine based orchestrator for long-running AI and other jobs.
-Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
-Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: aiohttp~=3.12
-Requires-Dist: python-json-logger~=4.0
-Requires-Dist: graphviz~=0.21
-Requires-Dist: zstandard~=0.24
-Requires-Dist: aioprometheus~=23.12
-Requires-Dist: msgpack~=1.1
-Requires-Dist: orjson~=3.11
-Provides-Extra: redis
-Requires-Dist: redis~=7.1; extra == "redis"
-Provides-Extra: history
-Requires-Dist: aiosqlite~=0.22; extra == "history"
-Requires-Dist: asyncpg~=0.30; extra == "history"
-Provides-Extra: telemetry
-Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
-Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
-Requires-Dist: opentelemetry-exporter-otlp~=1.39; extra == "telemetry"
-Requires-Dist: opentelemetry-instrumentation-aiohttp-client~=0.59b0; extra == "telemetry"
-Provides-Extra: test
-Requires-Dist: pytest~=9.0; extra == "test"
-Requires-Dist: pytest-asyncio~=1.1; extra == "test"
-Requires-Dist: fakeredis~=2.33; extra == "test"
-Requires-Dist: pytest-aiohttp~=1.1; extra == "test"
-Requires-Dist: pytest-mock~=3.14; extra == "test"
-Requires-Dist: aioresponses~=0.7; extra == "test"
-Requires-Dist: backports.zstd~=1.2; extra == "test"
-Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
-Provides-Extra: all
-Requires-Dist: avtomatika[redis]; extra == "all"
-Requires-Dist: avtomatika[history]; extra == "all"
-Requires-Dist: avtomatika[telemetry]; extra == "all"
-Dynamic: license-file
-
 # Avtomatika Orchestrator
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -60,6 +15,7 @@ This document serves as a comprehensive guide for developers looking to build pi
 - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
 - [Dependency Injection (DataStore)](#dependency-injection-datastore)
 - [Native Scheduler](#native-scheduler)
+- [S3 Payload Offloading](#s3-payload-offloading)
 - [Webhook Notifications](#webhook-notifications)
 - [Production Configuration](#production-configuration)
 - [Fault Tolerance](#fault-tolerance)
@@ -107,6 +63,11 @@ Avtomatika is part of a larger ecosystem:
   pip install "avtomatika[telemetry]"
   ```
 
+* **Install with S3 support (Payload Offloading):**
+  ```bash
+  pip install "avtomatika[s3]"
+  ```
+
 * **Install all dependencies, including for testing:**
   ```bash
   pip install "avtomatika[all,test]"
@@ -250,6 +211,19 @@ async def publish_handler_old_style(context):
     print(f"Job {context.job_id}: Publishing video at {output_path} ({duration}s).")
     context.actions.transition_to("complete")
 ```
+## Key Concepts: JobContext and Actions
+
+### High Performance Architecture
+
+Avtomatika is engineered for high-load environments with thousands of concurrent workers.
+
+* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly, regardless of the cluster size. No O(N) scanning.
+* **Non-Blocking I/O**:
+  * **Webhooks**: Sent via a bounded background queue to prevent backpressure.
+  * **History Logging**: Writes to SQL databases are buffered and asynchronous, ensuring the main execution loop never blocks.
+* **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
+* **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
+
 ## Blueprint Cookbook: Key Features
 
 ### 1. Conditional Transitions (`.when()`)
@@ -365,7 +339,30 @@ daily_at = "02:00"
 
 The orchestrator can send asynchronous notifications to an external system when a job completes, fails, or is quarantined. This eliminates the need for clients to constantly poll the API for status updates.
 
-
+### 7. S3 Payload Offloading
+
+Orchestrator provides first-class support for handling large files via S3-compatible storage, powered by the high-performance `obstore` library (Rust bindings).
+
+* **Memory Safe (Streaming)**: Uses streaming for uploads and downloads, allowing processing of files larger than available RAM without OOM errors.
+* **Managed Mode**: The Orchestrator manages file lifecycle (automatic cleanup of S3 objects and local temporary files on job completion).
+* **Dependency Injection**: Use the `task_files` argument in your handlers to easily read/write data.
+* **Directory Support**: Supports recursive download and upload of entire directories.
+
+```python
+@bp.handler_for("process_data")
+async def process_data(task_files, actions):
+    # Streaming download of a large file
+    local_path = await task_files.download("large_dataset.csv")
+
+    # ... process data ...
+
+    # Upload results
+    await task_files.write_json("results.json", {"status": "done"})
+
+    actions.transition_to("finished")
+```
+
+## Production Configuration
 * **Events:**
   * `job_finished`: The job reached a final success state.
   * `job_failed`: The job failed (e.g., due to an error or invalid input).
@@ -533,4 +530,4 @@ For a deeper dive into the system, please refer to the following documents:
 - [**Architecture Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/architecture.md): A detailed overview of the system components and their interactions.
 - [**API Reference**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/api_reference.md): Full specification of the HTTP API.
 - [**Deployment Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
-- [**Cookbook**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/cookbook/README.md): Examples and best practices for creating blueprints.
+- [**Cookbook**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/cookbook/README.md): Examples and best practices for creating blueprints.
````

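The README's `process_data` example is the heart of the new S3 workflow. Below is a slightly fuller sketch of a blueprint that uses it; `handler_for`, `download`, and `write_json` appear in the README text above, while the `Blueprint` constructor signature and the import path are assumptions based on the package layout, not confirmed API.

```python
# A minimal sketch of a blueprint using the documented task_files helper.
# ASSUMPTIONS: the Blueprint constructor and import path. Only handler_for,
# download, and write_json are confirmed by the README text above.
from avtomatika.blueprint import Blueprint

bp = Blueprint("csv_pipeline")

@bp.handler_for("process_data")
async def process_data(task_files, actions):
    # Streamed to local disk (under TASK_FILES_DIR), per the streaming
    # guarantees described in the README.
    local_path = await task_files.download("input/events.csv")

    lines = 0
    with open(local_path, "rb") as fh:
        for _ in fh:
            lines += 1

    await task_files.write_json("output/summary.json", {"lines": lines})
    actions.transition_to("finished")
```
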
{avtomatika-1.0b7 → avtomatika-1.0b8}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b7"
+version = "1.0b8"
 description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -26,6 +26,7 @@ dependencies = [
 
 [project.optional-dependencies]
 redis = ["redis~=7.1"]
+s3 = ["obstore>=0.2", "aiofiles~=23.2"]
 history = ["aiosqlite~=0.22", "asyncpg~=0.30"]
 telemetry = [
     "opentelemetry-api~=1.39",
@@ -42,11 +43,14 @@ test = [
     "aioresponses~=0.7",
     "backports.zstd~=1.2",
     "opentelemetry-instrumentation-aiohttp-client",
+    "obstore>=0.2",
+    "aiofiles~=23.2",
 ]
 all = [
     "avtomatika[redis]",
     "avtomatika[history]",
     "avtomatika[telemetry]",
+    "avtomatika[s3]",
 ]
 
 [project.urls]
```

{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/app_keys.py

```diff
@@ -30,3 +30,4 @@ WATCHER_TASK_KEY = AppKey("watcher_task", Task)
 REPUTATION_CALCULATOR_TASK_KEY = AppKey("reputation_calculator_task", Task)
 HEALTH_CHECKER_TASK_KEY = AppKey("health_checker_task", Task)
 SCHEDULER_TASK_KEY = AppKey("scheduler_task", Task)
+S3_SERVICE_KEY = AppKey("s3_service", "S3Service")
```

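`AppKey` is aiohttp's typed application-registry key, so components can look the S3 service up without magic strings. A sketch of retrieving it inside a request handler follows; `S3_SERVICE_KEY` and `get_task_files` come from this diff and the executor diff below, while the route itself is purely illustrative.

```python
# Sketch: reading the registered S3 service back out of the aiohttp app.
# The route and response shape are illustrative, not part of the package API.
from aiohttp import web

from avtomatika.app_keys import S3_SERVICE_KEY

async def job_files_status(request: web.Request) -> web.Response:
    s3_service = request.app.get(S3_SERVICE_KEY)  # None when S3 is not configured
    if s3_service is None:
        return web.json_response({"s3": "disabled"}, status=501)
    task_files = s3_service.get_task_files(request.match_info["job_id"])
    return web.json_response({"s3": "enabled", "managed": task_files is not None})
```
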
{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/config.py

```diff
@@ -39,6 +39,7 @@ class Config:
 
         # Worker settings
         self.WORKER_TIMEOUT_SECONDS: int = int(getenv("WORKER_TIMEOUT_SECONDS", 300))
+        self.TASK_FILES_DIR: str = getenv("TASK_FILES_DIR", "/tmp/avtomatika-payloads")
         self.WORKER_POLL_TIMEOUT_SECONDS: int = int(
             getenv("WORKER_POLL_TIMEOUT_SECONDS", 30),
         )
@@ -52,10 +53,19 @@ class Config:
         self.EXECUTOR_MAX_CONCURRENT_JOBS: int = int(
             getenv("EXECUTOR_MAX_CONCURRENT_JOBS", 100),
         )
+        self.REDIS_STREAM_BLOCK_MS: int = int(getenv("REDIS_STREAM_BLOCK_MS", 5000))
 
         # History storage settings
         self.HISTORY_DATABASE_URI: str = getenv("HISTORY_DATABASE_URI", "")
 
+        # S3 settings
+        self.S3_ENDPOINT_URL: str = getenv("S3_ENDPOINT_URL", "")
+        self.S3_ACCESS_KEY: str = getenv("S3_ACCESS_KEY", "")
+        self.S3_SECRET_KEY: str = getenv("S3_SECRET_KEY", "")
+        self.S3_REGION: str = getenv("S3_REGION", "us-east-1")
+        self.S3_DEFAULT_BUCKET: str = getenv("S3_DEFAULT_BUCKET", "avtomatika-payloads")
+        self.S3_MAX_CONCURRENCY: int = int(getenv("S3_MAX_CONCURRENCY", 100))
+
         # Rate limiting settings
         self.RATE_LIMITING_ENABLED: bool = getenv("RATE_LIMITING_ENABLED", "true").lower() == "true"
 
```

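All of the new settings are read from the environment via `getenv`, so an S3-enabled deployment only needs the variables below set before the engine starts. A sketch (variable names match the `getenv()` calls above; values are placeholders, with a MinIO endpoint shown as an example of an S3-compatible store; the `Config` import path is assumed from the diff):

```python
# Sketch: configuring the new S3 and stream settings via environment variables.
# Variable names match the getenv() calls in config.py; values are placeholders.
import os

os.environ.update({
    "S3_ENDPOINT_URL": "http://localhost:9000",   # e.g. a local MinIO instance
    "S3_ACCESS_KEY": "minioadmin",
    "S3_SECRET_KEY": "minioadmin",
    "S3_REGION": "us-east-1",
    "S3_DEFAULT_BUCKET": "avtomatika-payloads",
    "S3_MAX_CONCURRENCY": "100",
    "TASK_FILES_DIR": "/tmp/avtomatika-payloads",
    "REDIS_STREAM_BLOCK_MS": "5000",
})

from avtomatika.config import Config  # import path assumed from the diff

config = Config()
assert config.S3_DEFAULT_BUCKET == "avtomatika-payloads"
```
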
{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/data_types.py

```diff
@@ -21,10 +21,11 @@ class JobContext(NamedTuple):
     state_history: dict[str, Any]
     client: ClientConfig
     actions: "ActionFactory"
-    data_stores:
+    data_stores: Any | None = None
     tracing_context: dict[str, Any] | None = None
     aggregation_results: dict[str, Any] | None = None
     webhook_url: str | None = None
+    task_files: Any | None = None
 
 
 class GPUInfo(NamedTuple):
```

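Adding `task_files` to `JobContext` matters because the executor (see its diff below) injects dependencies into handlers by parameter name: any `JobContext` field can be requested directly, and `task_files` gets a dedicated injection branch. Continuing the earlier blueprint sketch:

```python
# Sketch: name-based dependency injection. Parameter names are matched
# against JobContext fields (job_id, state_history, ...) and "task_files"
# is injected directly; this mirrors the executor logic shown below.
@bp.handler_for("report")
async def report(job_id, state_history, task_files, actions):
    previous = state_history.get("process_data", {})
    if task_files is not None:  # None when no S3 service is registered
        await task_files.write_json("report.json", {"job": job_id, "input": previous})
    actions.transition_to("finished")
```
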
{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/dispatcher.py

```diff
@@ -137,32 +137,17 @@ class Dispatcher:
         dispatch_strategy = task_info.get("dispatch_strategy", "default")
         resource_requirements = task_info.get("resource_requirements")
 
-
-
-
-            raise RuntimeError("No
-
-
-        logger.debug(f"
-        idle_workers = [w for w in all_workers if w.get("status", "idle") == "idle"]
-        logger.debug(f"Idle workers: {[w['worker_id'] for w in idle_workers]}")
-        if not idle_workers:
-            if busy_mo_workers := [
-                w for w in all_workers if w.get("status") == "busy" and "multi_orchestrator_info" in w
-            ]:
-                logger.warning(
-                    f"No idle workers. Found {len(busy_mo_workers)} busy workers "
-                    f"in multi-orchestrator mode. They are likely performing tasks for other Orchestrators.",
-                )
-            raise RuntimeError("No idle workers (all are 'busy')")
+        candidate_ids = await self.storage.find_workers_for_task(task_type)
+        if not candidate_ids:
+            logger.warning(f"No idle workers found for task '{task_type}'")
+            raise RuntimeError(f"No suitable workers for task type '{task_type}'")
+
+        capable_workers = await self.storage.get_workers(candidate_ids)
+        logger.debug(f"Found {len(capable_workers)} capable workers for task '{task_type}'")
 
-        # Filter by task type
-        capable_workers = [w for w in idle_workers if task_type in w.get("supported_tasks", [])]
-        logger.debug(f"Capable workers for task '{task_type}': {[w['worker_id'] for w in capable_workers]}")
         if not capable_workers:
-            raise RuntimeError(f"No suitable workers for task type '{task_type}'")
+            raise RuntimeError(f"No suitable workers for task type '{task_type}' (data missing)")
 
-        # Filter by resource requirements
         if resource_requirements:
             compliant_workers = [w for w in capable_workers if self._is_worker_compliant(w, resource_requirements)]
             logger.debug(
@@ -175,7 +160,6 @@ class Dispatcher:
             )
             capable_workers = compliant_workers
 
-        # Filter by maximum cost
         max_cost = task_info.get("max_cost")
         if max_cost is not None:
             cost_compliant_workers = [w for w in capable_workers if w.get("cost_per_second", float("inf")) <= max_cost]
@@ -188,7 +172,6 @@ class Dispatcher:
             )
             capable_workers = cost_compliant_workers
 
-        # Select worker according to strategy
 if dispatch_strategy == "round_robin":
             selected_worker = self._select_round_robin(capable_workers, task_type)
         elif dispatch_strategy == "least_connections":
@@ -205,7 +188,6 @@ class Dispatcher:
             f"Dispatching task '{task_type}' to worker {worker_id} (strategy: {dispatch_strategy})",
         )
 
-        # --- Task creation and enqueuing ---
         task_id = task_info.get("task_id") or str(uuid4())
         payload = {
             "job_id": job_id,
```

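The deleted block fetched every worker and filtered it in Python on each dispatch; the replacement pushes candidate selection into `storage.find_workers_for_task`, which the README attributes to Redis Set intersections. The rewritten `storage/redis.py` (+443 lines in this release) is not shown in the diff, but the canonical shape of such a lookup is an `SINTER` between an idle-workers index and a per-task-type index. A sketch under that assumption, with invented key names:

```python
# Sketch of set-intersection worker lookup. Key names are assumptions;
# the shipped storage/redis.py implementation is not part of this diff.
from redis.asyncio import Redis

async def find_workers_for_task(redis: Redis, task_type: str) -> list[str]:
    # SINTER returns workers present in BOTH sets, so only idle workers
    # that support this task type come back; no per-worker scan in Python.
    members = await redis.sinter("workers:idle", f"workers:task:{task_type}")
    return sorted(m.decode() if isinstance(m, bytes) else m for m in members)
```
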
{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/engine.py

```diff
@@ -19,6 +19,7 @@ from .app_keys import (
     HTTP_SESSION_KEY,
     REPUTATION_CALCULATOR_KEY,
     REPUTATION_CALCULATOR_TASK_KEY,
+    S3_SERVICE_KEY,
     SCHEDULER_KEY,
     SCHEDULER_TASK_KEY,
     WATCHER_KEY,
@@ -37,6 +38,7 @@ from .history.base import HistoryStorageBase
 from .history.noop import NoOpHistoryStorage
 from .logging_config import setup_logging
 from .reputation import ReputationCalculator
+from .s3 import S3Service
 from .scheduler import Scheduler
 from .storage.base import StorageBackend
 from .telemetry import setup_telemetry
@@ -141,6 +143,11 @@ class OrchestratorEngine:
             self.history_storage = NoOpHistoryStorage()
 
     async def on_startup(self, app: web.Application) -> None:
+        # 1. Fail Fast: Check Storage Connection
+        if not await self.storage.ping():
+            logger.critical("Failed to connect to Storage Backend (Redis). Exiting.")
+            raise RuntimeError("Storage Backend is unavailable.")
+
         try:
             from opentelemetry.instrumentation.aiohttp_client import (
                 AioHttpClientInstrumentor,
@@ -152,6 +159,8 @@ class OrchestratorEngine:
                 "opentelemetry-instrumentation-aiohttp-client not found. AIOHTTP client instrumentation is disabled."
             )
         await self._setup_history_storage()
+        # Start history background worker
+        await self.history_storage.start()
 
         # Load client configs if the path is provided
         if self.config.CLIENTS_CONFIG_PATH:
@@ -188,6 +197,7 @@ class OrchestratorEngine:
 
         app[HTTP_SESSION_KEY] = ClientSession()
         self.webhook_sender = WebhookSender(app[HTTP_SESSION_KEY])
+        self.webhook_sender.start()
         self.dispatcher = Dispatcher(self.storage, self.config)
         app[DISPATCHER_KEY] = self.dispatcher
         app[EXECUTOR_KEY] = JobExecutor(self, self.history_storage)
@@ -196,6 +206,7 @@ class OrchestratorEngine:
         app[HEALTH_CHECKER_KEY] = HealthChecker(self)
         app[SCHEDULER_KEY] = Scheduler(self)
         app[WS_MANAGER_KEY] = self.ws_manager
+        app[S3_SERVICE_KEY] = S3Service(self.config, self.history_storage)
 
         app[EXECUTOR_TASK_KEY] = create_task(app[EXECUTOR_KEY].run())
         app[WATCHER_TASK_KEY] = create_task(app[WATCHER_KEY].run())
@@ -220,6 +231,13 @@ class OrchestratorEngine:
         logger.info("Closing WebSocket connections...")
         await self.ws_manager.close_all()
 
+        logger.info("Stopping WebhookSender...")
+        await self.webhook_sender.stop()
+
+        if S3_SERVICE_KEY in app:
+            logger.info("Closing S3 Service...")
+            await app[S3_SERVICE_KEY].close()
+
         logger.info("Cancelling background tasks...")
         app[HEALTH_CHECKER_TASK_KEY].cancel()
         app[WATCHER_TASK_KEY].cancel()
@@ -352,7 +370,7 @@ class OrchestratorEngine:
         )
 
         # Run in background to not block the main flow
-
+        await self.webhook_sender.send(webhook_url, payload)
 
     def run(self) -> None:
         self.setup()
```

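`on_startup` now fails fast when the storage backend is unreachable, so the process exits instead of accepting jobs it cannot persist. The `ping()` contract implied by that check is simple enough to sketch; this is an assumption about the interface, not the shipped `redis.py`:

```python
# Sketch: the ping() contract implied by the fail-fast startup check.
# A backend returns True only if a round-trip to Redis actually succeeds.
from logging import getLogger

from redis.asyncio import Redis

logger = getLogger(__name__)

class RedisStorageSketch:
    def __init__(self, redis: Redis) -> None:
        self._redis = redis

    async def ping(self) -> bool:
        try:
            return bool(await self._redis.ping())
        except Exception:
            logger.exception("Redis ping failed")
            return False
```
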
{avtomatika-1.0b7 → avtomatika-1.0b8}/src/avtomatika/executor.py

```diff
@@ -47,6 +47,7 @@ except ImportError:
     inject = NoOpPropagate().inject
     TraceContextTextMapPropagator = NoOpTraceContextTextMapPropagator()  # Instantiate the class
 
+from .app_keys import S3_SERVICE_KEY
 from .context import ActionFactory
 from .data_types import ClientConfig, JobContext
 from .history.base import HistoryStorageBase
@@ -74,7 +75,7 @@ class JobExecutor:
         self._running = False
         self._processing_messages: set[str] = set()
 
-    async def _process_job(self, job_id: str, message_id: str):
+    async def _process_job(self, job_id: str, message_id: str) -> None:
         """The core logic for processing a single job dequeued from storage."""
         if message_id in self._processing_messages:
             return
@@ -143,6 +144,11 @@ class JobExecutor:
             plan=client_config_dict.get("plan", "unknown"),
             params=client_config_dict.get("params", {}),
         )
+
+        # Get TaskFiles if S3 service is available
+        s3_service = self.engine.app.get(S3_SERVICE_KEY)
+        task_files = s3_service.get_task_files(job_id) if s3_service else None
+
         context = JobContext(
             job_id=job_id,
             current_state=job_state["current_state"],
@@ -153,6 +159,7 @@ class JobExecutor:
             data_stores=SimpleNamespace(**blueprint.data_stores),
             tracing_context=tracing_context,
             aggregation_results=job_state.get("aggregation_results"),
+            task_files=task_files,
         )
 
         try:
@@ -173,12 +180,17 @@ class JobExecutor:
                 params_to_inject["context"] = context
             if "actions" in param_names:
                 params_to_inject["actions"] = action_factory
+            if "task_files" in param_names:
+                params_to_inject["task_files"] = task_files
         else:
             # New injection logic with prioritized lookup.
             context_as_dict = context._asdict()
             for param_name in param_names:
+                # Direct injection of task_files
+                if param_name == "task_files":
+                    params_to_inject[param_name] = task_files
                 # Look in JobContext fields first.
-
+                elif param_name in context_as_dict:
                     params_to_inject[param_name] = context_as_dict[param_name]
                 # Then look in state_history (data from previous steps/workers).
                 elif param_name in context.state_history:
@@ -258,6 +270,15 @@ class JobExecutor:
             await self.storage.enqueue_job(job_id)
         else:
             logger.info(f"Job {job_id} reached terminal state {next_state}")
+
+            # Clean up S3 files if service is available
+            s3_service = self.engine.app.get(S3_SERVICE_KEY)
+            if s3_service:
+                task_files = s3_service.get_task_files(job_id)
+                if task_files:
+                    # Run cleanup in background to not block response
+                    create_task(task_files.cleanup())
+
             await self._check_and_resume_parent(job_state)
             # Send webhook for finished/failed jobs
             event_type = "job_finished" if next_state == "finished" else "job_failed"
@@ -522,7 +543,10 @@ class JobExecutor:
                 # Wait for an available slot before fetching a new job
                 await semaphore.acquire()
 
-
+                # Block for a configured time waiting for a job
+                block_time = self.engine.config.REDIS_STREAM_BLOCK_MS
+                result = await self.storage.dequeue_job(block=block_time if block_time > 0 else None)
+
                 if result:
                     job_id, message_id = result
                     task = create_task(self._process_job(job_id, message_id))
@@ -530,14 +554,18 @@ class JobExecutor:
                     # Release the semaphore slot when the task is done
                     task.add_done_callback(lambda _: semaphore.release())
                 else:
-                    #
+                    # Timeout reached, release slot and loop again
                     semaphore.release()
-                    # Prevent busy loop if
-
+                    # Prevent busy loop if blocking is disabled (e.g. in tests) or failed
+                    if block_time <= 0:
+                        await sleep(0.1)
+
             except CancelledError:
                 break
             except Exception:
                 logger.exception("Error in JobExecutor main loop.")
+                # If an error occurred (e.g. Redis connection lost), sleep briefly to avoid log spam
+                semaphore.release()
                 await sleep(1)
         logger.info("JobExecutor stopped.")
 
```

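The executor loop now passes a block timeout down to `storage.dequeue_job`, replacing the old poll-and-sleep pattern. With Redis Streams this maps naturally onto `XREADGROUP ... BLOCK <ms>`, which parks the connection server-side until a job arrives or the timeout elapses. A sketch under that assumption; stream, group, and field names are invented, since the real backend lives in the new `storage/redis.py` that this diff does not show:

```python
# Sketch: a blocking dequeue built on Redis Streams consumer groups.
# Stream/group/field names are assumptions, not the shipped implementation.
from redis.asyncio import Redis

async def dequeue_job(redis: Redis, block: int | None = 5000) -> tuple[str, str] | None:
    resp = await redis.xreadgroup(
        groupname="executors",
        consumername="executor-1",
        streams={"jobs": ">"},  # ">" = only messages never delivered to this group
        count=1,
        block=block,            # milliseconds; None disables blocking entirely
    )
    if not resp:
        return None  # timed out with no job available
    _stream, messages = resp[0]
    message_id, fields = messages[0]
    return fields[b"job_id"].decode(), message_id.decode()
```
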
avtomatika-1.0b8/src/avtomatika/health_checker.py

```diff
@@ -0,0 +1,57 @@
+"""This module previously contained an active HealthChecker.
+In the new architecture with heartbeat messages from workers,
+the orchestrator no longer needs to actively poll workers.
+
+Redis automatically deletes worker keys when their TTL expires,
+and `storage.get_available_workers()` only retrieves active keys.
+
+This file is left as a placeholder in case passive health-check
+logic is needed in the future (e.g., for logging expired workers).
+"""
+
+from asyncio import CancelledError, sleep
+from logging import getLogger
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .engine import OrchestratorEngine
+
+logger = getLogger(__name__)
+
+
+class HealthChecker:
+    def __init__(self, engine: "OrchestratorEngine", interval_seconds: int = 600):
+        self.engine = engine
+        self.storage = engine.storage
+        self.interval_seconds = interval_seconds
+        self._running = False
+        from uuid import uuid4
+
+        self._instance_id = str(uuid4())
+
+    async def run(self):
+        logger.info(f"HealthChecker started (Active Index Cleanup, Instance ID: {self._instance_id}).")
+        self._running = True
+        while self._running:
+            try:
+                # Use distributed lock to ensure only one instance cleans up
+                if await self.storage.acquire_lock(
+                    "global_health_check_lock", self._instance_id, self.interval_seconds - 5
+                ):
+                    try:
+                        await self.storage.cleanup_expired_workers()
+                    finally:
+                        # We don't release the lock immediately to prevent other instances from
+                        # running the same task if the interval is small.
+                        pass
+
+                await sleep(self.interval_seconds)
+            except CancelledError:
+                break
+            except Exception:
+                logger.exception("Error in HealthChecker main loop.")
+                await sleep(60)
+        logger.info("HealthChecker stopped.")
+
+    def stop(self):
+        self._running = False
```
