avtomatika 1.0b4.tar.gz → 1.0b6.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b4/src/avtomatika.egg-info → avtomatika-1.0b6}/PKG-INFO +45 -5
- {avtomatika-1.0b4 → avtomatika-1.0b6}/README.md +42 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/pyproject.toml +5 -3
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/__init__.py +2 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/blueprint.py +9 -11
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/config.py +11 -0
- avtomatika-1.0b6/src/avtomatika/constants.py +30 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/context.py +18 -18
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/data_types.py +6 -7
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/datastore.py +2 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/dispatcher.py +20 -21
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/engine.py +170 -92
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/executor.py +168 -148
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/history/base.py +7 -7
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/history/noop.py +7 -7
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/history/postgres.py +63 -22
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/history/sqlite.py +61 -44
- avtomatika-1.0b6/src/avtomatika/logging_config.py +92 -0
- avtomatika-1.0b6/src/avtomatika/scheduler.py +119 -0
- avtomatika-1.0b6/src/avtomatika/scheduler_config_loader.py +41 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/security.py +3 -5
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/storage/__init__.py +2 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/storage/base.py +48 -23
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/storage/memory.py +76 -46
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/storage/redis.py +141 -60
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/worker_config_loader.py +2 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/ws_manager.py +1 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6/src/avtomatika.egg-info}/PKG-INFO +45 -5
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika.egg-info/SOURCES.txt +4 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika.egg-info/requires.txt +2 -2
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_engine.py +145 -104
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_executor.py +24 -8
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_history.py +4 -3
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_logging_config.py +18 -6
- avtomatika-1.0b6/tests/test_scheduler.py +200 -0
- avtomatika-1.0b4/src/avtomatika/logging_config.py +0 -41
- {avtomatika-1.0b4 → avtomatika-1.0b6}/LICENSE +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/setup.cfg +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/api.html +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/reputation.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/telemetry.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/watcher.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_compression.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_context.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_dispatcher_extended.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_error_handling.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_integration.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_worker_config_loader.py +0 -0
- {avtomatika-1.0b4 → avtomatika-1.0b6}/tests/test_ws_manager.py +0 -0
{avtomatika-1.0b4/src/avtomatika.egg-info → avtomatika-1.0b6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b4
+Version: 1.0b6
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
@@ -17,13 +17,13 @@ Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
 Requires-Dist: zstandard~=0.24
 Requires-Dist: aioprometheus~=23.12
+Requires-Dist: msgpack~=1.1
+Requires-Dist: orjson~=3.11
 Provides-Extra: redis
 Requires-Dist: redis~=7.1; extra == "redis"
-Requires-Dist: orjson~=3.11; extra == "redis"
 Provides-Extra: history
 Requires-Dist: aiosqlite~=0.22; extra == "history"
 Requires-Dist: asyncpg~=0.30; extra == "history"
-Requires-Dist: orjson~=3.11; extra == "history"
 Provides-Extra: telemetry
 Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
 Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
@@ -60,6 +60,7 @@ This document serves as a comprehensive guide for developers looking to build pi
 - [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
 - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
 - [Dependency Injection (DataStore)](#dependency-injection-datastore)
+- [Native Scheduler](#native-scheduler)
 - [Production Configuration](#production-configuration)
 - [Fault Tolerance](#fault-tolerance)
 - [Storage Backend](#storage-backend)
@@ -74,7 +75,17 @@ The project is based on a simple yet powerful architectural pattern that separat
 
 * **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
 * **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
-* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator
+* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
+
+## Ecosystem
+
+Avtomatika is part of a larger ecosystem:
+
+* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
+* **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
+* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
+
+## Installation
 
 * **Install the core engine only:**
   ```bash
@@ -328,6 +339,22 @@ async def cache_handler(data_stores):
     user_data = await data_stores.cache.get("user:123")
     print(f"User from cache: {user_data}")
 ```
+
+### 5. Native Scheduler
+
+Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
+
+* **Configuration:** Defined in `schedules.toml`.
+* **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
+* **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
+
+```toml
+# schedules.toml example
+[nightly_backup]
+blueprint = "backup_flow"
+daily_at = "02:00"
+```
+
 ## Production Configuration
 
 The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
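The "run only once per interval" guarantee described above maps onto the classic Redis `SET NX` idiom. The sketch below is illustrative only; the engine's actual locking lives in the new `src/avtomatika/scheduler.py` and the storage backends, and the key layout and function names here are assumptions.

```python
# Illustrative sketch of the SET NX "run once per slot" idiom; not the
# engine's internal API. Key format and names are assumptions.
from datetime import datetime, timezone

from redis.asyncio import Redis


async def acquired_slot(redis: Redis, schedule: str, slot: str, ttl: int) -> bool:
    # SET ... NX EX succeeds for exactly one caller per (schedule, slot) pair.
    return bool(await redis.set(f"scheduler:lock:{schedule}:{slot}", "1", nx=True, ex=ttl))


async def fire_nightly_backup(redis: Redis) -> None:
    # One slot per calendar day for a `daily_at = "02:00"` schedule.
    slot = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    if await acquired_slot(redis, "nightly_backup", slot, ttl=24 * 3600):
        ...  # this instance won the lock and starts the "backup_flow" blueprint
```

Whichever instance reaches the slot first wins the lock; the others see `SET NX` fail and skip the run, which is why the README can promise at-most-once firing per interval across a fleet.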
@@ -349,6 +376,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
   [gpu-worker-01]
   token = "worker-secret-456"
   ```
+- **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
+  ```toml
+  [nightly_backup]
+  blueprint = "backup_flow"
+  daily_at = "02:00"
+  ```
 
 For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
 
@@ -360,18 +393,25 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Concurrency & Performance
+
+To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
+
+* **`EXECUTOR_MAX_CONCURRENT_JOBS`**: Limits the number of job handlers running simultaneously within the Orchestrator process (default: `100`). If this limit is reached, new jobs remain in the Redis queue until a slot becomes available. This ensures the event loop remains responsive even with a massive backlog of pending jobs.
+
 ### High Availability & Distributed Locking
 
 The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
 
 * **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Instance Identity:** Each instance should have a unique `INSTANCE_ID` (defaults to hostname) for correct handling of Redis Streams consumer groups.
 * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
 
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
 
-* **Redis (StorageBackend)**: For storing current job states.
+* **Redis (StorageBackend)**: For storing current job states (serialized with `msgpack`) and managing task queues (using Redis Streams with consumer groups).
   * Install:
     ```bash
    pip install "avtomatika[redis]"
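The backpressure behavior described above is not itself visible in this summary (the changes are in `src/avtomatika/executor.py`). As a minimal sketch, the documented behavior corresponds to the standard `asyncio.Semaphore` pattern; `fetch_next_job` and `handle_job` are hypothetical stand-ins, not the engine's actual names.

```python
# Minimal semaphore-based backpressure sketch matching the README's claim:
# at most N handlers run at once, and unfetched jobs stay in the queue.
import asyncio

JOB_QUEUE: asyncio.Queue = asyncio.Queue()  # stand-in for the Redis stream


async def fetch_next_job() -> dict:
    return await JOB_QUEUE.get()


async def run_executor(max_concurrent_jobs: int = 100) -> None:
    slots = asyncio.Semaphore(max_concurrent_jobs)

    async def handle_job(job: dict) -> None:
        try:
            await asyncio.sleep(0)  # stand-in for running the blueprint handler
        finally:
            slots.release()  # free a slot so the next queued job can start

    while True:
        await slots.acquire()         # blocks once the limit is reached;
        job = await fetch_next_job()  # jobs not yet fetched wait in the queue
        asyncio.create_task(handle_job(job))
```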
{avtomatika-1.0b4 → avtomatika-1.0b6}/README.md

@@ -14,6 +14,7 @@ This document serves as a comprehensive guide for developers looking to build pi
 - [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
 - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
 - [Dependency Injection (DataStore)](#dependency-injection-datastore)
+- [Native Scheduler](#native-scheduler)
 - [Production Configuration](#production-configuration)
 - [Fault Tolerance](#fault-tolerance)
 - [Storage Backend](#storage-backend)
@@ -28,7 +29,17 @@ The project is based on a simple yet powerful architectural pattern that separat
 
 * **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
 * **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
-* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator
+* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
+
+## Ecosystem
+
+Avtomatika is part of a larger ecosystem:
+
+* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
+* **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
+* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
+
+## Installation
 
 * **Install the core engine only:**
   ```bash
@@ -282,6 +293,22 @@ async def cache_handler(data_stores):
     user_data = await data_stores.cache.get("user:123")
     print(f"User from cache: {user_data}")
 ```
+
+### 5. Native Scheduler
+
+Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
+
+* **Configuration:** Defined in `schedules.toml`.
+* **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
+* **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
+
+```toml
+# schedules.toml example
+[nightly_backup]
+blueprint = "backup_flow"
+daily_at = "02:00"
+```
+
 ## Production Configuration
 
 The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
@@ -303,6 +330,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
   [gpu-worker-01]
   token = "worker-secret-456"
   ```
+- **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
+  ```toml
+  [nightly_backup]
+  blueprint = "backup_flow"
+  daily_at = "02:00"
+  ```
 
 For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
 
@@ -314,18 +347,25 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Concurrency & Performance
+
+To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
+
+* **`EXECUTOR_MAX_CONCURRENT_JOBS`**: Limits the number of job handlers running simultaneously within the Orchestrator process (default: `100`). If this limit is reached, new jobs remain in the Redis queue until a slot becomes available. This ensures the event loop remains responsive even with a massive backlog of pending jobs.
+
 ### High Availability & Distributed Locking
 
 The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
 
 * **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Instance Identity:** Each instance should have a unique `INSTANCE_ID` (defaults to hostname) for correct handling of Redis Streams consumer groups.
 * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
 
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
 
-* **Redis (StorageBackend)**: For storing current job states.
+* **Redis (StorageBackend)**: For storing current job states (serialized with `msgpack`) and managing task queues (using Redis Streams with consumer groups).
   * Install:
     ```bash
    pip install "avtomatika[redis]"
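The `INSTANCE_ID` requirement is easier to see against the generic redis-py Streams consumer-group pattern below. This is not avtomatika's internal code; the stream and group names are assumptions. Each instance uses its id as the consumer name, which is how Redis tracks which instance owns which pending message.

```python
# Generic redis-py Streams consumer-group sketch (illustrative only).
# A duplicated INSTANCE_ID would make two processes share one consumer
# identity, confusing pending-entry ownership inside the group.
from redis.asyncio import Redis
from redis.exceptions import ResponseError

STREAM, GROUP = "avtomatika:tasks", "orchestrators"  # hypothetical names


async def consume(redis: Redis, instance_id: str) -> None:
    try:
        await redis.xgroup_create(STREAM, GROUP, id="$", mkstream=True)
    except ResponseError:
        pass  # group already exists; only the first instance creates it
    while True:
        # '>' requests messages never delivered to any consumer in the group.
        entries = await redis.xreadgroup(GROUP, instance_id, {STREAM: ">"}, count=10, block=5000)
        for _stream, messages in entries or []:
            for message_id, fields in messages:
                ...  # process the task, then acknowledge it
                await redis.xack(STREAM, GROUP, message_id)
```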
{avtomatika-1.0b4 → avtomatika-1.0b6}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b4"
+version = "1.0b6"
 description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -21,11 +21,13 @@ dependencies = [
     "graphviz~=0.21",
     "zstandard~=0.24",
     "aioprometheus~=23.12",
+    "msgpack~=1.1",
+    "orjson~=3.11",
 ]
 
 [project.optional-dependencies]
-redis = ["redis~=7.1", "orjson~=3.11"]
-history = ["aiosqlite~=0.22", "asyncpg~=0.30", "orjson~=3.11"]
+redis = ["redis~=7.1"]
+history = ["aiosqlite~=0.22", "asyncpg~=0.30"]
 telemetry = [
     "opentelemetry-api~=1.39",
     "opentelemetry-sdk~=1.39",

{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/__init__.py

@@ -4,7 +4,7 @@
 This module exposes the primary classes for building and running state-driven automations.
 """
 
-import contextlib
+from contextlib import suppress
 from importlib.metadata import version
 
 __version__ = version("avtomatika")
@@ -23,7 +23,7 @@ __all__ = [
     "StorageBackend",
 ]
 
-with contextlib.suppress(ImportError):
+with suppress(ImportError):
     from .storage.redis import RedisStorage  # noqa: F401
 
     __all__.append("RedisStorage")
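A small usage sketch of the optional-dependency export above, assuming the packaging shown in this diff: `RedisStorage` only appears in `avtomatika.__all__` when the `redis` extra is importable.

```python
# Quick runtime check for the optional Redis backend, based on the
# __init__.py hunk above.
import avtomatika

if "RedisStorage" in avtomatika.__all__:
    print("redis extra installed:", avtomatika.RedisStorage)
else:
    print("install it with: pip install 'avtomatika[redis]'")
```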
{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/blueprint.py

@@ -1,6 +1,6 @@
 from operator import eq, ge, gt, le, lt, ne
 from re import compile as re_compile
-from typing import Any, Callable, Dict, NamedTuple, Optional
+from typing import Any, Callable, NamedTuple
 
 from .datastore import AsyncDictStore
 
@@ -99,8 +99,6 @@ class HandlerDecorator:
 
     def when(self, condition_str: str) -> Callable:
         def decorator(func: Callable) -> Callable:
-            # We still register the base handler to ensure the state is known,
-            # but we can make it a no-op if only conditional handlers exist for a state.
            if self._state not in self._blueprint.handlers:
                self._blueprint.handlers[self._state] = lambda: None  # Placeholder
 
@@ -115,8 +113,8 @@ class StateMachineBlueprint:
     def __init__(
         self,
         name: str,
-        api_endpoint: Optional[str] = None,
-        api_version: Optional[str] = None,
+        api_endpoint: str | None = None,
+        api_version: str | None = None,
         data_stores: Any = None,
     ):
         """Initializes a new blueprint.
@@ -132,14 +130,14 @@ class StateMachineBlueprint:
         self.name = name
         self.api_endpoint = api_endpoint
         self.api_version = api_version
-        self.data_stores: Dict[str, AsyncDictStore] = data_stores if data_stores is not None else {}
-        self.handlers: Dict[str, Callable] = {}
-        self.aggregator_handlers: Dict[str, Callable] = {}
+        self.data_stores: dict[str, AsyncDictStore] = data_stores if data_stores is not None else {}
+        self.handlers: dict[str, Callable] = {}
+        self.aggregator_handlers: dict[str, Callable] = {}
         self.conditional_handlers: list[ConditionalHandler] = []
-        self.start_state: Optional[str] = None
+        self.start_state: str | None = None
         self.end_states: set[str] = set()
 
-    def add_data_store(self, name: str, initial_data: Dict[str, Any]):
+    def add_data_store(self, name: str, initial_data: dict[str, Any]):
         """Adds a named data store to the blueprint."""
         if name in self.data_stores:
             raise ValueError(f"Data store with name '{name}' already exists.")
@@ -174,7 +172,7 @@ class StateMachineBlueprint:
             f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
         )
 
-    def render_graph(self, output_filename: Optional[str] = None, output_format: str = "png"):
+    def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
         import ast
         import inspect
         import logging
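Based only on the signatures visible above, here is a hedged authoring sketch. The constructor, `add_data_store`, and `render_graph` calls match the hunks exactly; handler registration is not shown in this diff, so it is omitted.

```python
# Hedged usage sketch of StateMachineBlueprint, limited to the API surface
# visible in the blueprint.py hunks above.
from avtomatika.blueprint import StateMachineBlueprint

bp = StateMachineBlueprint(
    name="backup_flow",
    api_endpoint="/v1/backups",  # optional, may stay None
    api_version="1.0",
)

# Named stores become available to handlers via dependency injection.
bp.add_data_store("cache", {"user:123": {"name": "Ada"}})

# Renders the state graph (requires the graphviz dependency).
bp.render_graph(output_filename="backup_flow", output_format="png")
```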
{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/config.py

@@ -1,4 +1,5 @@
 from os import getenv
+from socket import gethostname
 
 
 class Config:
@@ -7,6 +8,9 @@ class Config:
     """
 
     def __init__(self):
+        # Instance identity
+        self.INSTANCE_ID: str = getenv("INSTANCE_ID", gethostname())
+
         # Redis settings
         self.REDIS_HOST: str = getenv("REDIS_HOST", "")
         self.REDIS_PORT: int = int(getenv("REDIS_PORT", 6379))
@@ -45,6 +49,9 @@ class Config:
         self.WATCHER_INTERVAL_SECONDS: int = int(
             getenv("WATCHER_INTERVAL_SECONDS", 20),
         )
+        self.EXECUTOR_MAX_CONCURRENT_JOBS: int = int(
+            getenv("EXECUTOR_MAX_CONCURRENT_JOBS", 100),
+        )
 
         # History storage settings
         self.HISTORY_DATABASE_URI: str = getenv("HISTORY_DATABASE_URI", "")
@@ -55,3 +62,7 @@ class Config:
         # External config files
         self.WORKERS_CONFIG_PATH: str = getenv("WORKERS_CONFIG_PATH", "")
         self.CLIENTS_CONFIG_PATH: str = getenv("CLIENTS_CONFIG_PATH", "")
+        self.SCHEDULES_CONFIG_PATH: str = getenv("SCHEDULES_CONFIG_PATH", "")
+
+        # Timezone settings
+        self.TZ: str = getenv("TZ", "UTC")
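This mirrors the README's note that env-loaded settings can be overridden programmatically after `Config()` is constructed. The attribute names come straight from the hunks above; the deployment context is illustrative.

```python
# Env-level configuration, then a programmatic override (e.g., for a test).
import os

os.environ["EXECUTOR_MAX_CONCURRENT_JOBS"] = "250"

from avtomatika.config import Config

config = Config()
assert config.EXECUTOR_MAX_CONCURRENT_JOBS == 250  # parsed from the env var

# Override after initialization, as the README describes:
config.EXECUTOR_MAX_CONCURRENT_JOBS = 10
config.TZ = "Europe/Moscow"
config.SCHEDULES_CONFIG_PATH = "./schedules.toml"
```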
avtomatika-1.0b6/src/avtomatika/constants.py (new file)

@@ -0,0 +1,30 @@
+"""
+Centralized constants for the Avtomatika protocol.
+Use these constants instead of hardcoded strings to ensure consistency.
+"""
+
+# --- Auth Headers ---
+AUTH_HEADER_CLIENT = "X-Avtomatika-Token"
+AUTH_HEADER_WORKER = "X-Worker-Token"
+
+# --- Error Codes ---
+# Error codes returned by workers in the result payload
+ERROR_CODE_TRANSIENT = "TRANSIENT_ERROR"
+ERROR_CODE_PERMANENT = "PERMANENT_ERROR"
+ERROR_CODE_INVALID_INPUT = "INVALID_INPUT_ERROR"
+
+# --- Task Statuses ---
+# Standard statuses for task results
+TASK_STATUS_SUCCESS = "success"
+TASK_STATUS_FAILURE = "failure"
+TASK_STATUS_CANCELLED = "cancelled"
+
+# --- Job Statuses ---
+JOB_STATUS_PENDING = "pending"
+JOB_STATUS_WAITING_FOR_WORKER = "waiting_for_worker"
+JOB_STATUS_RUNNING = "running"
+JOB_STATUS_FAILED = "failed"
+JOB_STATUS_QUARANTINED = "quarantined"
+JOB_STATUS_CANCELLED = "cancelled"
+JOB_STATUS_WAITING_FOR_HUMAN = "waiting_for_human"
+JOB_STATUS_WAITING_FOR_PARALLEL = "waiting_for_parallel_tasks"
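A hedged example of how a worker might use these constants when reporting back. The payload keys (`status`, `error_code`) are inferred from the comments in the new module, not from a documented schema.

```python
# Building a worker result payload from the new constants module; the dict
# shape is an assumption based on the "result payload" comments above.
from avtomatika.constants import (
    ERROR_CODE_TRANSIENT,
    TASK_STATUS_FAILURE,
    TASK_STATUS_SUCCESS,
)


def build_result(ok: bool) -> dict:
    if ok:
        return {"status": TASK_STATUS_SUCCESS, "data": {}}
    # A TRANSIENT error asks the orchestrator to retry with exponential
    # backoff, per the Fault Tolerance section of the README.
    return {"status": TASK_STATUS_FAILURE, "error_code": ERROR_CODE_TRANSIENT}
```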
{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/context.py

@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any
 
 
 class ActionFactory:
@@ -6,10 +6,10 @@ class ActionFactory:
 
     def __init__(self, job_id: str):
         self._job_id = job_id
-        self._next_state_val: Optional[str] = None
-        self._task_to_dispatch_val: Optional[Dict[str, Any]] = None
-        self._sub_blueprint_to_run_val: Optional[Dict[str, Any]] = None
-        self._parallel_tasks_to_dispatch_val: Optional[Dict[str, Any]] = None
+        self._next_state_val: str | None = None
+        self._task_to_dispatch_val: dict[str, Any] | None = None
+        self._sub_blueprint_to_run_val: dict[str, Any] | None = None
+        self._parallel_tasks_to_dispatch_val: dict[str, Any] | None = None
 
     def _check_for_existing_action(self):
         """
@@ -30,22 +30,22 @@ class ActionFactory:
         )
 
     @property
-    def next_state(self) -> Optional[str]:
+    def next_state(self) -> str | None:
         return self._next_state_val
 
     @property
-    def task_to_dispatch(self) -> Optional[Dict[str, Any]]:
+    def task_to_dispatch(self) -> dict[str, Any] | None:
         return self._task_to_dispatch_val
 
     @property
-    def sub_blueprint_to_run(self) -> Optional[Dict[str, Any]]:
+    def sub_blueprint_to_run(self) -> dict[str, Any] | None:
         return self._sub_blueprint_to_run_val
 
     @property
-    def parallel_tasks_to_dispatch(self) -> Optional[Dict[str, Any]]:
+    def parallel_tasks_to_dispatch(self) -> dict[str, Any] | None:
         return self._parallel_tasks_to_dispatch_val
 
-    def dispatch_parallel(self, tasks: Optional[Dict[str, Any]], aggregate_into: str) -> None:
+    def dispatch_parallel(self, tasks: dict[str, Any] | None, aggregate_into: str) -> None:
         """
         Dispatches multiple tasks for parallel execution.
         """
@@ -65,12 +65,12 @@ class ActionFactory:
     def dispatch_task(
         self,
         task_type: str,
-        params: Dict[str, Any],
-        transitions: Dict[str, str],
+        params: dict[str, Any],
+        transitions: dict[str, str],
         dispatch_strategy: str = "default",
-        resource_requirements: Optional[Dict[str, Any]] = None,
-        timeout_seconds: Optional[int] = None,
-        max_cost: Optional[float] = None,
+        resource_requirements: dict[str, Any] | None = None,
+        timeout_seconds: int | None = None,
+        max_cost: float | None = None,
         priority: float = 0.0,
     ) -> None:
         """Dispatches a task to a worker for execution."""
@@ -91,7 +91,7 @@ class ActionFactory:
         self,
         integration: str,
         message: str,
-        transitions: Dict[str, str],
+        transitions: dict[str, str],
     ) -> None:
         """Pauses the pipeline until an external signal (human approval) is received."""
         self._check_for_existing_action()
@@ -106,8 +106,8 @@ class ActionFactory:
     def run_blueprint(
         self,
         blueprint_name: str,
-        initial_data: Dict[str, Any],
-        transitions: Dict[str, str],
+        initial_data: dict[str, Any],
+        transitions: dict[str, str],
     ) -> None:
         """Runs a child blueprint and waits for its result."""
         self._check_for_existing_action()
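A usage sketch of `dispatch_task` with the exact signature above. The handler shape, the transition keys, and the `gpu_info` requirement payload are assumptions; only the parameter names and types come from the diff.

```python
# Hedged handler sketch: dispatch_task's parameters are verbatim from
# context.py above; everything else (registration, ctx attribute access,
# the gpu_info payload shape) is assumed for illustration.
async def transcode_handler(ctx):
    ctx.actions.dispatch_task(
        task_type="process_video",
        params={"source": ctx.initial_data["url"]},
        transitions={"success": "publish", "failure": "quarantine"},
        dispatch_strategy="least_connections",
        resource_requirements={"gpu_info": {"min_vram_gb": 16}},
        timeout_seconds=600,
        max_cost=0.05,  # workers with a higher cost_per_second are skipped
        priority=1.0,
    )
```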
{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/data_types.py

@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Dict, NamedTuple, Optional
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 if TYPE_CHECKING:
     from .context import ActionFactory
@@ -9,8 +9,7 @@ class ClientConfig(NamedTuple):
 
     token: str
     plan: str
-
-    params: Dict[str, Any]
+    params: dict[str, Any]
 
 
 class JobContext(NamedTuple):
@@ -18,13 +17,13 @@ class JobContext(NamedTuple):
 
     job_id: str
     current_state: str
-    initial_data: Dict[str, Any]
-    state_history: Dict[str, Any]
+    initial_data: dict[str, Any]
+    state_history: dict[str, Any]
     client: ClientConfig
     actions: "ActionFactory"
     data_stores: Any = None
-    tracing_context: Dict[str, Any] = {}
-    aggregation_results: Optional[Dict[str, Any]] = None
+    tracing_context: dict[str, Any] = {}
+    aggregation_results: dict[str, Any] | None = None
 
 
 class GPUInfo(NamedTuple):

{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/datastore.py

@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any
 
 
 class AsyncDictStore:
@@ -6,7 +6,7 @@ class AsyncDictStore:
     Simulates the behavior of a persistent store for use in blueprints.
     """
 
-    def __init__(self, initial_data: Dict[str, Any]):
+    def __init__(self, initial_data: dict[str, Any]):
         self._data = initial_data.copy()
 
     async def get(self, key: str) -> Any:

{avtomatika-1.0b4 → avtomatika-1.0b6}/src/avtomatika/dispatcher.py

@@ -1,7 +1,7 @@
 from collections import defaultdict
 from logging import getLogger
 from random import choice
-from typing import Any, Dict, List
+from typing import Any
 from uuid import uuid4
 
 try:
@@ -26,12 +26,12 @@ class Dispatcher:
     def __init__(self, storage: StorageBackend, config: Config):
         self.storage = storage
         self.config = config
-        self._round_robin_indices: Dict[str, int] = defaultdict(int)
+        self._round_robin_indices: dict[str, int] = defaultdict(int)
 
     @staticmethod
     def _is_worker_compliant(
-        worker: Dict[str, Any],
-        requirements: Dict[str, Any],
+        worker: dict[str, Any],
+        requirements: dict[str, Any],
     ) -> bool:
         """Checks if a worker meets the specified resource requirements."""
         if required_gpu := requirements.get("gpu_info"):
@@ -58,9 +58,9 @@ class Dispatcher:
 
     @staticmethod
     def _select_default(
-        workers: List[Dict[str, Any]],
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """Default strategy: first selects "warm" workers (those that have the
         task in their cache), and then selects the cheapest among them.
 
@@ -80,9 +80,9 @@ class Dispatcher:
 
     def _select_round_robin(
         self,
-        workers: List[Dict[str, Any]],
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """ "Round Robin" strategy: distributes tasks sequentially among all
         available workers.
         """
@@ -93,9 +93,9 @@ class Dispatcher:
 
     @staticmethod
     def _select_least_connections(
-        workers: List[Dict[str, Any]],
+        workers: list[dict[str, Any]],
        task_type: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """ "Least Connections" strategy: selects the worker with the fewest
         active tasks (based on the `load` field).
         """
@@ -103,14 +103,14 @@ class Dispatcher:
 
     @staticmethod
     def _select_cheapest(
-        workers: List[Dict[str, Any]],
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """Selects the cheapest worker based on 'cost_per_second'."""
         return min(workers, key=lambda w: w.get("cost_per_second", float("inf")))
 
     @staticmethod
-    def _get_best_value_score(worker: Dict[str, Any]) -> float:
+    def _get_best_value_score(worker: dict[str, Any]) -> float:
         """Calculates a "score" for a worker using the formula cost / reputation.
         The lower the score, the better.
         """
@@ -122,13 +122,13 @@ class Dispatcher:
 
     def _select_best_value(
         self,
-        workers: List[Dict[str, Any]],
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """Selects the worker with the best price-quality (reputation) ratio."""
         return min(workers, key=self._get_best_value_score)
 
-    async def dispatch(self, job_state: Dict[str, Any], task_info: Dict[str, Any]):
+    async def dispatch(self, job_state: dict[str, Any], task_info: dict[str, Any]):
         job_id = job_state["id"]
         task_type = task_info.get("type")
         if not task_type:
@@ -142,7 +142,6 @@ class Dispatcher:
         if not all_workers:
             raise RuntimeError("No available workers")
 
-        # 1. Filter by 'idle' status
         # A worker is considered available if its status is 'idle' or not specified (for backward compatibility)
         logger.debug(f"All available workers: {[w['worker_id'] for w in all_workers]}")
         idle_workers = [w for w in all_workers if w.get("status", "idle") == "idle"]
@@ -157,13 +156,13 @@ class Dispatcher:
             )
             raise RuntimeError("No idle workers (all are 'busy')")
 
-        # 2. Filter by task type
+        # Filter by task type
         capable_workers = [w for w in idle_workers if task_type in w.get("supported_tasks", [])]
         logger.debug(f"Capable workers for task '{task_type}': {[w['worker_id'] for w in capable_workers]}")
         if not capable_workers:
             raise RuntimeError(f"No suitable workers for task type '{task_type}'")
 
-        # 3. Filter by resource requirements
+        # Filter by resource requirements
         if resource_requirements:
             compliant_workers = [w for w in capable_workers if self._is_worker_compliant(w, resource_requirements)]
             logger.debug(
@@ -176,7 +175,7 @@ class Dispatcher:
             )
             capable_workers = compliant_workers
 
-        # 4. Filter by maximum cost
+        # Filter by maximum cost
         max_cost = task_info.get("max_cost")
         if max_cost is not None:
             cost_compliant_workers = [w for w in capable_workers if w.get("cost_per_second", float("inf")) <= max_cost]
@@ -189,7 +188,7 @@ class Dispatcher:
             )
             capable_workers = cost_compliant_workers
 
-        # 5. Select worker according to strategy
+        # Select worker according to strategy
         if dispatch_strategy == "round_robin":
             selected_worker = self._select_round_robin(capable_workers, task_type)
         elif dispatch_strategy == "least_connections":