avtomatika 1.0b3.tar.gz → 1.0b5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b3/src/avtomatika.egg-info → avtomatika-1.0b5}/PKG-INFO +44 -9
- {avtomatika-1.0b3 → avtomatika-1.0b5}/README.md +41 -6
- {avtomatika-1.0b3 → avtomatika-1.0b5}/pyproject.toml +5 -3
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/__init__.py +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/api.html +0 -11
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/blueprint.py +9 -11
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/config.py +7 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/context.py +18 -18
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/data_types.py +6 -7
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/datastore.py +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/dispatcher.py +20 -21
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/engine.py +107 -68
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/executor.py +168 -148
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/history/base.py +7 -7
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/history/noop.py +7 -7
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/history/postgres.py +7 -9
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/history/sqlite.py +7 -10
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/logging_config.py +1 -1
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/storage/__init__.py +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/storage/base.py +31 -20
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/storage/memory.py +36 -43
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/storage/redis.py +124 -60
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/worker_config_loader.py +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/ws_manager.py +1 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5/src/avtomatika.egg-info}/PKG-INFO +44 -9
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika.egg-info/requires.txt +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_engine.py +145 -88
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_executor.py +24 -8
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_history.py +4 -3
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_logging_config.py +2 -2
- {avtomatika-1.0b3 → avtomatika-1.0b5}/LICENSE +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/setup.cfg +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/reputation.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/security.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/telemetry.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/watcher.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika.egg-info/SOURCES.txt +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_compression.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_context.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_dispatcher_extended.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_error_handling.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_integration.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_worker_config_loader.py +0 -0
- {avtomatika-1.0b3 → avtomatika-1.0b5}/tests/test_ws_manager.py +0 -0
**`{avtomatika-1.0b3/src/avtomatika.egg-info → avtomatika-1.0b5}/PKG-INFO`**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b3
+Version: 1.0b5
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
@@ -17,13 +17,13 @@ Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
 Requires-Dist: zstandard~=0.24
 Requires-Dist: aioprometheus~=23.12
+Requires-Dist: msgpack~=1.1
+Requires-Dist: orjson~=3.11
 Provides-Extra: redis
 Requires-Dist: redis~=7.1; extra == "redis"
-Requires-Dist: orjson~=3.11; extra == "redis"
 Provides-Extra: history
 Requires-Dist: aiosqlite~=0.22; extra == "history"
 Requires-Dist: asyncpg~=0.30; extra == "history"
-Requires-Dist: orjson~=3.11; extra == "history"
 Provides-Extra: telemetry
 Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
 Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
@@ -334,6 +334,24 @@ The orchestrator's behavior can be configured through environment variables. Add
 
 **Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
 
+### Configuration Files
+
+To manage access and worker settings securely, Avtomatika uses TOML configuration files.
+
+- **`clients.toml`**: Defines API clients, their tokens, plans, and quotas.
+```toml
+[client_premium]
+token = "secret-token-123"
+plan = "premium"
+```
+- **`workers.toml`**: Defines individual tokens for workers to enhance security.
+```toml
+[gpu-worker-01]
+token = "worker-secret-456"
+```
+
+For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
+
 ### Fault Tolerance
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -342,18 +360,25 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Concurrency & Performance
+
+To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
+
+* **`EXECUTOR_MAX_CONCURRENT_JOBS`**: Limits the number of job handlers running simultaneously within the Orchestrator process (default: `100`). If this limit is reached, new jobs remain in the Redis queue until a slot becomes available. This ensures the event loop remains responsive even with a massive backlog of pending jobs.
+
 ### High Availability & Distributed Locking
 
 The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
 
 * **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Instance Identity:** Each instance should have a unique `INSTANCE_ID` (defaults to hostname) for correct handling of Redis Streams consumer groups.
 * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
 
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
 
-* **Redis (StorageBackend)**: For storing current job states.
+* **Redis (StorageBackend)**: For storing current job states (serialized with `msgpack`) and managing task queues (using Redis Streams with consumer groups).
 * Install:
 ```bash
 pip install "avtomatika[redis]"
@@ -418,11 +443,21 @@ To run the `avtomatika` test suite:
 pytest avtomatika/tests/
 ```
 
+### Interactive API Documentation
+
+Avtomatika provides a built-in interactive API documentation page (similar to Swagger UI) that is automatically generated based on your registered blueprints.
+
+* **Endpoint:** `/_public/docs`
+* **Features:**
+    * **List of all system endpoints:** Detailed documentation for Public, Protected, and Worker API groups.
+    * **Dynamic Blueprint Documentation:** Automatically generates and lists documentation for all blueprints registered in the engine, including their specific API endpoints.
+    * **Interactive Testing:** Allows you to test API calls directly from the browser. You can provide authentication tokens, parameters, and request bodies to see real server responses.
+
 ## Detailed Documentation
 
-For a deeper dive into the system, please refer to the following documents
+For a deeper dive into the system, please refer to the following documents:
 
-- [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
-- [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
-- [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
-- [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.
+- [**Architecture Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/architecture.md): A detailed overview of the system components and their interactions.
+- [**API Reference**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/api_reference.md): Full specification of the HTTP API.
+- [**Deployment Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
+- [**Cookbook**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/cookbook/README.md): Examples and best practices for creating blueprints.
````
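The README text above says job states are now serialized with `msgpack` and queued on Redis Streams with consumer groups. Below is a minimal sketch of that combination using redis-py's asyncio client; the stream key `jobs:stream`, group name `orchestrators`, and payload field are illustrative assumptions, not Avtomatika's actual internals.

```python
import asyncio

import msgpack
from redis.asyncio import Redis


async def main() -> None:
    r = Redis()  # assumes a local Redis instance

    # Serialize a job state with msgpack and append it to a stream.
    job_state = {"id": "job-1", "current_state": "start"}
    await r.xadd("jobs:stream", {"payload": msgpack.packb(job_state)})

    # Each orchestrator instance joins the same consumer group under its
    # own consumer name, so pending jobs are load-balanced across instances.
    try:
        await r.xgroup_create("jobs:stream", "orchestrators", id="0", mkstream=True)
    except Exception:
        pass  # BUSYGROUP: the group already exists

    replies = await r.xreadgroup("orchestrators", "instance-a", {"jobs:stream": ">"}, count=10)
    for _stream, messages in replies:
        for msg_id, fields in messages:
            state = msgpack.unpackb(fields[b"payload"])
            print(msg_id, state)
            await r.xack("jobs:stream", "orchestrators", msg_id)

    await r.aclose()


asyncio.run(main())
```

This also shows why the README insists each instance needs a unique `INSTANCE_ID`: within a consumer group, the consumer name is what distinguishes one orchestrator instance from another.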
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/README.md`** (the same changes as the README embedded in PKG-INFO above, at the file's own line offsets)

````diff
@@ -288,6 +288,24 @@ The orchestrator's behavior can be configured through environment variables. Add
 
 **Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
 
+### Configuration Files
+
+To manage access and worker settings securely, Avtomatika uses TOML configuration files.
+
+- **`clients.toml`**: Defines API clients, their tokens, plans, and quotas.
+```toml
+[client_premium]
+token = "secret-token-123"
+plan = "premium"
+```
+- **`workers.toml`**: Defines individual tokens for workers to enhance security.
+```toml
+[gpu-worker-01]
+token = "worker-secret-456"
+```
+
+For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
+
 ### Fault Tolerance
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -296,18 +314,25 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Concurrency & Performance
+
+To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
+
+* **`EXECUTOR_MAX_CONCURRENT_JOBS`**: Limits the number of job handlers running simultaneously within the Orchestrator process (default: `100`). If this limit is reached, new jobs remain in the Redis queue until a slot becomes available. This ensures the event loop remains responsive even with a massive backlog of pending jobs.
+
 ### High Availability & Distributed Locking
 
 The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
 
 * **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Instance Identity:** Each instance should have a unique `INSTANCE_ID` (defaults to hostname) for correct handling of Redis Streams consumer groups.
 * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
 
 ### Storage Backend
 
 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
 
-* **Redis (StorageBackend)**: For storing current job states.
+* **Redis (StorageBackend)**: For storing current job states (serialized with `msgpack`) and managing task queues (using Redis Streams with consumer groups).
 * Install:
 ```bash
 pip install "avtomatika[redis]"
@@ -372,11 +397,21 @@ To run the `avtomatika` test suite:
 pytest avtomatika/tests/
 ```
 
+### Interactive API Documentation
+
+Avtomatika provides a built-in interactive API documentation page (similar to Swagger UI) that is automatically generated based on your registered blueprints.
+
+* **Endpoint:** `/_public/docs`
+* **Features:**
+    * **List of all system endpoints:** Detailed documentation for Public, Protected, and Worker API groups.
+    * **Dynamic Blueprint Documentation:** Automatically generates and lists documentation for all blueprints registered in the engine, including their specific API endpoints.
+    * **Interactive Testing:** Allows you to test API calls directly from the browser. You can provide authentication tokens, parameters, and request bodies to see real server responses.
+
 ## Detailed Documentation
 
-For a deeper dive into the system, please refer to the following documents
+For a deeper dive into the system, please refer to the following documents:
 
-- [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
-- [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
-- [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
-- [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.
+- [**Architecture Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/architecture.md): A detailed overview of the system components and their interactions.
+- [**API Reference**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/api_reference.md): Full specification of the HTTP API.
+- [**Deployment Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
+- [**Cookbook**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/cookbook/README.md): Examples and best practices for creating blueprints.
````
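The High Availability section above attributes coordination of `Watcher` and `ReputationCalculator` to Redis `SET NX` locks. Here is a minimal sketch of that locking pattern; the key name and TTL are assumptions, and a production version would release the lock via a Lua script so the ownership check and delete are atomic.

```python
from uuid import uuid4

from redis.asyncio import Redis


async def try_run_watcher_pass(r: Redis) -> bool:
    """Run one background pass only if this instance wins the lock."""
    token = str(uuid4())
    # SET key value NX EX ttl: succeeds for exactly one instance at a time.
    if not await r.set("locks:watcher", token, nx=True, ex=30):
        return False  # another instance holds the lock
    try:
        ...  # do the periodic work, e.g. scan for timed-out jobs
        return True
    finally:
        # Best-effort release: delete only if we still own the lock.
        # (Non-atomic; a Lua script is the strict version.)
        if await r.get("locks:watcher") == token.encode():
            await r.delete("locks:watcher")
```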
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/pyproject.toml`**

````diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b3"
+version = "1.0b5"
 description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -21,11 +21,13 @@ dependencies = [
     "graphviz~=0.21",
     "zstandard~=0.24",
     "aioprometheus~=23.12",
+    "msgpack~=1.1",
+    "orjson~=3.11",
 ]
 
 [project.optional-dependencies]
-redis = ["redis~=7.1", "orjson~=3.11"]
-history = ["aiosqlite~=0.22", "asyncpg~=0.30", "orjson~=3.11"]
+redis = ["redis~=7.1"]
+history = ["aiosqlite~=0.22", "asyncpg~=0.30"]
 telemetry = [
     "opentelemetry-api~=1.39",
     "opentelemetry-sdk~=1.39",
````
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/__init__.py`**

````diff
@@ -4,7 +4,7 @@
 This module exposes the primary classes for building and running state-driven automations.
 """
 
-import contextlib
+from contextlib import suppress
 from importlib.metadata import version
 
 __version__ = version("avtomatika")
@@ -23,7 +23,7 @@ __all__ = [
     "StorageBackend",
 ]
 
-with contextlib.suppress(ImportError):
+with suppress(ImportError):
     from .storage.redis import RedisStorage  # noqa: F401
 
     __all__.append("RedisStorage")
````
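For readers skimming the `__init__.py` hunk above: `suppress(ImportError)` is what makes `RedisStorage` appear in the package namespace only when the `redis` extra is installed. From the consuming side the pattern looks like this (a sketch, not project code):

```python
# Guarded import of an optional backend: present only when the extra
# (pip install "avtomatika[redis]") is installed.
try:
    from avtomatika import RedisStorage
except ImportError:
    RedisStorage = None

if RedisStorage is None:
    raise SystemExit("Redis support not installed; run: pip install 'avtomatika[redis]'")
```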
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/api.html`**

````diff
@@ -199,17 +199,6 @@
         { code: '202 Accepted', description: 'Job successfully accepted for processing.', body: { "status": "accepted", "job_id": "..." } }
     ]
 },
-{
-    id: 'post-create-showcase-job',
-    name: 'Create a Full Showcase Job',
-    method: 'POST',
-    path: '/api/v1/jobs/full_showcase',
-    description: 'Creates and starts a new instance (Job) of the `full_showcase` blueprint. This blueprint demonstrates most of the features of the Avtomatika library.',
-    request: { body: { "path": "/path/to/video.mp4", "user_id": "user-123", "quality": "high" } },
-    responses: [
-        { code: '202 Accepted', description: 'Job successfully accepted for processing.', body: { "status": "accepted", "job_id": "..." } }
-    ]
-},
 {
     id: 'get-job-status',
     name: 'Get Job Status',
````
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/blueprint.py`** (several removed lines were truncated by extraction and are reproduced as-is)

````diff
@@ -1,6 +1,6 @@
 from operator import eq, ge, gt, le, lt, ne
 from re import compile as re_compile
-from typing import Any, Callable,
+from typing import Any, Callable, NamedTuple
 
 from .datastore import AsyncDictStore
 
@@ -99,8 +99,6 @@ class HandlerDecorator:
 
     def when(self, condition_str: str) -> Callable:
         def decorator(func: Callable) -> Callable:
-            # We still register the base handler to ensure the state is known,
-            # but we can make it a no-op if only conditional handlers exist for a state.
             if self._state not in self._blueprint.handlers:
                 self._blueprint.handlers[self._state] = lambda: None  # Placeholder
 
@@ -115,8 +113,8 @@ class StateMachineBlueprint:
     def __init__(
         self,
         name: str,
-        api_endpoint:
-        api_version:
+        api_endpoint: str | None = None,
+        api_version: str | None = None,
         data_stores: Any = None,
     ):
         """Initializes a new blueprint.
@@ -132,14 +130,14 @@ class StateMachineBlueprint:
         self.name = name
         self.api_endpoint = api_endpoint
         self.api_version = api_version
-        self.data_stores:
-        self.handlers:
-        self.aggregator_handlers:
+        self.data_stores: dict[str, AsyncDictStore] = data_stores if data_stores is not None else {}
+        self.handlers: dict[str, Callable] = {}
+        self.aggregator_handlers: dict[str, Callable] = {}
         self.conditional_handlers: list[ConditionalHandler] = []
-        self.start_state:
+        self.start_state: str | None = None
        self.end_states: set[str] = set()
 
-    def add_data_store(self, name: str, initial_data:
+    def add_data_store(self, name: str, initial_data: dict[str, Any]):
         """Adds a named data store to the blueprint."""
         if name in self.data_stores:
             raise ValueError(f"Data store with name '{name}' already exists.")
@@ -174,7 +172,7 @@ class StateMachineBlueprint:
             f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
         )
 
-    def render_graph(self, output_filename:
+    def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
         import ast
         import inspect
         import logging
````
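A usage sketch assembled only from the signatures visible in the `blueprint.py` hunks (constructor defaults, `add_data_store`, `render_graph`); the import path and the argument values are assumptions:

```python
from avtomatika import StateMachineBlueprint  # import path assumed

bp = StateMachineBlueprint(
    name="video_pipeline",
    api_endpoint="/api/v1/jobs/video_pipeline",  # optional, per the new defaults
    api_version="v1",
)

# Named stores are rejected on duplicate registration (raises ValueError).
bp.add_data_store("counters", {"processed": 0})

# Signature per the + line above: output_filename defaults to None, format to "png".
bp.render_graph(output_filename="video_pipeline", output_format="png")
```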
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/config.py`**

````diff
@@ -1,4 +1,5 @@
 from os import getenv
+from socket import gethostname
 
 
 class Config:
@@ -7,6 +8,9 @@ class Config:
     """
 
     def __init__(self):
+        # Instance identity
+        self.INSTANCE_ID: str = getenv("INSTANCE_ID", gethostname())
+
         # Redis settings
         self.REDIS_HOST: str = getenv("REDIS_HOST", "")
         self.REDIS_PORT: int = int(getenv("REDIS_PORT", 6379))
@@ -45,6 +49,9 @@ class Config:
         self.WATCHER_INTERVAL_SECONDS: int = int(
             getenv("WATCHER_INTERVAL_SECONDS", 20),
         )
+        self.EXECUTOR_MAX_CONCURRENT_JOBS: int = int(
+            getenv("EXECUTOR_MAX_CONCURRENT_JOBS", 100),
+        )
 
         # History storage settings
         self.HISTORY_DATABASE_URI: str = getenv("HISTORY_DATABASE_URI", "")
````
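The new `EXECUTOR_MAX_CONCURRENT_JOBS` setting is the knob behind the backpressure behavior described in the README. A sketch of how such a cap is typically enforced in asyncio; this illustrates the mechanism, not Avtomatika's actual executor code:

```python
import asyncio

MAX_CONCURRENT_JOBS = 100  # mirrors EXECUTOR_MAX_CONCURRENT_JOBS


async def handle_job(job_id: str) -> None:
    await asyncio.sleep(0.01)  # stand-in for a real job handler


async def executor_loop(queue: asyncio.Queue) -> None:
    # Acquire the semaphore *before* pulling the next job: once saturated,
    # the loop stops reading, so pending jobs simply wait in the queue
    # (or, in Avtomatika's case, in Redis) while the event loop stays responsive.
    sem = asyncio.Semaphore(MAX_CONCURRENT_JOBS)

    async def run(job_id: str) -> None:
        try:
            await handle_job(job_id)
        finally:
            sem.release()

    while True:
        await sem.acquire()
        job_id = await queue.get()
        asyncio.create_task(run(job_id))
```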
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/context.py`** (several removed lines were truncated by extraction and are reproduced as-is)

````diff
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any
 
 
 class ActionFactory:
@@ -6,10 +6,10 @@ class ActionFactory:
 
     def __init__(self, job_id: str):
         self._job_id = job_id
-        self._next_state_val:
-        self._task_to_dispatch_val:
-        self._sub_blueprint_to_run_val:
-        self._parallel_tasks_to_dispatch_val:
+        self._next_state_val: str | None = None
+        self._task_to_dispatch_val: dict[str, Any] | None = None
+        self._sub_blueprint_to_run_val: dict[str, Any] | None = None
+        self._parallel_tasks_to_dispatch_val: dict[str, Any] | None = None
 
     def _check_for_existing_action(self):
         """
@@ -30,22 +30,22 @@ class ActionFactory:
         )
 
     @property
-    def next_state(self) ->
+    def next_state(self) -> str | None:
         return self._next_state_val
 
     @property
-    def task_to_dispatch(self) ->
+    def task_to_dispatch(self) -> dict[str, Any] | None:
         return self._task_to_dispatch_val
 
     @property
-    def sub_blueprint_to_run(self) ->
+    def sub_blueprint_to_run(self) -> dict[str, Any] | None:
         return self._sub_blueprint_to_run_val
 
     @property
-    def parallel_tasks_to_dispatch(self) ->
+    def parallel_tasks_to_dispatch(self) -> dict[str, Any] | None:
         return self._parallel_tasks_to_dispatch_val
 
-    def dispatch_parallel(self, tasks:
+    def dispatch_parallel(self, tasks: dict[str, Any] | None, aggregate_into: str) -> None:
         """
         Dispatches multiple tasks for parallel execution.
         """
@@ -65,12 +65,12 @@ class ActionFactory:
     def dispatch_task(
         self,
         task_type: str,
-        params:
-        transitions:
+        params: dict[str, Any],
+        transitions: dict[str, str],
         dispatch_strategy: str = "default",
-        resource_requirements:
-        timeout_seconds:
-        max_cost:
+        resource_requirements: dict[str, Any] | None = None,
+        timeout_seconds: int | None = None,
+        max_cost: float | None = None,
         priority: float = 0.0,
     ) -> None:
         """Dispatches a task to a worker for execution."""
@@ -91,7 +91,7 @@ class ActionFactory:
         self,
         integration: str,
         message: str,
-        transitions:
+        transitions: dict[str, str],
     ) -> None:
         """Pauses the pipeline until an external signal (human approval) is received."""
         self._check_for_existing_action()
@@ -106,8 +106,8 @@ class ActionFactory:
     def run_blueprint(
         self,
         blueprint_name: str,
-        initial_data:
-        transitions:
+        initial_data: dict[str, Any],
+        transitions: dict[str, str],
     ) -> None:
         """Runs a child blueprint and waits for its result."""
         self._check_for_existing_action()
````
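The `dispatch_task` signature above is the heart of the handler-side API. A hedged sketch of a handler invoking it; only the parameter names come from the diff, while the handler shape, transition keys, and the contents of the `gpu_info` requirement are assumptions:

```python
async def on_transcode(ctx) -> None:  # handler shape assumed
    ctx.actions.dispatch_task(
        task_type="transcode_video",
        params={"path": ctx.initial_data["path"], "quality": "high"},
        transitions={"success": "publish", "failure": "quarantine"},  # keys assumed
        dispatch_strategy="least_connections",  # strategy name from dispatcher.py
        resource_requirements={"gpu_info": {"vram_gb": 16}},  # field contents assumed
        timeout_seconds=600,
        max_cost=0.05,  # workers above this cost_per_second are filtered out
        priority=1.0,
    )
```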
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/data_types.py`** (several removed lines were truncated by extraction and are reproduced as-is)

````diff
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 if TYPE_CHECKING:
     from .context import ActionFactory
@@ -9,8 +9,7 @@ class ClientConfig(NamedTuple):
 
     token: str
     plan: str
-
-    params: Dict[str, Any]
+    params: dict[str, Any]
 
 
 class JobContext(NamedTuple):
@@ -18,13 +17,13 @@ class JobContext(NamedTuple):
 
     job_id: str
     current_state: str
-    initial_data:
-    state_history:
+    initial_data: dict[str, Any]
+    state_history: dict[str, Any]
     client: ClientConfig
     actions: "ActionFactory"
     data_stores: Any = None
-    tracing_context:
-    aggregation_results:
+    tracing_context: dict[str, Any] = {}
+    aggregation_results: dict[str, Any] | None = None
 
 
 class GPUInfo(NamedTuple):
````
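Most of the per-module churn in this release is the same mechanical migration visible here: `typing.Dict`/`Optional[...]` annotations become built-in generics (PEP 585) and `X | None` unions (PEP 604), both available under the package's `requires-python = ">=3.11"` floor. Side by side:

```python
from typing import Any

# Before (pre-PEP 585/604 spelling):
#     from typing import Dict, Optional
#     params: Dict[str, Any]
#     aggregation_results: Optional[Dict[str, Any]] = None

# After, as in the hunks above:
params: dict[str, Any] = {}
aggregation_results: dict[str, Any] | None = None
```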
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/datastore.py`**

````diff
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any
 
 
 class AsyncDictStore:
@@ -6,7 +6,7 @@ class AsyncDictStore:
     Simulates the behavior of a persistent store for use in blueprints.
     """
 
-    def __init__(self, initial_data:
+    def __init__(self, initial_data: dict[str, Any]):
         self._data = initial_data.copy()
 
     async def get(self, key: str) -> Any:
````
**`{avtomatika-1.0b3 → avtomatika-1.0b5}/src/avtomatika/dispatcher.py`** (several removed lines were truncated by extraction and are reproduced as-is)

````diff
@@ -1,7 +1,7 @@
 from collections import defaultdict
 from logging import getLogger
 from random import choice
-from typing import Any
+from typing import Any
 from uuid import uuid4
 
 try:
@@ -26,12 +26,12 @@ class Dispatcher:
     def __init__(self, storage: StorageBackend, config: Config):
         self.storage = storage
         self.config = config
-        self._round_robin_indices:
+        self._round_robin_indices: dict[str, int] = defaultdict(int)
 
     @staticmethod
     def _is_worker_compliant(
-        worker:
-        requirements:
+        worker: dict[str, Any],
+        requirements: dict[str, Any],
     ) -> bool:
         """Checks if a worker meets the specified resource requirements."""
         if required_gpu := requirements.get("gpu_info"):
@@ -58,9 +58,9 @@ class Dispatcher:
 
     @staticmethod
     def _select_default(
-        workers:
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) ->
+    ) -> dict[str, Any]:
         """Default strategy: first selects "warm" workers (those that have the
         task in their cache), and then selects the cheapest among them.
 
@@ -80,9 +80,9 @@ class Dispatcher:
 
     def _select_round_robin(
         self,
-        workers:
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) ->
+    ) -> dict[str, Any]:
         """ "Round Robin" strategy: distributes tasks sequentially among all
         available workers.
         """
@@ -93,9 +93,9 @@ class Dispatcher:
 
     @staticmethod
     def _select_least_connections(
-        workers:
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) ->
+    ) -> dict[str, Any]:
         """ "Least Connections" strategy: selects the worker with the fewest
         active tasks (based on the `load` field).
         """
@@ -103,14 +103,14 @@ class Dispatcher:
 
     @staticmethod
     def _select_cheapest(
-        workers:
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) ->
+    ) -> dict[str, Any]:
         """Selects the cheapest worker based on 'cost_per_second'."""
         return min(workers, key=lambda w: w.get("cost_per_second", float("inf")))
 
     @staticmethod
-    def _get_best_value_score(worker:
+    def _get_best_value_score(worker: dict[str, Any]) -> float:
         """Calculates a "score" for a worker using the formula cost / reputation.
         The lower the score, the better.
         """
@@ -122,13 +122,13 @@ class Dispatcher:
 
     def _select_best_value(
         self,
-        workers:
+        workers: list[dict[str, Any]],
         task_type: str,
-    ) ->
+    ) -> dict[str, Any]:
         """Selects the worker with the best price-quality (reputation) ratio."""
         return min(workers, key=self._get_best_value_score)
 
-    async def dispatch(self, job_state:
+    async def dispatch(self, job_state: dict[str, Any], task_info: dict[str, Any]):
         job_id = job_state["id"]
         task_type = task_info.get("type")
         if not task_type:
@@ -142,7 +142,6 @@ class Dispatcher:
         if not all_workers:
             raise RuntimeError("No available workers")
 
-        # 1. Filter by 'idle' status
         # A worker is considered available if its status is 'idle' or not specified (for backward compatibility)
         logger.debug(f"All available workers: {[w['worker_id'] for w in all_workers]}")
         idle_workers = [w for w in all_workers if w.get("status", "idle") == "idle"]
@@ -157,13 +156,13 @@ class Dispatcher:
             )
             raise RuntimeError("No idle workers (all are 'busy')")
 
-        #
+        # Filter by task type
         capable_workers = [w for w in idle_workers if task_type in w.get("supported_tasks", [])]
         logger.debug(f"Capable workers for task '{task_type}': {[w['worker_id'] for w in capable_workers]}")
         if not capable_workers:
             raise RuntimeError(f"No suitable workers for task type '{task_type}'")
 
-        #
+        # Filter by resource requirements
         if resource_requirements:
             compliant_workers = [w for w in capable_workers if self._is_worker_compliant(w, resource_requirements)]
             logger.debug(
@@ -176,7 +175,7 @@ class Dispatcher:
             )
             capable_workers = compliant_workers
 
-        #
+        # Filter by maximum cost
         max_cost = task_info.get("max_cost")
         if max_cost is not None:
             cost_compliant_workers = [w for w in capable_workers if w.get("cost_per_second", float("inf")) <= max_cost]
@@ -189,7 +188,7 @@ class Dispatcher:
             )
             capable_workers = cost_compliant_workers
 
-        #
+        # Select worker according to strategy
         if dispatch_strategy == "round_robin":
             selected_worker = self._select_round_robin(capable_workers, task_type)
         elif dispatch_strategy == "least_connections":
````