avtomatika 1.0b1__tar.gz → 1.0b3__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (71)
  1. {avtomatika-1.0b1 → avtomatika-1.0b3}/PKG-INFO +37 -12
  2. avtomatika-1.0b1/src/avtomatika.egg-info/PKG-INFO → avtomatika-1.0b3/README.md +27 -48
  3. {avtomatika-1.0b1 → avtomatika-1.0b3}/pyproject.toml +10 -10
  4. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/__init__.py +2 -3
  5. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/api.html +14 -0
  6. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/blueprint.py +13 -8
  7. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/client_config_loader.py +18 -6
  8. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/dispatcher.py +13 -19
  9. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/engine.py +28 -15
  10. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/executor.py +6 -3
  11. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/ratelimit.py +3 -10
  12. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/reputation.py +11 -2
  13. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/security.py +5 -3
  14. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/__init__.py +3 -3
  15. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/base.py +23 -0
  16. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/memory.py +34 -8
  17. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/storage/redis.py +37 -20
  18. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/telemetry.py +3 -3
  19. avtomatika-1.0b3/src/avtomatika/watcher.py +82 -0
  20. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/worker_config_loader.py +11 -3
  21. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/ws_manager.py +2 -1
  22. avtomatika-1.0b1/README.md → avtomatika-1.0b3/src/avtomatika.egg-info/PKG-INFO +73 -2
  23. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/SOURCES.txt +5 -0
  24. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/requires.txt +8 -8
  25. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_client_config_loader.py +7 -6
  26. avtomatika-1.0b3/tests/test_compression.py +121 -0
  27. avtomatika-1.0b3/tests/test_config_validation.py +60 -0
  28. avtomatika-1.0b3/tests/test_dispatcher_extended.py +95 -0
  29. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_integration.py +9 -2
  30. avtomatika-1.0b3/tests/test_memory_locking.py +44 -0
  31. avtomatika-1.0b3/tests/test_redis_locking.py +45 -0
  32. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_watcher.py +3 -0
  33. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_worker_config_loader.py +13 -6
  34. avtomatika-1.0b1/src/avtomatika/watcher.py +0 -68
  35. {avtomatika-1.0b1 → avtomatika-1.0b3}/LICENSE +0 -0
  36. {avtomatika-1.0b1 → avtomatika-1.0b3}/setup.cfg +0 -0
  37. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/compression.py +0 -0
  38. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/config.py +0 -0
  39. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/context.py +0 -0
  40. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/data_types.py +0 -0
  41. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/datastore.py +0 -0
  42. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/health_checker.py +0 -0
  43. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/base.py +0 -0
  44. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/noop.py +0 -0
  45. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/postgres.py +0 -0
  46. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/history/sqlite.py +0 -0
  47. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/logging_config.py +0 -0
  48. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/metrics.py +0 -0
  49. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/py.typed +0 -0
  50. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika/quota.py +0 -0
  51. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/dependency_links.txt +0 -0
  52. {avtomatika-1.0b1 → avtomatika-1.0b3}/src/avtomatika.egg-info/top_level.txt +0 -0
  53. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_blueprint_conditions.py +0 -0
  54. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_blueprints.py +0 -0
  55. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_context.py +0 -0
  56. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_dispatcher.py +0 -0
  57. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_engine.py +0 -0
  58. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_error_handling.py +0 -0
  59. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_executor.py +0 -0
  60. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_health_checker.py +0 -0
  61. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_history.py +0 -0
  62. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_logging_config.py +0 -0
  63. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_memory_storage.py +0 -0
  64. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_metrics.py +0 -0
  65. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_noop_history.py +0 -0
  66. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_postgres_history.py +0 -0
  67. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_ratelimit.py +0 -0
  68. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_redis_storage.py +0 -0
  69. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_reputation.py +0 -0
  70. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_telemetry.py +0 -0
  71. {avtomatika-1.0b1 → avtomatika-1.0b3}/tests/test_ws_manager.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avtomatika
3
- Version: 1.0b1
4
- Summary: A state-machine based orchestrator for long-running jobs.
3
+ Version: 1.0b3
4
+ Summary: A state-machine based orchestrator for long-running AI and other jobs.
5
5
  Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
6
6
  Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
7
7
  Classifier: Development Status :: 4 - Beta
@@ -18,25 +18,25 @@ Requires-Dist: graphviz~=0.21
18
18
  Requires-Dist: zstandard~=0.24
19
19
  Requires-Dist: aioprometheus~=23.12
20
20
  Provides-Extra: redis
21
- Requires-Dist: redis~=6.4; extra == "redis"
21
+ Requires-Dist: redis~=7.1; extra == "redis"
22
22
  Requires-Dist: orjson~=3.11; extra == "redis"
23
23
  Provides-Extra: history
24
- Requires-Dist: aiosqlite~=0.21; extra == "history"
24
+ Requires-Dist: aiosqlite~=0.22; extra == "history"
25
25
  Requires-Dist: asyncpg~=0.30; extra == "history"
26
26
  Requires-Dist: orjson~=3.11; extra == "history"
27
27
  Provides-Extra: telemetry
28
- Requires-Dist: opentelemetry-api~=1.38; extra == "telemetry"
29
- Requires-Dist: opentelemetry-sdk~=1.38; extra == "telemetry"
30
- Requires-Dist: opentelemetry-exporter-otlp~=1.36; extra == "telemetry"
28
+ Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
29
+ Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
30
+ Requires-Dist: opentelemetry-exporter-otlp~=1.39; extra == "telemetry"
31
31
  Requires-Dist: opentelemetry-instrumentation-aiohttp-client~=0.59b0; extra == "telemetry"
32
32
  Provides-Extra: test
33
- Requires-Dist: pytest~=8.4; extra == "test"
33
+ Requires-Dist: pytest~=9.0; extra == "test"
34
34
  Requires-Dist: pytest-asyncio~=1.1; extra == "test"
35
- Requires-Dist: fakeredis~=2.31; extra == "test"
35
+ Requires-Dist: fakeredis~=2.33; extra == "test"
36
36
  Requires-Dist: pytest-aiohttp~=1.1; extra == "test"
37
37
  Requires-Dist: pytest-mock~=3.14; extra == "test"
38
38
  Requires-Dist: aioresponses~=0.7; extra == "test"
39
- Requires-Dist: backports.zstd; extra == "test"
39
+ Requires-Dist: backports.zstd~=1.2; extra == "test"
40
40
  Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
41
41
  Provides-Extra: all
42
42
  Requires-Dist: avtomatika[redis]; extra == "all"
@@ -251,6 +251,11 @@ async def handle_normal(actions):
251
251
  actions.transition_to("normal_processing")
252
252
  ```
253
253
 
254
+ > **Note on Limitations:** The current version of `.when()` uses a simple parser with the following limitations:
255
+ > * **No Nested Attributes:** You can only access direct fields of `context.initial_data` or `context.state_history` (e.g., `context.initial_data.field`). Nested objects (e.g., `context.initial_data.area.field`) are not supported.
256
+ > * **Simple Comparisons Only:** Only the following operators are supported: `==`, `!=`, `>`, `<`, `>=`, `<=`. Complex logical expressions with `AND`, `OR`, or `NOT` are not allowed.
257
+ > * **Limited Value Types:** The parser only recognizes strings (in quotes), integers, and floats. Boolean values (`True`, `False`) and `None` are not correctly parsed and will be treated as strings.
258
+
254
259
  ### 2. Delegating Tasks to Workers (`dispatch_task`)
255
260
 
256
261
  This is the primary function for delegating work. The orchestrator will queue the task and wait for a worker to pick it up and return a result.
@@ -280,7 +285,7 @@ Run multiple tasks simultaneously and gather their results.
280
285
  @my_blueprint.handler_for("process_files")
281
286
  async def fan_out_handler(initial_data, actions):
282
287
  tasks_to_dispatch = [
283
- {"task_type": "file_analysis", "params": {"file": file}}
288
+ {"task_type": "file_analysis", "params": {"file": file}})
284
289
  for file in initial_data.get("files", [])
285
290
  ]
286
291
  # Use dispatch_parallel to send all tasks at once.
@@ -327,6 +332,8 @@ async def cache_handler(data_stores):
327
332
 
328
333
  The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
329
334
 
335
+ **Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
336
+
330
337
  ### Fault Tolerance
331
338
 
332
339
  The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -335,6 +342,13 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
335
342
  * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
336
343
  * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
337
344
 
345
+ ### High Availability & Distributed Locking
346
+
347
+ The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
348
+
349
+ * **Stateless API:** The API is stateless; all state is persisted in Redis.
350
+ * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
351
+
338
352
  ### Storage Backend
339
353
 
340
354
  By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
@@ -368,7 +382,9 @@ The orchestrator uses tokens to authenticate API requests.
368
382
  * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
369
383
  * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
370
384
  * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
371
- * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable.
385
+ * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
386
+
387
+ > **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
372
388
 
373
389
  ### Observability
374
390
 
@@ -401,3 +417,12 @@ To run the `avtomatika` test suite:
401
417
  ```bash
402
418
  pytest avtomatika/tests/
403
419
  ```
420
+
421
+ ## Detailed Documentation
422
+
423
+ For a deeper dive into the system, please refer to the following documents in the `docs/` directory:
424
+
425
+ - [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
426
+ - [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
427
+ - [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
428
+ - [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.
@@ -1,49 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: avtomatika
3
- Version: 1.0b1
4
- Summary: A state-machine based orchestrator for long-running jobs.
5
- Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
6
- Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
7
- Classifier: Development Status :: 4 - Beta
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.11
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: aiohttp~=3.12
15
- Requires-Dist: aiocache~=0.12
16
- Requires-Dist: python-json-logger~=4.0
17
- Requires-Dist: graphviz~=0.21
18
- Requires-Dist: zstandard~=0.24
19
- Requires-Dist: aioprometheus~=23.12
20
- Provides-Extra: redis
21
- Requires-Dist: redis~=6.4; extra == "redis"
22
- Requires-Dist: orjson~=3.11; extra == "redis"
23
- Provides-Extra: history
24
- Requires-Dist: aiosqlite~=0.21; extra == "history"
25
- Requires-Dist: asyncpg~=0.30; extra == "history"
26
- Requires-Dist: orjson~=3.11; extra == "history"
27
- Provides-Extra: telemetry
28
- Requires-Dist: opentelemetry-api~=1.38; extra == "telemetry"
29
- Requires-Dist: opentelemetry-sdk~=1.38; extra == "telemetry"
30
- Requires-Dist: opentelemetry-exporter-otlp~=1.36; extra == "telemetry"
31
- Requires-Dist: opentelemetry-instrumentation-aiohttp-client~=0.59b0; extra == "telemetry"
32
- Provides-Extra: test
33
- Requires-Dist: pytest~=8.4; extra == "test"
34
- Requires-Dist: pytest-asyncio~=1.1; extra == "test"
35
- Requires-Dist: fakeredis~=2.31; extra == "test"
36
- Requires-Dist: pytest-aiohttp~=1.1; extra == "test"
37
- Requires-Dist: pytest-mock~=3.14; extra == "test"
38
- Requires-Dist: aioresponses~=0.7; extra == "test"
39
- Requires-Dist: backports.zstd; extra == "test"
40
- Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
41
- Provides-Extra: all
42
- Requires-Dist: avtomatika[redis]; extra == "all"
43
- Requires-Dist: avtomatika[history]; extra == "all"
44
- Requires-Dist: avtomatika[telemetry]; extra == "all"
45
- Dynamic: license-file
46
-
47
1
  # Avtomatika Orchestrator
48
2
 
49
3
  Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -251,6 +205,11 @@ async def handle_normal(actions):
251
205
  actions.transition_to("normal_processing")
252
206
  ```
253
207
 
208
+ > **Note on Limitations:** The current version of `.when()` uses a simple parser with the following limitations:
209
+ > * **No Nested Attributes:** You can only access direct fields of `context.initial_data` or `context.state_history` (e.g., `context.initial_data.field`). Nested objects (e.g., `context.initial_data.area.field`) are not supported.
210
+ > * **Simple Comparisons Only:** Only the following operators are supported: `==`, `!=`, `>`, `<`, `>=`, `<=`. Complex logical expressions with `AND`, `OR`, or `NOT` are not allowed.
211
+ > * **Limited Value Types:** The parser only recognizes strings (in quotes), integers, and floats. Boolean values (`True`, `False`) and `None` are not correctly parsed and will be treated as strings.
212
+
254
213
  ### 2. Delegating Tasks to Workers (`dispatch_task`)
255
214
 
256
215
  This is the primary function for delegating work. The orchestrator will queue the task and wait for a worker to pick it up and return a result.
@@ -280,7 +239,7 @@ Run multiple tasks simultaneously and gather their results.
280
239
  @my_blueprint.handler_for("process_files")
281
240
  async def fan_out_handler(initial_data, actions):
282
241
  tasks_to_dispatch = [
283
- {"task_type": "file_analysis", "params": {"file": file}}
242
+ {"task_type": "file_analysis", "params": {"file": file}})
284
243
  for file in initial_data.get("files", [])
285
244
  ]
286
245
  # Use dispatch_parallel to send all tasks at once.
@@ -327,6 +286,8 @@ async def cache_handler(data_stores):
327
286
 
328
287
  The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
329
288
 
289
+ **Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.
290
+
330
291
  ### Fault Tolerance
331
292
 
332
293
  The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -335,6 +296,13 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
335
296
  * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
336
297
  * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
337
298
 
299
+ ### High Availability & Distributed Locking
300
+
301
+ The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.
302
+
303
+ * **Stateless API:** The API is stateless; all state is persisted in Redis.
304
+ * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.
305
+
338
306
  ### Storage Backend
339
307
 
340
308
  By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.
@@ -368,7 +336,9 @@ The orchestrator uses tokens to authenticate API requests.
368
336
  * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
369
337
  * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
370
338
  * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
371
- * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable.
339
+ * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
340
+
341
+ > **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
372
342
 
373
343
  ### Observability
374
344
 
@@ -401,3 +371,12 @@ To run the `avtomatika` test suite:
401
371
  ```bash
402
372
  pytest avtomatika/tests/
403
373
  ```
374
+
375
+ ## Detailed Documentation
376
+
377
+ For a deeper dive into the system, please refer to the following documents in the `docs/` directory:
378
+
379
+ - [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
380
+ - [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
381
+ - [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
382
+ - [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "avtomatika"
7
- version = "1.0b1"
8
- description = "A state-machine based orchestrator for long-running jobs."
7
+ version = "1.0b3"
8
+ description = "A state-machine based orchestrator for long-running AI and other jobs."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
11
11
  classifiers = [
@@ -24,22 +24,22 @@ dependencies = [
24
24
  ]
25
25
 
26
26
  [project.optional-dependencies]
27
- redis = ["redis~=6.4", "orjson~=3.11"]
28
- history = ["aiosqlite~=0.21", "asyncpg~=0.30", "orjson~=3.11"]
27
+ redis = ["redis~=7.1", "orjson~=3.11"]
28
+ history = ["aiosqlite~=0.22", "asyncpg~=0.30", "orjson~=3.11"]
29
29
  telemetry = [
30
- "opentelemetry-api~=1.38",
31
- "opentelemetry-sdk~=1.38",
32
- "opentelemetry-exporter-otlp~=1.36",
30
+ "opentelemetry-api~=1.39",
31
+ "opentelemetry-sdk~=1.39",
32
+ "opentelemetry-exporter-otlp~=1.39",
33
33
  "opentelemetry-instrumentation-aiohttp-client~=0.59b0",
34
34
  ]
35
35
  test = [
36
- "pytest~=8.4",
36
+ "pytest~=9.0",
37
37
  "pytest-asyncio~=1.1",
38
- "fakeredis~=2.31",
38
+ "fakeredis~=2.33",
39
39
  "pytest-aiohttp~=1.1",
40
40
  "pytest-mock~=3.14",
41
41
  "aioresponses~=0.7",
42
- "backports.zstd",
42
+ "backports.zstd~=1.2",
43
43
  "opentelemetry-instrumentation-aiohttp-client",
44
44
  ]
45
45
  all = [
@@ -4,6 +4,7 @@
4
4
  This module exposes the primary classes for building and running state-driven automations.
5
5
  """
6
6
 
7
+ import contextlib
7
8
  from importlib.metadata import version
8
9
 
9
10
  __version__ = version("avtomatika")
@@ -22,9 +23,7 @@ __all__ = [
22
23
  "StorageBackend",
23
24
  ]
24
25
 
25
- try:
26
+ with contextlib.suppress(ImportError):
26
27
  from .storage.redis import RedisStorage # noqa: F401
27
28
 
28
29
  __all__.append("RedisStorage")
29
- except ImportError:
30
- pass
@@ -305,6 +305,20 @@
305
305
  responses: [
306
306
  { code: '200 OK', description: 'Successful response.', body: "{...}" }
307
307
  ]
308
+ },
309
+ {
310
+ id: 'post-reload-worker-configs',
311
+ name: 'Reload Worker Configurations',
312
+ method: 'POST',
313
+ path: '/api/{version}/admin/reload-workers',
314
+ description: 'Triggers a dynamic reload of worker configurations from the TOML file. Requires client authentication.',
315
+ parameters: [
316
+ { name: 'version', type: 'string', description: 'API Version', example: 'v1' }
317
+ ],
318
+ request: { body: null },
319
+ responses: [
320
+ { code: '200 OK', description: 'Successful response.', body: { "status": "worker_configs_reloaded" } }
321
+ ]
308
322
  }
309
323
  ]
310
324
  },
@@ -168,8 +168,7 @@ class StateMachineBlueprint:
168
168
  for handler in self.conditional_handlers:
169
169
  if handler.state == state and handler.evaluate(context):
170
170
  return handler.func
171
- default_handler = self.handlers.get(state)
172
- if default_handler:
171
+ if default_handler := self.handlers.get(state):
173
172
  return default_handler
174
173
  raise ValueError(
175
174
  f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
@@ -178,10 +177,13 @@ class StateMachineBlueprint:
178
177
  def render_graph(self, output_filename: Optional[str] = None, output_format: str = "png"):
179
178
  import ast
180
179
  import inspect
180
+ import logging
181
181
  import textwrap
182
182
 
183
183
  from graphviz import Digraph # type: ignore[import]
184
184
 
185
+ logger = logging.getLogger(__name__)
186
+
185
187
  dot = Digraph(comment=f"State Machine for {self.name}")
186
188
  dot.attr("node", shape="box", style="rounded")
187
189
  all_handlers = list(self.handlers.items()) + [(ch.state, ch.func) for ch in self.conditional_handlers]
@@ -222,13 +224,16 @@ class StateMachineBlueprint:
222
224
  value,
223
225
  label=f"on {key}",
224
226
  )
225
- except (TypeError, OSError):
226
- pass
227
+ except (TypeError, OSError) as e:
228
+ logger.warning(
229
+ f"Could not parse handler '{handler_func.__name__}' for state '{handler_state}'. "
230
+ f"Graph may be incomplete. Error: {e}"
231
+ )
227
232
  for state in states:
228
233
  dot.node(state, state)
229
234
 
230
- if output_filename:
231
- dot.render(output_filename, format=output_format, cleanup=True)
232
- print(f"Graph rendered to {output_filename}.{output_format}")
233
- else:
235
+ if not output_filename:
234
236
  return dot.source
237
+ dot.render(output_filename, format=output_format, cleanup=True)
238
+ print(f"Graph rendered to {output_filename}.{output_format}")
239
+ return None
@@ -26,25 +26,37 @@ async def load_client_configs_to_redis(
26
26
  config_path,
27
27
  )
28
28
  return
29
+ except Exception as e:
30
+ logger.error(f"Failed to parse client config file '{config_path}': {e}")
31
+ raise ValueError(f"Invalid client configuration file: {e}") from e
29
32
 
30
33
  loaded_count = 0
31
34
  for client_name, config in clients_data.items():
35
+ if not isinstance(config, dict):
36
+ logger.error(f"Client '{client_name}' configuration must be a table (dict).")
37
+ raise ValueError(f"Invalid configuration for client '{client_name}'")
38
+
32
39
  token = config.get("token")
33
40
  if not token:
34
- logger.warning(
35
- "Skipping client '%s' due to missing 'token' field.",
36
- client_name,
37
- )
38
- continue
41
+ logger.error(f"Client '{client_name}' is missing required 'token' field.")
42
+ raise ValueError(f"Missing token for client '{client_name}'")
43
+
44
+ if not isinstance(token, str):
45
+ logger.error(f"Token for client '{client_name}' must be a string.")
46
+ raise ValueError(f"Invalid token type for client '{client_name}'")
39
47
 
40
48
  # Separate static config from dynamic quota values
41
49
  static_config = {k: v for k, v in config.items() if k != "monthly_attempts"}
42
50
  quota = config.get("monthly_attempts")
43
51
 
52
+ if quota is not None and not isinstance(quota, int):
53
+ logger.error(f"Quota 'monthly_attempts' for client '{client_name}' must be an integer.")
54
+ raise ValueError(f"Invalid quota type for client '{client_name}'")
55
+
44
56
  try:
45
57
  # Assume these storage methods will be implemented
46
58
  await storage.save_client_config(token, static_config)
47
- if quota is not None and isinstance(quota, int):
59
+ if quota is not None:
48
60
  await storage.initialize_client_quota(token, quota)
49
61
 
50
62
  loaded_count += 1
@@ -28,15 +28,13 @@ class Dispatcher:
28
28
  self.config = config
29
29
  self._round_robin_indices: Dict[str, int] = defaultdict(int)
30
30
 
31
+ @staticmethod
31
32
  def _is_worker_compliant(
32
- self,
33
33
  worker: Dict[str, Any],
34
34
  requirements: Dict[str, Any],
35
35
  ) -> bool:
36
36
  """Checks if a worker meets the specified resource requirements."""
37
- # GPU check
38
- required_gpu = requirements.get("gpu_info")
39
- if required_gpu:
37
+ if required_gpu := requirements.get("gpu_info"):
40
38
  gpu_info = worker.get("resources", {}).get("gpu_info")
41
39
  if not gpu_info:
42
40
  return False
@@ -51,17 +49,15 @@ class Dispatcher:
51
49
  ):
52
50
  return False
53
51
 
54
- # Installed models check
55
- required_models = requirements.get("installed_models")
56
- if required_models:
52
+ if required_models := requirements.get("installed_models"):
57
53
  installed_models = {m["name"] for m in worker.get("installed_models", [])}
58
54
  if not set(required_models).issubset(installed_models):
59
55
  return False
60
56
 
61
57
  return True
62
58
 
59
+ @staticmethod
63
60
  def _select_default(
64
- self,
65
61
  workers: List[Dict[str, Any]],
66
62
  task_type: str,
67
63
  ) -> Dict[str, Any]:
@@ -74,7 +70,7 @@ class Dispatcher:
74
70
  """
75
71
  warm_workers = [w for w in workers if task_type in w.get("hot_cache", [])]
76
72
 
77
- target_pool = warm_workers if warm_workers else workers
73
+ target_pool = warm_workers or workers
78
74
 
79
75
  # The `cost` field is deprecated but maintained for backward compatibility.
80
76
  min_cost = min(w.get("cost", float("inf")) for w in target_pool)
@@ -95,8 +91,8 @@ class Dispatcher:
95
91
  self._round_robin_indices[task_type] = idx + 1
96
92
  return selected_worker
97
93
 
94
+ @staticmethod
98
95
  def _select_least_connections(
99
- self,
100
96
  workers: List[Dict[str, Any]],
101
97
  task_type: str,
102
98
  ) -> Dict[str, Any]:
@@ -105,15 +101,16 @@ class Dispatcher:
105
101
  """
106
102
  return min(workers, key=lambda w: w.get("load", 0.0))
107
103
 
104
+ @staticmethod
108
105
  def _select_cheapest(
109
- self,
110
106
  workers: List[Dict[str, Any]],
111
107
  task_type: str,
112
108
  ) -> Dict[str, Any]:
113
109
  """Selects the cheapest worker based on 'cost_per_second'."""
114
110
  return min(workers, key=lambda w: w.get("cost_per_second", float("inf")))
115
111
 
116
- def _get_best_value_score(self, worker: Dict[str, Any]) -> float:
112
+ @staticmethod
113
+ def _get_best_value_score(worker: Dict[str, Any]) -> float:
117
114
  """Calculates a "score" for a worker using the formula cost / reputation.
118
115
  The lower the score, the better.
119
116
  """
@@ -121,9 +118,7 @@ class Dispatcher:
121
118
  # Default reputation is 1.0 if absent
122
119
  reputation = worker.get("reputation", 1.0)
123
120
  # Avoid division by zero
124
- if reputation == 0:
125
- return float("inf")
126
- return cost / reputation
121
+ return float("inf") if reputation == 0 else cost / reputation
127
122
 
128
123
  def _select_best_value(
129
124
  self,
@@ -153,10 +148,9 @@ class Dispatcher:
153
148
  idle_workers = [w for w in all_workers if w.get("status", "idle") == "idle"]
154
149
  logger.debug(f"Idle workers: {[w['worker_id'] for w in idle_workers]}")
155
150
  if not idle_workers:
156
- # If there are no idle workers, check if there are any busy workers in multi-orchestrator mode.
157
- # This doesn't change the logic (an error will still occur), but it makes the logs more informative.
158
- busy_mo_workers = [w for w in all_workers if w.get("status") == "busy" and "multi_orchestrator_info" in w]
159
- if busy_mo_workers:
151
+ if busy_mo_workers := [
152
+ w for w in all_workers if w.get("status") == "busy" and "multi_orchestrator_info" in w
153
+ ]:
160
154
  logger.warning(
161
155
  f"No idle workers. Found {len(busy_mo_workers)} busy workers "
162
156
  f"in multi-orchestrator mode. They are likely performing tasks for other Orchestrators.",
@@ -485,8 +485,7 @@ class OrchestratorEngine:
485
485
  await self.storage.save_job_state(job_id, job_state)
486
486
  # Optionally, trigger a specific 'cancelled' transition if defined in the blueprint
487
487
  transitions = job_state.get("current_task_transitions", {})
488
- next_state = transitions.get("cancelled")
489
- if next_state:
488
+ if next_state := transitions.get("cancelled"):
490
489
  job_state["current_state"] = next_state
491
490
  job_state["status"] = "running" # It's running the cancellation handler now
492
491
  await self.storage.save_job_state(job_id, job_state)
@@ -494,9 +493,7 @@ class OrchestratorEngine:
494
493
  return web.json_response({"status": "result_accepted_cancelled"}, status=200)
495
494
 
496
495
  transitions = job_state.get("current_task_transitions", {})
497
- next_state = transitions.get(result_status)
498
-
499
- if next_state:
496
+ if next_state := transitions.get(result_status):
500
497
  logging.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
501
498
 
502
499
  worker_data = result.get("data")
@@ -584,13 +581,26 @@ class OrchestratorEngine:
584
581
  jobs = await self.storage.get_quarantined_jobs()
585
582
  return web.json_response(jobs)
586
583
 
584
+ async def _reload_worker_configs_handler(self, request: web.Request) -> web.Response:
585
+ """Handles the dynamic reloading of worker configurations."""
586
+ logger.info("Received request to reload worker configurations.")
587
+ if not self.config.WORKERS_CONFIG_PATH:
588
+ return web.json_response(
589
+ {"error": "WORKERS_CONFIG_PATH is not set, cannot reload configs."},
590
+ status=400,
591
+ )
592
+
593
+ await load_worker_configs_to_redis(self.storage, self.config.WORKERS_CONFIG_PATH)
594
+ return web.json_response({"status": "worker_configs_reloaded"})
595
+
587
596
  async def _flush_db_handler(self, request: web.Request) -> web.Response:
588
597
  logger.warning("Received request to flush the database.")
589
598
  await self.storage.flush_all()
590
599
  await load_client_configs_to_redis(self.storage)
591
600
  return web.json_response({"status": "db_flushed"}, status=200)
592
601
 
593
- async def _docs_handler(self, request: web.Request) -> web.Response:
602
+ @staticmethod
603
+ async def _docs_handler(request: web.Request) -> web.Response:
594
604
  from importlib import resources
595
605
 
596
606
  try:
@@ -635,15 +645,7 @@ class OrchestratorEngine:
635
645
  all_protected_apps.append(protected_app)
636
646
 
637
647
  for app in all_protected_apps:
638
- app.router.add_get("/jobs/{job_id}", self._get_job_status_handler)
639
- app.router.add_post("/jobs/{job_id}/cancel", self._cancel_job_handler)
640
- if not isinstance(self.history_storage, NoOpHistoryStorage):
641
- app.router.add_get("/jobs/{job_id}/history", self._get_job_history_handler)
642
- app.router.add_get("/blueprints/{blueprint_name}/graph", self._get_blueprint_graph_handler)
643
- app.router.add_get("/workers", self._get_workers_handler)
644
- app.router.add_get("/jobs", self._get_jobs_handler)
645
- app.router.add_get("/dashboard", self._get_dashboard_handler)
646
-
648
+ self._register_common_routes(app)
647
649
  if has_unversioned_routes:
648
650
  self.app.add_subapp("/api/", protected_app)
649
651
  for version, app in versioned_apps.items():
@@ -663,6 +665,17 @@ class OrchestratorEngine:
663
665
  worker_app.router.add_get("/ws/{worker_id}", self._websocket_handler)
664
666
  self.app.add_subapp("/_worker/", worker_app)
665
667
 
668
+ def _register_common_routes(self, app):
669
+ app.router.add_get("/jobs/{job_id}", self._get_job_status_handler)
670
+ app.router.add_post("/jobs/{job_id}/cancel", self._cancel_job_handler)
671
+ if not isinstance(self.history_storage, NoOpHistoryStorage):
672
+ app.router.add_get("/jobs/{job_id}/history", self._get_job_history_handler)
673
+ app.router.add_get("/blueprints/{blueprint_name}/graph", self._get_blueprint_graph_handler)
674
+ app.router.add_get("/workers", self._get_workers_handler)
675
+ app.router.add_get("/jobs", self._get_jobs_handler)
676
+ app.router.add_get("/dashboard", self._get_dashboard_handler)
677
+ app.router.add_post("/admin/reload-workers", self._reload_worker_configs_handler)
678
+
666
679
  async def _websocket_handler(self, request: web.Request) -> web.WebSocketResponse:
667
680
  worker_id = request.match_info.get("worker_id")
668
681
  if not worker_id:
@@ -35,11 +35,13 @@ except ImportError:
35
35
  def inject(self, *args, **kwargs):
36
36
  pass
37
37
 
38
- def extract(self, *args, **kwargs):
38
+ @staticmethod
39
+ def extract(*args, **kwargs):
39
40
  return None
40
41
 
41
42
  class NoOpTraceContextTextMapPropagator:
42
- def extract(self, *args, **kwargs):
43
+ @staticmethod
44
+ def extract(*args, **kwargs):
43
45
  return None
44
46
 
45
47
  trace = NoOpTracer()
@@ -485,7 +487,8 @@ class JobExecutor:
485
487
  await self.storage.save_job_state(parent_job_id, parent_job_state)
486
488
  await self.storage.enqueue_job(parent_job_id)
487
489
 
488
- def _handle_task_completion(self, task: Task):
490
+ @staticmethod
491
+ def _handle_task_completion(task: Task):
489
492
  """Callback to handle completion of a job processing task."""
490
493
  try:
491
494
  # This will re-raise any exception caught in the task
@@ -1,3 +1,4 @@
1
+ from contextlib import suppress
1
2
  from typing import Awaitable, Callable
2
3
 
3
4
  from aiohttp import web
@@ -23,23 +24,15 @@ def rate_limit_middleware_factory(
23
24
  """Rate-limiting middleware that uses the provided storage backend."""
24
25
  # Determine the key for rate limiting (e.g., by worker_id or IP)
25
26
  # For worker endpoints, we key by worker_id. For others, by IP.
26
- key_identifier = request.match_info.get("worker_id", request.remote)
27
- if not key_identifier:
28
- # Fallback for cases where remote IP might not be available
29
- key_identifier = "unknown"
27
+ key_identifier = request.match_info.get("worker_id", request.remote) or "unknown"
30
28
 
31
29
  # Key by identifier and path to have per-endpoint limits
32
30
  rate_limit_key = f"ratelimit:{key_identifier}:{request.path}"
33
31
 
34
- try:
32
+ with suppress(Exception):
35
33
  count = await storage.increment_key_with_ttl(rate_limit_key, period)
36
34
  if count > limit:
37
35
  return web.json_response({"error": "Too Many Requests"}, status=429)
38
- except Exception:
39
- # If the rate limiter fails for any reason (e.g., Redis down),
40
- # it's safer to let the request through than to block everything.
41
- pass
42
-
43
36
  return await handler(request)
44
37
 
45
38
  return rate_limit_middleware