avtomatika 1.0b8__tar.gz → 1.0b10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {avtomatika-1.0b8/src/avtomatika.egg-info → avtomatika-1.0b10}/PKG-INFO +47 -15
  2. {avtomatika-1.0b8 → avtomatika-1.0b10}/README.md +39 -12
  3. {avtomatika-1.0b8 → avtomatika-1.0b10}/pyproject.toml +11 -4
  4. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/api/handlers.py +5 -257
  5. avtomatika-1.0b10/src/avtomatika/api/routes.py +97 -0
  6. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/api.html +1 -1
  7. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/app_keys.py +1 -0
  8. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/blueprint.py +3 -2
  9. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/config.py +8 -0
  10. avtomatika-1.0b10/src/avtomatika/constants.py +80 -0
  11. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/data_types.py +2 -22
  12. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/dispatcher.py +4 -0
  13. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/engine.py +119 -7
  14. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/executor.py +19 -19
  15. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/logging_config.py +16 -7
  16. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/s3.py +96 -40
  17. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/scheduler_config_loader.py +5 -2
  18. avtomatika-1.0b10/src/avtomatika/security.py +96 -0
  19. avtomatika-1.0b10/src/avtomatika/services/worker_service.py +267 -0
  20. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/base.py +10 -0
  21. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/memory.py +15 -4
  22. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/redis.py +42 -11
  23. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/telemetry.py +8 -7
  24. avtomatika-1.0b10/src/avtomatika/utils/__init__.py +0 -0
  25. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/utils/webhook_sender.py +3 -3
  26. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/watcher.py +4 -2
  27. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/ws_manager.py +16 -8
  28. {avtomatika-1.0b8 → avtomatika-1.0b10/src/avtomatika.egg-info}/PKG-INFO +47 -15
  29. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/SOURCES.txt +9 -0
  30. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/requires.txt +3 -2
  31. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_engine.py +3 -1
  32. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_error_handling.py +34 -18
  33. avtomatika-1.0b10/tests/test_handlers.py +236 -0
  34. avtomatika-1.0b10/tests/test_handlers_sts.py +87 -0
  35. avtomatika-1.0b10/tests/test_horizontal_scaling.py +179 -0
  36. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_integration.py +28 -25
  37. avtomatika-1.0b10/tests/test_mtls.py +194 -0
  38. avtomatika-1.0b10/tests/test_rxon_handler.py +59 -0
  39. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_s3.py +12 -1
  40. avtomatika-1.0b10/tests/test_s3_metadata.py +138 -0
  41. avtomatika-1.0b10/tests/test_sts.py +188 -0
  42. avtomatika-1.0b10/tests/test_validation_integration.py +48 -0
  43. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_ws_manager.py +32 -15
  44. avtomatika-1.0b8/src/avtomatika/api/routes.py +0 -118
  45. avtomatika-1.0b8/src/avtomatika/constants.py +0 -30
  46. avtomatika-1.0b8/src/avtomatika/security.py +0 -114
  47. avtomatika-1.0b8/tests/test_handlers.py +0 -459
  48. {avtomatika-1.0b8 → avtomatika-1.0b10}/LICENSE +0 -0
  49. {avtomatika-1.0b8 → avtomatika-1.0b10}/setup.cfg +0 -0
  50. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/__init__.py +0 -0
  51. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/client_config_loader.py +0 -0
  52. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/compression.py +0 -0
  53. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/context.py +0 -0
  54. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/datastore.py +0 -0
  55. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/health_checker.py +0 -0
  56. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/base.py +0 -0
  57. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/noop.py +0 -0
  58. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/postgres.py +0 -0
  59. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/sqlite.py +0 -0
  60. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/metrics.py +0 -0
  61. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/py.typed +0 -0
  62. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/quota.py +0 -0
  63. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/ratelimit.py +0 -0
  64. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/reputation.py +0 -0
  65. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/scheduler.py +0 -0
  66. {avtomatika-1.0b8/src/avtomatika/utils → avtomatika-1.0b10/src/avtomatika/services}/__init__.py +0 -0
  67. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/__init__.py +0 -0
  68. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/worker_config_loader.py +0 -0
  69. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/dependency_links.txt +0 -0
  70. {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/top_level.txt +0 -0
  71. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprint_conditions.py +0 -0
  72. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprint_integrity.py +0 -0
  73. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprints.py +0 -0
  74. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_client_config_loader.py +0 -0
  75. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_compression.py +0 -0
  76. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_config_validation.py +0 -0
  77. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_context.py +0 -0
  78. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_dispatcher.py +0 -0
  79. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_dispatcher_extended.py +0 -0
  80. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_executor.py +0 -0
  81. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_health_checker.py +0 -0
  82. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_history.py +0 -0
  83. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_logging_config.py +0 -0
  84. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_memory_locking.py +0 -0
  85. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_memory_storage.py +0 -0
  86. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_metrics.py +0 -0
  87. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_noop_history.py +0 -0
  88. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_optimization.py +0 -0
  89. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_postgres_history.py +0 -0
  90. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_ratelimit.py +0 -0
  91. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_redis_locking.py +0 -0
  92. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_redis_storage.py +0 -0
  93. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_reputation.py +0 -0
  94. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_scheduler.py +0 -0
  95. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_telemetry.py +0 -0
  96. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_watcher.py +0 -0
  97. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_webhook_sender.py +0 -0
  98. {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_worker_config_loader.py +0 -0
@@ -1,16 +1,21 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avtomatika
3
- Version: 1.0b8
3
+ Version: 1.0b10
4
4
  Summary: A state-machine based orchestrator for long-running AI and other jobs.
5
+ Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
5
6
  Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
6
7
  Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
8
+ Keywords: orchestrator,state-machine,workflow,distributed,ai,llm,rxon,hln
7
9
  Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
8
11
  Classifier: Programming Language :: Python :: 3
9
12
  Classifier: License :: OSI Approved :: MIT License
10
13
  Classifier: Operating System :: OS Independent
14
+ Classifier: Typing :: Typed
11
15
  Requires-Python: >=3.11
12
16
  Description-Content-Type: text/markdown
13
17
  License-File: LICENSE
18
+ Requires-Dist: rxon==1.0b2
14
19
  Requires-Dist: aiohttp~=3.12
15
20
  Requires-Dist: python-json-logger~=4.0
16
21
  Requires-Dist: graphviz~=0.21
@@ -22,7 +27,7 @@ Provides-Extra: redis
22
27
  Requires-Dist: redis~=7.1; extra == "redis"
23
28
  Provides-Extra: s3
24
29
  Requires-Dist: obstore>=0.2; extra == "s3"
25
- Requires-Dist: aiofiles~=23.2; extra == "s3"
30
+ Requires-Dist: aiofiles~=25.1; extra == "s3"
26
31
  Provides-Extra: history
27
32
  Requires-Dist: aiosqlite~=0.22; extra == "history"
28
33
  Requires-Dist: asyncpg~=0.30; extra == "history"
@@ -41,7 +46,7 @@ Requires-Dist: aioresponses~=0.7; extra == "test"
41
46
  Requires-Dist: backports.zstd~=1.2; extra == "test"
42
47
  Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
43
48
  Requires-Dist: obstore>=0.2; extra == "test"
44
- Requires-Dist: aiofiles~=23.2; extra == "test"
49
+ Requires-Dist: aiofiles~=25.1; extra == "test"
45
50
  Provides-Extra: all
46
51
  Requires-Dist: avtomatika[redis]; extra == "all"
47
52
  Requires-Dist: avtomatika[history]; extra == "all"
@@ -51,6 +56,11 @@ Dynamic: license-file
51
56
 
52
57
  # Avtomatika Orchestrator
53
58
 
59
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
60
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
61
+ [![Tests](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml/badge.svg)](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
62
+ [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
63
+
54
64
  Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
55
65
 
56
66
  This document serves as a comprehensive guide for developers looking to build pipelines (blueprints) and embed the Orchestrator into their applications.
@@ -88,8 +98,9 @@ The project is based on a simple yet powerful architectural pattern that separat
88
98
 
89
99
  Avtomatika is part of a larger ecosystem:
90
100
 
101
+ * **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)**: Shared package containing protocol definitions, data models, and utilities ensuring consistency across all components.
91
102
  * **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
92
- * **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
103
+ * **[HLN Protocol](https://github.com/avtomatika-ai/hln)**: The architectural specification and manifesto behind the system (Hierarchical Logic Network).
93
104
  * **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
94
105
 
95
106
  ## Installation
@@ -140,7 +151,7 @@ storage = MemoryStorage()
140
151
  config = Config() # Loads configuration from environment variables
141
152
 
142
153
  # Explicitly set tokens for this example
143
- # Client token must be sent in the 'X-Avtomatika-Token' header.
154
+ # Client token must be sent in the 'X-Client-Token' header.
144
155
  config.CLIENT_TOKEN = "my-secret-client-token"
145
156
  # Worker token must be sent in the 'X-Worker-Token' header.
146
157
  config.GLOBAL_WORKER_TOKEN = "my-secret-worker-token"
@@ -268,12 +279,18 @@ async def publish_handler_old_style(context):
268
279
 
269
280
  Avtomatika is engineered for high-load environments with thousands of concurrent workers.
270
281
 
271
- * **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly, regardless of the cluster size. No O(N) scanning.
282
+ * **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly.
283
+ * **Zero Trust Security**:
284
+ * **mTLS (Mutual TLS)**: Mutual authentication between Orchestrator and Workers using certificates.
285
+ * **STS (Security Token Service)**: Token rotation mechanism with short-lived access tokens.
286
+ * **Identity Extraction**: Automatically maps Certificate Common Name (CN) to Worker ID.
287
+ * **Data Integrity**:
288
+ * **End-to-End Validation**: Automatic verification of file size and ETag (hash) during S3 transfers.
289
+ * **Audit Trail**: File metadata is logged in history for full traceability.
290
+ * **Protocol Layer**: Built on top of `rxon`, a strict contract defining interactions, ensuring forward compatibility and allowing transport evolution (e.g., to gRPC).
272
291
  * **Non-Blocking I/O**:
273
- * **Webhooks**: Sent via a bounded background queue to prevent backpressure.
274
- * **History Logging**: Writes to SQL databases are buffered and asynchronous, ensuring the main execution loop never blocks.
275
- * **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
276
- * **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
292
+ * **Webhooks**: Sent via a bounded background queue.
293
+ * **S3 Streaming**: Constant memory usage regardless of file size.
277
294
 
278
295
  ## Blueprint Cookbook: Key Features
279
296
 
@@ -477,10 +494,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
477
494
 
478
495
  The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
479
496
 
480
- * **TRANSIENT_ERROR**: A temporary error (e.g., network failure, rate limit). The orchestrator will automatically retry the task several times.
481
- * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
497
+ * **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
498
+ * **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
499
+ * **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
500
+ * **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
482
501
  * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
483
502
 
503
+ ### Progress Tracking
504
+
505
+ Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
506
+
484
507
  ### Concurrency & Performance
485
508
 
486
509
  To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
@@ -525,13 +548,18 @@ By default, the engine uses in-memory storage. For production, you must configur
525
548
 
526
549
  The orchestrator uses tokens to authenticate API requests.
527
550
 
528
- * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
551
+ * **Client Authentication**: All API clients must provide a token in the `X-Client-Token` header. The orchestrator validates this token against client configurations.
529
552
  * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
530
553
  * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
531
554
  * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
532
555
 
533
556
  > **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
534
557
 
558
+ ### Pure Holon Mode
559
+ For high-security environments or when operating as a Compound Holon within an HLN, you can disable the public client API.
560
+ * **Enable/Disable**: Set `ENABLE_CLIENT_API="false"` (default: `true`).
561
+ * **Effect**: The Orchestrator will stop listening on `/api/v1/jobs/...`. It will only accept tasks via the Worker Protocol (RXON) from its parent.
562
+
535
563
  ### Observability
536
564
 
537
565
  When installed with the telemetry dependency, the system automatically provides:
@@ -543,7 +571,11 @@ When installed with the telemetry dependency, the system automatically provides:
543
571
  ### Setup Environment
544
572
 
545
573
  * Clone the repository.
546
- * Install the package in editable mode with all dependencies:
574
+ * **For local development**, install the protocol package first:
575
+ ```bash
576
+ pip install -e ../rxon
577
+ ```
578
+ * Then install the engine in editable mode with all dependencies:
547
579
  ```bash
548
580
  pip install -e ".[all,test]"
549
581
  ```
@@ -561,7 +593,7 @@ When installed with the telemetry dependency, the system automatically provides:
561
593
 
562
594
  To run the `avtomatika` test suite:
563
595
  ```bash
564
- pytest avtomatika/tests/
596
+ pytest tests/
565
597
  ```
566
598
 
567
599
  ### Interactive API Documentation
@@ -1,5 +1,10 @@
1
1
  # Avtomatika Orchestrator
2
2
 
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
4
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
5
+ [![Tests](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml/badge.svg)](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
6
+ [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
7
+
3
8
  Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
4
9
 
5
10
  This document serves as a comprehensive guide for developers looking to build pipelines (blueprints) and embed the Orchestrator into their applications.
@@ -37,8 +42,9 @@ The project is based on a simple yet powerful architectural pattern that separat
37
42
 
38
43
  Avtomatika is part of a larger ecosystem:
39
44
 
45
+ * **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)**: Shared package containing protocol definitions, data models, and utilities ensuring consistency across all components.
40
46
  * **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
41
- * **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
47
+ * **[HLN Protocol](https://github.com/avtomatika-ai/hln)**: The architectural specification and manifesto behind the system (Hierarchical Logic Network).
42
48
  * **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
43
49
 
44
50
  ## Installation
@@ -89,7 +95,7 @@ storage = MemoryStorage()
89
95
  config = Config() # Loads configuration from environment variables
90
96
 
91
97
  # Explicitly set tokens for this example
92
- # Client token must be sent in the 'X-Avtomatika-Token' header.
98
+ # Client token must be sent in the 'X-Client-Token' header.
93
99
  config.CLIENT_TOKEN = "my-secret-client-token"
94
100
  # Worker token must be sent in the 'X-Worker-Token' header.
95
101
  config.GLOBAL_WORKER_TOKEN = "my-secret-worker-token"
@@ -217,12 +223,18 @@ async def publish_handler_old_style(context):
217
223
 
218
224
  Avtomatika is engineered for high-load environments with thousands of concurrent workers.
219
225
 
220
- * **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly, regardless of the cluster size. No O(N) scanning.
226
+ * **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly.
227
+ * **Zero Trust Security**:
228
+ * **mTLS (Mutual TLS)**: Mutual authentication between Orchestrator and Workers using certificates.
229
+ * **STS (Security Token Service)**: Token rotation mechanism with short-lived access tokens.
230
+ * **Identity Extraction**: Automatically maps Certificate Common Name (CN) to Worker ID.
231
+ * **Data Integrity**:
232
+ * **End-to-End Validation**: Automatic verification of file size and ETag (hash) during S3 transfers.
233
+ * **Audit Trail**: File metadata is logged in history for full traceability.
234
+ * **Protocol Layer**: Built on top of `rxon`, a strict contract defining interactions, ensuring forward compatibility and allowing transport evolution (e.g., to gRPC).
221
235
  * **Non-Blocking I/O**:
222
- * **Webhooks**: Sent via a bounded background queue to prevent backpressure.
223
- * **History Logging**: Writes to SQL databases are buffered and asynchronous, ensuring the main execution loop never blocks.
224
- * **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
225
- * **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
236
+ * **Webhooks**: Sent via a bounded background queue.
237
+ * **S3 Streaming**: Constant memory usage regardless of file size.
226
238
 
227
239
  ## Blueprint Cookbook: Key Features
228
240
 
@@ -426,10 +438,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
426
438
 
427
439
  The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
428
440
 
429
- * **TRANSIENT_ERROR**: A temporary error (e.g., network failure, rate limit). The orchestrator will automatically retry the task several times.
430
- * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
441
+ * **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
442
+ * **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
443
+ * **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
444
+ * **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
431
445
  * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
432
446
 
447
+ ### Progress Tracking
448
+
449
+ Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
450
+
433
451
  ### Concurrency & Performance
434
452
 
435
453
  To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
@@ -474,13 +492,18 @@ By default, the engine uses in-memory storage. For production, you must configur
474
492
 
475
493
  The orchestrator uses tokens to authenticate API requests.
476
494
 
477
- * **Client Authentication**: All API clients must provide a token in the `X-Avtomatika-Token` header. The orchestrator validates this token against client configurations.
495
+ * **Client Authentication**: All API clients must provide a token in the `X-Client-Token` header. The orchestrator validates this token against client configurations.
478
496
  * **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
479
497
  * `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
480
498
  * **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
481
499
 
482
500
  > **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
483
501
 
502
+ ### Pure Holon Mode
503
+ For high-security environments or when operating as a Compound Holon within an HLN, you can disable the public client API.
504
+ * **Enable/Disable**: Set `ENABLE_CLIENT_API="false"` (default: `true`).
505
+ * **Effect**: The Orchestrator will stop listening on `/api/v1/jobs/...`. It will only accept tasks via the Worker Protocol (RXON) from its parent.
506
+
484
507
  ### Observability
485
508
 
486
509
  When installed with the telemetry dependency, the system automatically provides:
@@ -492,7 +515,11 @@ When installed with the telemetry dependency, the system automatically provides:
492
515
  ### Setup Environment
493
516
 
494
517
  * Clone the repository.
495
- * Install the package in editable mode with all dependencies:
518
+ * **For local development**, install the protocol package first:
519
+ ```bash
520
+ pip install -e ../rxon
521
+ ```
522
+ * Then install the engine in editable mode with all dependencies:
496
523
  ```bash
497
524
  pip install -e ".[all,test]"
498
525
  ```
@@ -510,7 +537,7 @@ When installed with the telemetry dependency, the system automatically provides:
510
537
 
511
538
  To run the `avtomatika` test suite:
512
539
  ```bash
513
- pytest avtomatika/tests/
540
+ pytest tests/
514
541
  ```
515
542
 
516
543
  ### Interactive API Documentation
@@ -4,17 +4,24 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "avtomatika"
7
- version = "1.0b8"
7
+ version = "1.0b10"
8
8
  description = "A state-machine based orchestrator for long-running AI and other jobs."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
11
+ authors = [
12
+ {name = "Dmitrii Gagarin", email = "madgagarin@gmail.com"},
13
+ ]
14
+ keywords = ["orchestrator", "state-machine", "workflow", "distributed", "ai", "llm", "rxon", "hln"]
11
15
  classifiers = [
12
16
  "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
13
18
  "Programming Language :: Python :: 3",
14
19
  "License :: OSI Approved :: MIT License",
15
20
  "Operating System :: OS Independent",
21
+ "Typing :: Typed",
16
22
  ]
17
23
  dependencies = [
24
+ "rxon==1.0b2",
18
25
  "aiohttp~=3.12",
19
26
  "python-json-logger~=4.0",
20
27
  "graphviz~=0.21",
@@ -26,7 +33,7 @@ dependencies = [
26
33
 
27
34
  [project.optional-dependencies]
28
35
  redis = ["redis~=7.1"]
29
- s3 = ["obstore>=0.2", "aiofiles~=23.2"]
36
+ s3 = ["obstore>=0.2", "aiofiles~=25.1"]
30
37
  history = ["aiosqlite~=0.22", "asyncpg~=0.30"]
31
38
  telemetry = [
32
39
  "opentelemetry-api~=1.39",
@@ -44,7 +51,7 @@ test = [
44
51
  "backports.zstd~=1.2",
45
52
  "opentelemetry-instrumentation-aiohttp-client",
46
53
  "obstore>=0.2",
47
- "aiofiles~=23.2",
54
+ "aiofiles~=25.1",
48
55
  ]
49
56
  all = [
50
57
  "avtomatika[redis]",
@@ -58,7 +65,7 @@ all = [
58
65
  "Bug Tracker" = "https://github.com/avtomatika-ai/avtomatika/issues"
59
66
 
60
67
  [tool.setuptools.package-data]
61
- "avtomatika" = ["api.html"]
68
+ "avtomatika" = ["api.html", "py.typed"]
62
69
 
63
70
  [tool.setuptools.packages.find]
64
71
  where = ["src"]
@@ -3,7 +3,7 @@ from logging import getLogger
3
3
  from typing import Any, Callable
4
4
  from uuid import uuid4
5
5
 
6
- from aiohttp import WSMsgType, web
6
+ from aiohttp import web
7
7
  from aioprometheus import render
8
8
  from orjson import OPT_INDENT_2, dumps, loads
9
9
 
@@ -14,31 +14,22 @@ from ..app_keys import (
14
14
  from ..blueprint import StateMachineBlueprint
15
15
  from ..client_config_loader import load_client_configs_to_redis
16
16
  from ..constants import (
17
- ERROR_CODE_INVALID_INPUT,
18
- ERROR_CODE_PERMANENT,
19
- ERROR_CODE_TRANSIENT,
20
- JOB_STATUS_CANCELLED,
21
- JOB_STATUS_FAILED,
17
+ COMMAND_CANCEL_TASK,
22
18
  JOB_STATUS_PENDING,
23
- JOB_STATUS_QUARANTINED,
24
19
  JOB_STATUS_RUNNING,
25
20
  JOB_STATUS_WAITING_FOR_HUMAN,
26
- JOB_STATUS_WAITING_FOR_PARALLEL,
27
21
  JOB_STATUS_WAITING_FOR_WORKER,
28
- TASK_STATUS_CANCELLED,
29
- TASK_STATUS_FAILURE,
30
- TASK_STATUS_SUCCESS,
31
22
  )
32
23
  from ..worker_config_loader import load_worker_configs_to_redis
33
24
 
34
25
  logger = getLogger(__name__)
35
26
 
36
27
 
37
- def json_dumps(obj) -> str:
28
+ def json_dumps(obj: Any) -> str:
38
29
  return dumps(obj).decode("utf-8")
39
30
 
40
31
 
41
- def json_response(data, **kwargs) -> web.Response:
32
+ def json_response(data: Any, **kwargs: Any) -> web.Response:
42
33
  return web.json_response(data, dumps=json_dumps, **kwargs)
43
34
 
44
35
 
@@ -138,7 +129,7 @@ async def cancel_job_handler(request: web.Request) -> web.Response:
138
129
 
139
130
  # Attempt WebSocket-based cancellation if supported
140
131
  if worker_info and worker_info.get("capabilities", {}).get("websockets"):
141
- command = {"command": "cancel_task", "task_id": task_id, "job_id": job_id}
132
+ command = {"command": COMMAND_CANCEL_TASK, "task_id": task_id, "job_id": job_id}
142
133
  sent = await engine.ws_manager.send_command(worker_id, command)
143
134
  if sent:
144
135
  return json_response({"status": "cancellation_request_sent"})
@@ -208,143 +199,6 @@ async def get_dashboard_handler(request: web.Request) -> web.Response:
208
199
  return json_response(dashboard_data)
209
200
 
210
201
 
211
- async def task_result_handler(request: web.Request) -> web.Response:
212
- engine = request.app[ENGINE_KEY]
213
- try:
214
- data = await request.json(loads=loads)
215
- job_id = data.get("job_id")
216
- task_id = data.get("task_id")
217
- result = data.get("result", {})
218
- result_status = result.get("status", TASK_STATUS_SUCCESS)
219
- error_message = result.get("error")
220
- payload_worker_id = data.get("worker_id")
221
- except Exception:
222
- return json_response({"error": "Invalid JSON body"}, status=400)
223
-
224
- # Security check: Ensure the worker_id from the payload matches the authenticated worker
225
- authenticated_worker_id = request.get("worker_id")
226
- if not authenticated_worker_id:
227
- return json_response({"error": "Could not identify authenticated worker."}, status=500)
228
-
229
- if payload_worker_id and payload_worker_id != authenticated_worker_id:
230
- return json_response(
231
- {
232
- "error": f"Forbidden: Authenticated worker '{authenticated_worker_id}' "
233
- f"cannot submit results for another worker '{payload_worker_id}'.",
234
- },
235
- status=403,
236
- )
237
-
238
- if not job_id or not task_id:
239
- return json_response({"error": "job_id and task_id are required"}, status=400)
240
-
241
- job_state = await engine.storage.get_job_state(job_id)
242
- if not job_state:
243
- return json_response({"error": "Job not found"}, status=404)
244
-
245
- # Handle parallel task completion
246
- if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
247
- await engine.storage.remove_job_from_watch(f"{job_id}:{task_id}")
248
- job_state.setdefault("aggregation_results", {})[task_id] = result
249
- job_state.setdefault("active_branches", []).remove(task_id)
250
-
251
- if not job_state["active_branches"]:
252
- logger.info(f"All parallel branches for job {job_id} have completed.")
253
- job_state["status"] = JOB_STATUS_RUNNING
254
- job_state["current_state"] = job_state["aggregation_target"]
255
- await engine.storage.save_job_state(job_id, job_state)
256
- await engine.storage.enqueue_job(job_id)
257
- else:
258
- logger.info(
259
- f"Branch {task_id} for job {job_id} completed. Waiting for {len(job_state['active_branches'])} more.",
260
- )
261
- await engine.storage.save_job_state(job_id, job_state)
262
-
263
- return json_response({"status": "parallel_branch_result_accepted"}, status=200)
264
-
265
- await engine.storage.remove_job_from_watch(job_id)
266
-
267
- import time
268
-
269
- now = time.monotonic()
270
- dispatched_at = job_state.get("task_dispatched_at", now)
271
- duration_ms = int((now - dispatched_at) * 1000)
272
-
273
- await engine.history_storage.log_job_event(
274
- {
275
- "job_id": job_id,
276
- "state": job_state.get("current_state"),
277
- "event_type": "task_finished",
278
- "duration_ms": duration_ms,
279
- "worker_id": authenticated_worker_id,
280
- "context_snapshot": {**job_state, "result": result},
281
- },
282
- )
283
-
284
- job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}
285
-
286
- if result_status == TASK_STATUS_FAILURE:
287
- error_details = result.get("error", {})
288
- error_type = ERROR_CODE_TRANSIENT
289
- error_message = "No error details provided."
290
-
291
- if isinstance(error_details, dict):
292
- error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
293
- error_message = error_details.get("message", "No error message provided.")
294
- elif isinstance(error_details, str):
295
- error_message = error_details
296
-
297
- logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
298
-
299
- if error_type == ERROR_CODE_PERMANENT:
300
- job_state["status"] = JOB_STATUS_QUARANTINED
301
- job_state["error_message"] = f"Task failed with permanent error: {error_message}"
302
- await engine.storage.save_job_state(job_id, job_state)
303
- await engine.storage.quarantine_job(job_id)
304
- elif error_type == ERROR_CODE_INVALID_INPUT:
305
- job_state["status"] = JOB_STATUS_FAILED
306
- job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
307
- await engine.storage.save_job_state(job_id, job_state)
308
- else: # TRANSIENT_ERROR
309
- await engine.handle_task_failure(job_state, task_id, error_message)
310
-
311
- return json_response({"status": "result_accepted_failure"}, status=200)
312
-
313
- if result_status == TASK_STATUS_CANCELLED:
314
- logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
315
- job_state["status"] = JOB_STATUS_CANCELLED
316
- await engine.storage.save_job_state(job_id, job_state)
317
- transitions = job_state.get("current_task_transitions", {})
318
- if next_state := transitions.get("cancelled"):
319
- job_state["current_state"] = next_state
320
- job_state["status"] = JOB_STATUS_RUNNING
321
- await engine.storage.save_job_state(job_id, job_state)
322
- await engine.storage.enqueue_job(job_id)
323
- return json_response({"status": "result_accepted_cancelled"}, status=200)
324
-
325
- transitions = job_state.get("current_task_transitions", {})
326
- if next_state := transitions.get(result_status):
327
- logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
328
-
329
- worker_data = result.get("data")
330
- if worker_data and isinstance(worker_data, dict):
331
- if "state_history" not in job_state:
332
- job_state["state_history"] = {}
333
- job_state["state_history"].update(worker_data)
334
-
335
- job_state["current_state"] = next_state
336
- job_state["status"] = JOB_STATUS_RUNNING
337
- await engine.storage.save_job_state(job_id, job_state)
338
- await engine.storage.enqueue_job(job_id)
339
- else:
340
- logger.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
341
- job_state["status"] = JOB_STATUS_FAILED
342
- job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
343
- await engine.storage.save_job_state(job_id, job_state)
344
-
345
- return json_response({"status": "result_accepted_success"}, status=200)
346
-
347
-
348
202
  async def human_approval_webhook_handler(request: web.Request) -> web.Response:
349
203
  engine = request.app[ENGINE_KEY]
350
204
  job_id = request.match_info.get("job_id")
@@ -441,109 +295,3 @@ async def docs_handler(request: web.Request) -> web.Response:
441
295
  content = content.replace(marker, f"{marker}\n{endpoints_json.strip('[]')},")
442
296
 
443
297
  return web.Response(text=content, content_type="text/html")
444
-
445
-
446
- async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
447
- engine = request.app[ENGINE_KEY]
448
- worker_id = request.match_info.get("worker_id")
449
- if not worker_id:
450
- raise web.HTTPBadRequest(text="worker_id is required")
451
-
452
- ws = web.WebSocketResponse()
453
- await ws.prepare(request)
454
-
455
- await engine.ws_manager.register(worker_id, ws)
456
- try:
457
- async for msg in ws:
458
- if msg.type == WSMsgType.TEXT:
459
- try:
460
- data = msg.json()
461
- await engine.ws_manager.handle_message(worker_id, data)
462
- except Exception as e:
463
- logger.error(f"Error processing WebSocket message from {worker_id}: {e}")
464
- elif msg.type == WSMsgType.ERROR:
465
- logger.error(f"WebSocket connection for {worker_id} closed with exception {ws.exception()}")
466
- break
467
- finally:
468
- await engine.ws_manager.unregister(worker_id)
469
- return ws
470
-
471
-
472
- async def handle_get_next_task(request: web.Request) -> web.Response:
473
- engine = request.app[ENGINE_KEY]
474
- worker_id = request.match_info.get("worker_id")
475
- if not worker_id:
476
- return json_response({"error": "worker_id is required in path"}, status=400)
477
-
478
- logger.debug(f"Worker {worker_id} is requesting a new task.")
479
- task = await engine.storage.dequeue_task_for_worker(worker_id, engine.config.WORKER_POLL_TIMEOUT_SECONDS)
480
-
481
- if task:
482
- logger.info(f"Sending task {task.get('task_id')} to worker {worker_id}")
483
- return json_response(task, status=200)
484
- logger.debug(f"No tasks for worker {worker_id}, responding 204.")
485
- return web.Response(status=204)
486
-
487
-
488
- async def worker_update_handler(request: web.Request) -> web.Response:
489
- engine = request.app[ENGINE_KEY]
490
- worker_id = request.match_info.get("worker_id")
491
- if not worker_id:
492
- return json_response({"error": "worker_id is required in path"}, status=400)
493
-
494
- ttl = engine.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
495
- update_data = None
496
-
497
- if request.can_read_body:
498
- try:
499
- update_data = await request.json(loads=loads)
500
- except Exception:
501
- logger.warning(
502
- f"Received PATCH from worker {worker_id} with non-JSON body. Treating as TTL-only heartbeat."
503
- )
504
-
505
- if update_data:
506
- updated_worker = await engine.storage.update_worker_status(worker_id, update_data, ttl)
507
- if not updated_worker:
508
- return json_response({"error": "Worker not found"}, status=404)
509
-
510
- await engine.history_storage.log_worker_event(
511
- {
512
- "worker_id": worker_id,
513
- "event_type": "status_update",
514
- "worker_info_snapshot": updated_worker,
515
- },
516
- )
517
- return json_response(updated_worker, status=200)
518
- else:
519
- refreshed = await engine.storage.refresh_worker_ttl(worker_id, ttl)
520
- if not refreshed:
521
- return json_response({"error": "Worker not found"}, status=404)
522
- return json_response({"status": "ttl_refreshed"})
523
-
524
-
525
- async def register_worker_handler(request: web.Request) -> web.Response:
526
- engine = request.app[ENGINE_KEY]
527
- worker_data = request.get("worker_registration_data")
528
- if not worker_data:
529
- return json_response({"error": "Worker data not found in request"}, status=500)
530
-
531
- worker_id = worker_data.get("worker_id")
532
- if not worker_id:
533
- return json_response({"error": "Missing required field: worker_id"}, status=400)
534
-
535
- ttl = engine.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
536
- await engine.storage.register_worker(worker_id, worker_data, ttl)
537
-
538
- logger.info(
539
- f"Worker '{worker_id}' registered with info: {worker_data}",
540
- )
541
-
542
- await engine.history_storage.log_worker_event(
543
- {
544
- "worker_id": worker_id,
545
- "event_type": "registered",
546
- "worker_info_snapshot": worker_data,
547
- },
548
- )
549
- return json_response({"status": "registered"}, status=200)