avtomatika 1.0b8__tar.gz → 1.0b10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b8/src/avtomatika.egg-info → avtomatika-1.0b10}/PKG-INFO +47 -15
- {avtomatika-1.0b8 → avtomatika-1.0b10}/README.md +39 -12
- {avtomatika-1.0b8 → avtomatika-1.0b10}/pyproject.toml +11 -4
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/api/handlers.py +5 -257
- avtomatika-1.0b10/src/avtomatika/api/routes.py +97 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/api.html +1 -1
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/app_keys.py +1 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/blueprint.py +3 -2
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/config.py +8 -0
- avtomatika-1.0b10/src/avtomatika/constants.py +80 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/data_types.py +2 -22
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/dispatcher.py +4 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/engine.py +119 -7
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/executor.py +19 -19
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/logging_config.py +16 -7
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/s3.py +96 -40
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/scheduler_config_loader.py +5 -2
- avtomatika-1.0b10/src/avtomatika/security.py +96 -0
- avtomatika-1.0b10/src/avtomatika/services/worker_service.py +267 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/base.py +10 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/memory.py +15 -4
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/redis.py +42 -11
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/telemetry.py +8 -7
- avtomatika-1.0b10/src/avtomatika/utils/__init__.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/utils/webhook_sender.py +3 -3
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/watcher.py +4 -2
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/ws_manager.py +16 -8
- {avtomatika-1.0b8 → avtomatika-1.0b10/src/avtomatika.egg-info}/PKG-INFO +47 -15
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/SOURCES.txt +9 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/requires.txt +3 -2
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_engine.py +3 -1
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_error_handling.py +34 -18
- avtomatika-1.0b10/tests/test_handlers.py +236 -0
- avtomatika-1.0b10/tests/test_handlers_sts.py +87 -0
- avtomatika-1.0b10/tests/test_horizontal_scaling.py +179 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_integration.py +28 -25
- avtomatika-1.0b10/tests/test_mtls.py +194 -0
- avtomatika-1.0b10/tests/test_rxon_handler.py +59 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_s3.py +12 -1
- avtomatika-1.0b10/tests/test_s3_metadata.py +138 -0
- avtomatika-1.0b10/tests/test_sts.py +188 -0
- avtomatika-1.0b10/tests/test_validation_integration.py +48 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_ws_manager.py +32 -15
- avtomatika-1.0b8/src/avtomatika/api/routes.py +0 -118
- avtomatika-1.0b8/src/avtomatika/constants.py +0 -30
- avtomatika-1.0b8/src/avtomatika/security.py +0 -114
- avtomatika-1.0b8/tests/test_handlers.py +0 -459
- {avtomatika-1.0b8 → avtomatika-1.0b10}/LICENSE +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/setup.cfg +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/__init__.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/context.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/datastore.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/base.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/noop.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/postgres.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/history/sqlite.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/reputation.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/scheduler.py +0 -0
- {avtomatika-1.0b8/src/avtomatika/utils → avtomatika-1.0b10/src/avtomatika/services}/__init__.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/storage/__init__.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika/worker_config_loader.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprint_integrity.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_compression.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_context.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_dispatcher_extended.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_executor.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_history.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_logging_config.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_optimization.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_scheduler.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_webhook_sender.py +0 -0
- {avtomatika-1.0b8 → avtomatika-1.0b10}/tests/test_worker_config_loader.py +0 -0
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avtomatika
|
|
3
|
-
Version: 1.0b8
|
|
3
|
+
Version: 1.0b10
|
|
4
4
|
Summary: A state-machine based orchestrator for long-running AI and other jobs.
|
|
5
|
+
Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
|
|
5
6
|
Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
|
|
6
7
|
Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
|
|
8
|
+
Keywords: orchestrator,state-machine,workflow,distributed,ai,llm,rxon,hln
|
|
7
9
|
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
8
11
|
Classifier: Programming Language :: Python :: 3
|
|
9
12
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
13
|
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Typing :: Typed
|
|
11
15
|
Requires-Python: >=3.11
|
|
12
16
|
Description-Content-Type: text/markdown
|
|
13
17
|
License-File: LICENSE
|
|
18
|
+
Requires-Dist: rxon==1.0b2
|
|
14
19
|
Requires-Dist: aiohttp~=3.12
|
|
15
20
|
Requires-Dist: python-json-logger~=4.0
|
|
16
21
|
Requires-Dist: graphviz~=0.21
|
|
@@ -22,7 +27,7 @@ Provides-Extra: redis
|
|
|
22
27
|
Requires-Dist: redis~=7.1; extra == "redis"
|
|
23
28
|
Provides-Extra: s3
|
|
24
29
|
Requires-Dist: obstore>=0.2; extra == "s3"
|
|
25
|
-
Requires-Dist: aiofiles~=
|
|
30
|
+
Requires-Dist: aiofiles~=25.1; extra == "s3"
|
|
26
31
|
Provides-Extra: history
|
|
27
32
|
Requires-Dist: aiosqlite~=0.22; extra == "history"
|
|
28
33
|
Requires-Dist: asyncpg~=0.30; extra == "history"
|
|
@@ -41,7 +46,7 @@ Requires-Dist: aioresponses~=0.7; extra == "test"
|
|
|
41
46
|
Requires-Dist: backports.zstd~=1.2; extra == "test"
|
|
42
47
|
Requires-Dist: opentelemetry-instrumentation-aiohttp-client; extra == "test"
|
|
43
48
|
Requires-Dist: obstore>=0.2; extra == "test"
|
|
44
|
-
Requires-Dist: aiofiles~=
|
|
49
|
+
Requires-Dist: aiofiles~=25.1; extra == "test"
|
|
45
50
|
Provides-Extra: all
|
|
46
51
|
Requires-Dist: avtomatika[redis]; extra == "all"
|
|
47
52
|
Requires-Dist: avtomatika[history]; extra == "all"
|
|
@@ -51,6 +56,11 @@ Dynamic: license-file
|
|
|
51
56
|
|
|
52
57
|
# Avtomatika Orchestrator
|
|
53
58
|
|
|
59
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
60
|
+
[Python 3.11+](https://www.python.org/downloads/release/python-3110/)
|
|
61
|
+
[CI](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
|
|
62
|
+
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
63
|
+
|
|
54
64
|
Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
|
|
55
65
|
|
|
56
66
|
This document serves as a comprehensive guide for developers looking to build pipelines (blueprints) and embed the Orchestrator into their applications.
|
|
@@ -88,8 +98,9 @@ The project is based on a simple yet powerful architectural pattern that separat
|
|
|
88
98
|
|
|
89
99
|
Avtomatika is part of a larger ecosystem:
|
|
90
100
|
|
|
101
|
+
* **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)**: Shared package containing protocol definitions, data models, and utilities ensuring consistency across all components.
|
|
91
102
|
* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
|
|
92
|
-
* **[
|
|
103
|
+
* **[HLN Protocol](https://github.com/avtomatika-ai/hln)**: The architectural specification and manifesto behind the system (Hierarchical Logic Network).
|
|
93
104
|
* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
|
|
94
105
|
|
|
95
106
|
## Installation
|
|
@@ -140,7 +151,7 @@ storage = MemoryStorage()
|
|
|
140
151
|
config = Config() # Loads configuration from environment variables
|
|
141
152
|
|
|
142
153
|
# Explicitly set tokens for this example
|
|
143
|
-
# Client token must be sent in the 'X-
|
|
154
|
+
# Client token must be sent in the 'X-Client-Token' header.
|
|
144
155
|
config.CLIENT_TOKEN = "my-secret-client-token"
|
|
145
156
|
# Worker token must be sent in the 'X-Worker-Token' header.
|
|
146
157
|
config.GLOBAL_WORKER_TOKEN = "my-secret-worker-token"
|
|
@@ -268,12 +279,18 @@ async def publish_handler_old_style(context):
|
|
|
268
279
|
|
|
269
280
|
Avtomatika is engineered for high-load environments with thousands of concurrent workers.
|
|
270
281
|
|
|
271
|
-
* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly
|
|
282
|
+
* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly.
|
|
283
|
+
* **Zero Trust Security**:
|
|
284
|
+
* **mTLS (Mutual TLS)**: Mutual authentication between Orchestrator and Workers using certificates.
|
|
285
|
+
* **STS (Security Token Service)**: Token rotation mechanism with short-lived access tokens.
|
|
286
|
+
* **Identity Extraction**: Automatically maps Certificate Common Name (CN) to Worker ID.
|
|
287
|
+
* **Data Integrity**:
|
|
288
|
+
* **End-to-End Validation**: Automatic verification of file size and ETag (hash) during S3 transfers.
|
|
289
|
+
* **Audit Trail**: File metadata is logged in history for full traceability.
|
|
290
|
+
* **Protocol Layer**: Built on top of `rxon`, a strict contract defining interactions, ensuring forward compatibility and allowing transport evolution (e.g., to gRPC).
|
|
272
291
|
* **Non-Blocking I/O**:
|
|
273
|
-
* **Webhooks**: Sent via a bounded background queue
|
|
274
|
-
* **
|
|
275
|
-
* **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
|
|
276
|
-
* **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
|
|
292
|
+
* **Webhooks**: Sent via a bounded background queue.
|
|
293
|
+
* **S3 Streaming**: Constant memory usage regardless of file size.
|
|
277
294
|
|
|
278
295
|
## Blueprint Cookbook: Key Features
|
|
279
296
|
|
|
@@ -477,10 +494,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
|
|
|
477
494
|
|
|
478
495
|
The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
|
|
479
496
|
|
|
480
|
-
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
|
|
481
|
-
* **
|
|
497
|
+
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
|
|
498
|
+
* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
|
|
499
|
+
* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
|
|
500
|
+
* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
|
|
482
501
|
* **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
|
|
483
502
|
|
|
503
|
+
### Progress Tracking
|
|
504
|
+
|
|
505
|
+
Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
|
|
506
|
+
|
|
484
507
|
### Concurrency & Performance
|
|
485
508
|
|
|
486
509
|
To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
|
|
@@ -525,13 +548,18 @@ By default, the engine uses in-memory storage. For production, you must configur
|
|
|
525
548
|
|
|
526
549
|
The orchestrator uses tokens to authenticate API requests.
|
|
527
550
|
|
|
528
|
-
* **Client Authentication**: All API clients must provide a token in the `X-
|
|
551
|
+
* **Client Authentication**: All API clients must provide a token in the `X-Client-Token` header. The orchestrator validates this token against client configurations.
|
|
529
552
|
* **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
|
|
530
553
|
* `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
|
|
531
554
|
* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
|
|
532
555
|
|
|
533
556
|
> **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
|
|
534
557
|
|
|
558
|
+
### Pure Holon Mode
|
|
559
|
+
For high-security environments or when operating as a Compound Holon within an HLN, you can disable the public client API.
|
|
560
|
+
* **Enable/Disable**: Set `ENABLE_CLIENT_API="false"` (default: `true`).
|
|
561
|
+
* **Effect**: The Orchestrator will stop listening on `/api/v1/jobs/...`. It will only accept tasks via the Worker Protocol (RXON) from its parent.
|
|
562
|
+
|
|
535
563
|
### Observability
|
|
536
564
|
|
|
537
565
|
When installed with the telemetry dependency, the system automatically provides:
|
|
@@ -543,7 +571,11 @@ When installed with the telemetry dependency, the system automatically provides:
|
|
|
543
571
|
### Setup Environment
|
|
544
572
|
|
|
545
573
|
* Clone the repository.
|
|
546
|
-
*
|
|
574
|
+
* **For local development**, install the protocol package first:
|
|
575
|
+
```bash
|
|
576
|
+
pip install -e ../rxon
|
|
577
|
+
```
|
|
578
|
+
* Then install the engine in editable mode with all dependencies:
|
|
547
579
|
```bash
|
|
548
580
|
pip install -e ".[all,test]"
|
|
549
581
|
```
|
|
@@ -561,7 +593,7 @@ When installed with the telemetry dependency, the system automatically provides:
|
|
|
561
593
|
|
|
562
594
|
To run the `avtomatika` test suite:
|
|
563
595
|
```bash
|
|
564
|
-
pytest
|
|
596
|
+
pytest tests/
|
|
565
597
|
```
|
|
566
598
|
|
|
567
599
|
### Interactive API Documentation
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Avtomatika Orchestrator
|
|
2
2
|
|
|
3
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
+
[Python 3.11+](https://www.python.org/downloads/release/python-3110/)
|
|
5
|
+
[CI](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
|
|
6
|
+
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
7
|
+
|
|
3
8
|
Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
|
|
4
9
|
|
|
5
10
|
This document serves as a comprehensive guide for developers looking to build pipelines (blueprints) and embed the Orchestrator into their applications.
|
|
@@ -37,8 +42,9 @@ The project is based on a simple yet powerful architectural pattern that separat
|
|
|
37
42
|
|
|
38
43
|
Avtomatika is part of a larger ecosystem:
|
|
39
44
|
|
|
45
|
+
* **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)**: Shared package containing protocol definitions, data models, and utilities ensuring consistency across all components.
|
|
40
46
|
* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
|
|
41
|
-
* **[
|
|
47
|
+
* **[HLN Protocol](https://github.com/avtomatika-ai/hln)**: The architectural specification and manifesto behind the system (Hierarchical Logic Network).
|
|
42
48
|
* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
|
|
43
49
|
|
|
44
50
|
## Installation
|
|
@@ -89,7 +95,7 @@ storage = MemoryStorage()
|
|
|
89
95
|
config = Config() # Loads configuration from environment variables
|
|
90
96
|
|
|
91
97
|
# Explicitly set tokens for this example
|
|
92
|
-
# Client token must be sent in the 'X-
|
|
98
|
+
# Client token must be sent in the 'X-Client-Token' header.
|
|
93
99
|
config.CLIENT_TOKEN = "my-secret-client-token"
|
|
94
100
|
# Worker token must be sent in the 'X-Worker-Token' header.
|
|
95
101
|
config.GLOBAL_WORKER_TOKEN = "my-secret-worker-token"
|
|
@@ -217,12 +223,18 @@ async def publish_handler_old_style(context):
|
|
|
217
223
|
|
|
218
224
|
Avtomatika is engineered for high-load environments with thousands of concurrent workers.
|
|
219
225
|
|
|
220
|
-
* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly
|
|
226
|
+
* **O(1) Dispatcher**: Uses advanced Redis Set intersections to find suitable workers instantly.
|
|
227
|
+
* **Zero Trust Security**:
|
|
228
|
+
* **mTLS (Mutual TLS)**: Mutual authentication between Orchestrator and Workers using certificates.
|
|
229
|
+
* **STS (Security Token Service)**: Token rotation mechanism with short-lived access tokens.
|
|
230
|
+
* **Identity Extraction**: Automatically maps Certificate Common Name (CN) to Worker ID.
|
|
231
|
+
* **Data Integrity**:
|
|
232
|
+
* **End-to-End Validation**: Automatic verification of file size and ETag (hash) during S3 transfers.
|
|
233
|
+
* **Audit Trail**: File metadata is logged in history for full traceability.
|
|
234
|
+
* **Protocol Layer**: Built on top of `rxon`, a strict contract defining interactions, ensuring forward compatibility and allowing transport evolution (e.g., to gRPC).
|
|
221
235
|
* **Non-Blocking I/O**:
|
|
222
|
-
* **Webhooks**: Sent via a bounded background queue
|
|
223
|
-
* **
|
|
224
|
-
* **Redis Streams**: Uses blocking reads to eliminate busy-waiting and reduce CPU usage.
|
|
225
|
-
* **Memory Safety**: S3 file transfers use streaming to handle multi-gigabyte files with constant, low RAM usage.
|
|
236
|
+
* **Webhooks**: Sent via a bounded background queue.
|
|
237
|
+
* **S3 Streaming**: Constant memory usage regardless of file size.
|
|
226
238
|
|
|
227
239
|
## Blueprint Cookbook: Key Features
|
|
228
240
|
|
|
@@ -426,10 +438,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
|
|
|
426
438
|
|
|
427
439
|
The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
|
|
428
440
|
|
|
429
|
-
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
|
|
430
|
-
* **
|
|
441
|
+
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
|
|
442
|
+
* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
|
|
443
|
+
* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
|
|
444
|
+
* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
|
|
431
445
|
* **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
|
|
432
446
|
|
|
447
|
+
### Progress Tracking
|
|
448
|
+
|
|
449
|
+
Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
|
|
450
|
+
|
|
433
451
|
### Concurrency & Performance
|
|
434
452
|
|
|
435
453
|
To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
|
|
@@ -474,13 +492,18 @@ By default, the engine uses in-memory storage. For production, you must configur
|
|
|
474
492
|
|
|
475
493
|
The orchestrator uses tokens to authenticate API requests.
|
|
476
494
|
|
|
477
|
-
* **Client Authentication**: All API clients must provide a token in the `X-
|
|
495
|
+
* **Client Authentication**: All API clients must provide a token in the `X-Client-Token` header. The orchestrator validates this token against client configurations.
|
|
478
496
|
* **Worker Authentication**: Workers must provide a token in the `X-Worker-Token` header.
|
|
479
497
|
* `GLOBAL_WORKER_TOKEN`: You can set a global token for all workers using this environment variable. For development and testing, it defaults to `"secure-worker-token"`.
|
|
480
498
|
* **Individual Tokens**: For production, it is recommended to define individual tokens for each worker in a separate configuration file and provide its path via the `WORKERS_CONFIG_PATH` environment variable. Tokens from this file are stored in a hashed format for security.
|
|
481
499
|
|
|
482
500
|
> **Note on Dynamic Reloading:** The worker configuration file can be reloaded without restarting the orchestrator by sending an authenticated `POST` request to the `/api/v1/admin/reload-workers` endpoint. This allows for dynamic updates of worker tokens.
|
|
483
501
|
|
|
502
|
+
### Pure Holon Mode
|
|
503
|
+
For high-security environments or when operating as a Compound Holon within an HLN, you can disable the public client API.
|
|
504
|
+
* **Enable/Disable**: Set `ENABLE_CLIENT_API="false"` (default: `true`).
|
|
505
|
+
* **Effect**: The Orchestrator will stop listening on `/api/v1/jobs/...`. It will only accept tasks via the Worker Protocol (RXON) from its parent.
|
|
506
|
+
|
|
484
507
|
### Observability
|
|
485
508
|
|
|
486
509
|
When installed with the telemetry dependency, the system automatically provides:
|
|
@@ -492,7 +515,11 @@ When installed with the telemetry dependency, the system automatically provides:
|
|
|
492
515
|
### Setup Environment
|
|
493
516
|
|
|
494
517
|
* Clone the repository.
|
|
495
|
-
*
|
|
518
|
+
* **For local development**, install the protocol package first:
|
|
519
|
+
```bash
|
|
520
|
+
pip install -e ../rxon
|
|
521
|
+
```
|
|
522
|
+
* Then install the engine in editable mode with all dependencies:
|
|
496
523
|
```bash
|
|
497
524
|
pip install -e ".[all,test]"
|
|
498
525
|
```
|
|
@@ -510,7 +537,7 @@ When installed with the telemetry dependency, the system automatically provides:
|
|
|
510
537
|
|
|
511
538
|
To run the `avtomatika` test suite:
|
|
512
539
|
```bash
|
|
513
|
-
pytest
|
|
540
|
+
pytest tests/
|
|
514
541
|
```
|
|
515
542
|
|
|
516
543
|
### Interactive API Documentation
|
|
@@ -4,17 +4,24 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "avtomatika"
|
|
7
|
-
version = "1.0b8"
|
|
7
|
+
version = "1.0b10"
|
|
8
8
|
description = "A state-machine based orchestrator for long-running AI and other jobs."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Dmitrii Gagarin", email = "madgagarin@gmail.com"},
|
|
13
|
+
]
|
|
14
|
+
keywords = ["orchestrator", "state-machine", "workflow", "distributed", "ai", "llm", "rxon", "hln"]
|
|
11
15
|
classifiers = [
|
|
12
16
|
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
13
18
|
"Programming Language :: Python :: 3",
|
|
14
19
|
"License :: OSI Approved :: MIT License",
|
|
15
20
|
"Operating System :: OS Independent",
|
|
21
|
+
"Typing :: Typed",
|
|
16
22
|
]
|
|
17
23
|
dependencies = [
|
|
24
|
+
"rxon==1.0b2",
|
|
18
25
|
"aiohttp~=3.12",
|
|
19
26
|
"python-json-logger~=4.0",
|
|
20
27
|
"graphviz~=0.21",
|
|
@@ -26,7 +33,7 @@ dependencies = [
|
|
|
26
33
|
|
|
27
34
|
[project.optional-dependencies]
|
|
28
35
|
redis = ["redis~=7.1"]
|
|
29
|
-
s3 = ["obstore>=0.2", "aiofiles~=
|
|
36
|
+
s3 = ["obstore>=0.2", "aiofiles~=25.1"]
|
|
30
37
|
history = ["aiosqlite~=0.22", "asyncpg~=0.30"]
|
|
31
38
|
telemetry = [
|
|
32
39
|
"opentelemetry-api~=1.39",
|
|
@@ -44,7 +51,7 @@ test = [
|
|
|
44
51
|
"backports.zstd~=1.2",
|
|
45
52
|
"opentelemetry-instrumentation-aiohttp-client",
|
|
46
53
|
"obstore>=0.2",
|
|
47
|
-
"aiofiles~=
|
|
54
|
+
"aiofiles~=25.1",
|
|
48
55
|
]
|
|
49
56
|
all = [
|
|
50
57
|
"avtomatika[redis]",
|
|
@@ -58,7 +65,7 @@ all = [
|
|
|
58
65
|
"Bug Tracker" = "https://github.com/avtomatika-ai/avtomatika/issues"
|
|
59
66
|
|
|
60
67
|
[tool.setuptools.package-data]
|
|
61
|
-
"avtomatika" = ["api.html"]
|
|
68
|
+
"avtomatika" = ["api.html", "py.typed"]
|
|
62
69
|
|
|
63
70
|
[tool.setuptools.packages.find]
|
|
64
71
|
where = ["src"]
|
|
@@ -3,7 +3,7 @@ from logging import getLogger
|
|
|
3
3
|
from typing import Any, Callable
|
|
4
4
|
from uuid import uuid4
|
|
5
5
|
|
|
6
|
-
from aiohttp import
|
|
6
|
+
from aiohttp import web
|
|
7
7
|
from aioprometheus import render
|
|
8
8
|
from orjson import OPT_INDENT_2, dumps, loads
|
|
9
9
|
|
|
@@ -14,31 +14,22 @@ from ..app_keys import (
|
|
|
14
14
|
from ..blueprint import StateMachineBlueprint
|
|
15
15
|
from ..client_config_loader import load_client_configs_to_redis
|
|
16
16
|
from ..constants import (
|
|
17
|
-
ERROR_CODE_INVALID_INPUT,
|
|
18
|
-
ERROR_CODE_PERMANENT,
|
|
19
|
-
ERROR_CODE_TRANSIENT,
|
|
20
|
-
JOB_STATUS_CANCELLED,
|
|
21
|
-
JOB_STATUS_FAILED,
|
|
17
|
+
COMMAND_CANCEL_TASK,
|
|
22
18
|
JOB_STATUS_PENDING,
|
|
23
|
-
JOB_STATUS_QUARANTINED,
|
|
24
19
|
JOB_STATUS_RUNNING,
|
|
25
20
|
JOB_STATUS_WAITING_FOR_HUMAN,
|
|
26
|
-
JOB_STATUS_WAITING_FOR_PARALLEL,
|
|
27
21
|
JOB_STATUS_WAITING_FOR_WORKER,
|
|
28
|
-
TASK_STATUS_CANCELLED,
|
|
29
|
-
TASK_STATUS_FAILURE,
|
|
30
|
-
TASK_STATUS_SUCCESS,
|
|
31
22
|
)
|
|
32
23
|
from ..worker_config_loader import load_worker_configs_to_redis
|
|
33
24
|
|
|
34
25
|
logger = getLogger(__name__)
|
|
35
26
|
|
|
36
27
|
|
|
37
|
-
def json_dumps(obj) -> str:
|
|
28
|
+
def json_dumps(obj: Any) -> str:
|
|
38
29
|
return dumps(obj).decode("utf-8")
|
|
39
30
|
|
|
40
31
|
|
|
41
|
-
def json_response(data, **kwargs) -> web.Response:
|
|
32
|
+
def json_response(data: Any, **kwargs: Any) -> web.Response:
|
|
42
33
|
return web.json_response(data, dumps=json_dumps, **kwargs)
|
|
43
34
|
|
|
44
35
|
|
|
@@ -138,7 +129,7 @@ async def cancel_job_handler(request: web.Request) -> web.Response:
|
|
|
138
129
|
|
|
139
130
|
# Attempt WebSocket-based cancellation if supported
|
|
140
131
|
if worker_info and worker_info.get("capabilities", {}).get("websockets"):
|
|
141
|
-
command = {"command":
|
|
132
|
+
command = {"command": COMMAND_CANCEL_TASK, "task_id": task_id, "job_id": job_id}
|
|
142
133
|
sent = await engine.ws_manager.send_command(worker_id, command)
|
|
143
134
|
if sent:
|
|
144
135
|
return json_response({"status": "cancellation_request_sent"})
|
|
@@ -208,143 +199,6 @@ async def get_dashboard_handler(request: web.Request) -> web.Response:
|
|
|
208
199
|
return json_response(dashboard_data)
|
|
209
200
|
|
|
210
201
|
|
|
211
|
-
async def task_result_handler(request: web.Request) -> web.Response:
|
|
212
|
-
engine = request.app[ENGINE_KEY]
|
|
213
|
-
try:
|
|
214
|
-
data = await request.json(loads=loads)
|
|
215
|
-
job_id = data.get("job_id")
|
|
216
|
-
task_id = data.get("task_id")
|
|
217
|
-
result = data.get("result", {})
|
|
218
|
-
result_status = result.get("status", TASK_STATUS_SUCCESS)
|
|
219
|
-
error_message = result.get("error")
|
|
220
|
-
payload_worker_id = data.get("worker_id")
|
|
221
|
-
except Exception:
|
|
222
|
-
return json_response({"error": "Invalid JSON body"}, status=400)
|
|
223
|
-
|
|
224
|
-
# Security check: Ensure the worker_id from the payload matches the authenticated worker
|
|
225
|
-
authenticated_worker_id = request.get("worker_id")
|
|
226
|
-
if not authenticated_worker_id:
|
|
227
|
-
return json_response({"error": "Could not identify authenticated worker."}, status=500)
|
|
228
|
-
|
|
229
|
-
if payload_worker_id and payload_worker_id != authenticated_worker_id:
|
|
230
|
-
return json_response(
|
|
231
|
-
{
|
|
232
|
-
"error": f"Forbidden: Authenticated worker '{authenticated_worker_id}' "
|
|
233
|
-
f"cannot submit results for another worker '{payload_worker_id}'.",
|
|
234
|
-
},
|
|
235
|
-
status=403,
|
|
236
|
-
)
|
|
237
|
-
|
|
238
|
-
if not job_id or not task_id:
|
|
239
|
-
return json_response({"error": "job_id and task_id are required"}, status=400)
|
|
240
|
-
|
|
241
|
-
job_state = await engine.storage.get_job_state(job_id)
|
|
242
|
-
if not job_state:
|
|
243
|
-
return json_response({"error": "Job not found"}, status=404)
|
|
244
|
-
|
|
245
|
-
# Handle parallel task completion
|
|
246
|
-
if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
|
|
247
|
-
await engine.storage.remove_job_from_watch(f"{job_id}:{task_id}")
|
|
248
|
-
job_state.setdefault("aggregation_results", {})[task_id] = result
|
|
249
|
-
job_state.setdefault("active_branches", []).remove(task_id)
|
|
250
|
-
|
|
251
|
-
if not job_state["active_branches"]:
|
|
252
|
-
logger.info(f"All parallel branches for job {job_id} have completed.")
|
|
253
|
-
job_state["status"] = JOB_STATUS_RUNNING
|
|
254
|
-
job_state["current_state"] = job_state["aggregation_target"]
|
|
255
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
256
|
-
await engine.storage.enqueue_job(job_id)
|
|
257
|
-
else:
|
|
258
|
-
logger.info(
|
|
259
|
-
f"Branch {task_id} for job {job_id} completed. Waiting for {len(job_state['active_branches'])} more.",
|
|
260
|
-
)
|
|
261
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
262
|
-
|
|
263
|
-
return json_response({"status": "parallel_branch_result_accepted"}, status=200)
|
|
264
|
-
|
|
265
|
-
await engine.storage.remove_job_from_watch(job_id)
|
|
266
|
-
|
|
267
|
-
import time
|
|
268
|
-
|
|
269
|
-
now = time.monotonic()
|
|
270
|
-
dispatched_at = job_state.get("task_dispatched_at", now)
|
|
271
|
-
duration_ms = int((now - dispatched_at) * 1000)
|
|
272
|
-
|
|
273
|
-
await engine.history_storage.log_job_event(
|
|
274
|
-
{
|
|
275
|
-
"job_id": job_id,
|
|
276
|
-
"state": job_state.get("current_state"),
|
|
277
|
-
"event_type": "task_finished",
|
|
278
|
-
"duration_ms": duration_ms,
|
|
279
|
-
"worker_id": authenticated_worker_id,
|
|
280
|
-
"context_snapshot": {**job_state, "result": result},
|
|
281
|
-
},
|
|
282
|
-
)
|
|
283
|
-
|
|
284
|
-
job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}
|
|
285
|
-
|
|
286
|
-
if result_status == TASK_STATUS_FAILURE:
|
|
287
|
-
error_details = result.get("error", {})
|
|
288
|
-
error_type = ERROR_CODE_TRANSIENT
|
|
289
|
-
error_message = "No error details provided."
|
|
290
|
-
|
|
291
|
-
if isinstance(error_details, dict):
|
|
292
|
-
error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
|
|
293
|
-
error_message = error_details.get("message", "No error message provided.")
|
|
294
|
-
elif isinstance(error_details, str):
|
|
295
|
-
error_message = error_details
|
|
296
|
-
|
|
297
|
-
logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
|
|
298
|
-
|
|
299
|
-
if error_type == ERROR_CODE_PERMANENT:
|
|
300
|
-
job_state["status"] = JOB_STATUS_QUARANTINED
|
|
301
|
-
job_state["error_message"] = f"Task failed with permanent error: {error_message}"
|
|
302
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
303
|
-
await engine.storage.quarantine_job(job_id)
|
|
304
|
-
elif error_type == ERROR_CODE_INVALID_INPUT:
|
|
305
|
-
job_state["status"] = JOB_STATUS_FAILED
|
|
306
|
-
job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
|
|
307
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
308
|
-
else: # TRANSIENT_ERROR
|
|
309
|
-
await engine.handle_task_failure(job_state, task_id, error_message)
|
|
310
|
-
|
|
311
|
-
return json_response({"status": "result_accepted_failure"}, status=200)
|
|
312
|
-
|
|
313
|
-
if result_status == TASK_STATUS_CANCELLED:
|
|
314
|
-
logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
|
|
315
|
-
job_state["status"] = JOB_STATUS_CANCELLED
|
|
316
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
317
|
-
transitions = job_state.get("current_task_transitions", {})
|
|
318
|
-
if next_state := transitions.get("cancelled"):
|
|
319
|
-
job_state["current_state"] = next_state
|
|
320
|
-
job_state["status"] = JOB_STATUS_RUNNING
|
|
321
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
322
|
-
await engine.storage.enqueue_job(job_id)
|
|
323
|
-
return json_response({"status": "result_accepted_cancelled"}, status=200)
|
|
324
|
-
|
|
325
|
-
transitions = job_state.get("current_task_transitions", {})
|
|
326
|
-
if next_state := transitions.get(result_status):
|
|
327
|
-
logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
|
|
328
|
-
|
|
329
|
-
worker_data = result.get("data")
|
|
330
|
-
if worker_data and isinstance(worker_data, dict):
|
|
331
|
-
if "state_history" not in job_state:
|
|
332
|
-
job_state["state_history"] = {}
|
|
333
|
-
job_state["state_history"].update(worker_data)
|
|
334
|
-
|
|
335
|
-
job_state["current_state"] = next_state
|
|
336
|
-
job_state["status"] = JOB_STATUS_RUNNING
|
|
337
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
338
|
-
await engine.storage.enqueue_job(job_id)
|
|
339
|
-
else:
|
|
340
|
-
logger.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
|
|
341
|
-
job_state["status"] = JOB_STATUS_FAILED
|
|
342
|
-
job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
|
|
343
|
-
await engine.storage.save_job_state(job_id, job_state)
|
|
344
|
-
|
|
345
|
-
return json_response({"status": "result_accepted_success"}, status=200)
|
|
346
|
-
|
|
347
|
-
|
|
348
202
|
async def human_approval_webhook_handler(request: web.Request) -> web.Response:
|
|
349
203
|
engine = request.app[ENGINE_KEY]
|
|
350
204
|
job_id = request.match_info.get("job_id")
|
|
@@ -441,109 +295,3 @@ async def docs_handler(request: web.Request) -> web.Response:
|
|
|
441
295
|
content = content.replace(marker, f"{marker}\n{endpoints_json.strip('[]')},")
|
|
442
296
|
|
|
443
297
|
return web.Response(text=content, content_type="text/html")
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
    """Serve a worker's WebSocket connection for its whole lifetime.

    The socket is registered with the engine's ``ws_manager`` under the
    ``worker_id`` taken from the URL, every TEXT frame is decoded as JSON and
    passed to ``ws_manager.handle_message``, and the worker is unregistered
    again when the receive loop ends for any reason.

    Raises:
        web.HTTPBadRequest: if the route did not supply a ``worker_id``.
    """
    orchestrator = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        raise web.HTTPBadRequest(text="worker_id is required")

    ws = web.WebSocketResponse()
    await ws.prepare(request)

    await orchestrator.ws_manager.register(worker_id, ws)
    try:
        async for frame in ws:
            if frame.type == WSMsgType.TEXT:
                # A bad frame is logged and dropped; it must not tear down
                # the connection, so the loop keeps reading afterwards.
                try:
                    await orchestrator.ws_manager.handle_message(worker_id, frame.json())
                except Exception as e:
                    logger.error(f"Error processing WebSocket message from {worker_id}: {e}")
                continue

            if frame.type == WSMsgType.ERROR:
                logger.error(f"WebSocket connection for {worker_id} closed with exception {ws.exception()}")
                break
            # Any other frame type is ignored.
    finally:
        # Runs on normal close, on the ERROR break and on exceptions alike.
        await orchestrator.ws_manager.unregister(worker_id)

    return ws
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
async def handle_get_next_task(request: web.Request) -> web.Response:
    """Hand the next queued task to the worker named in the URL.

    Responds with:
        400 - the path carries no ``worker_id``;
        200 - a task was dequeued; the task itself is the JSON body;
        204 - storage returned nothing for this worker.
    """
    orchestrator = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        return json_response({"error": "worker_id is required in path"}, status=400)

    logger.debug(f"Worker {worker_id} is requesting a new task.")

    # The configured poll timeout is forwarded to storage as-is.
    task = await orchestrator.storage.dequeue_task_for_worker(
        worker_id,
        orchestrator.config.WORKER_POLL_TIMEOUT_SECONDS,
    )

    if not task:
        logger.debug(f"No tasks for worker {worker_id}, responding 204.")
        return web.Response(status=204)

    logger.info(f"Sending task {task.get('task_id')} to worker {worker_id}")
    return json_response(task, status=200)
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
async def worker_update_handler(request: web.Request) -> web.Response:
    """Process a worker heartbeat, optionally carrying a status update.

    With a non-empty JSON body the worker's stored status is updated and a
    ``status_update`` event is written to history storage. With no body, an
    empty body or an unparseable one, only the worker's TTL is refreshed.

    Responds with:
        400 - the path carries no ``worker_id``;
        404 - storage reports nothing for this worker;
        200 - the updated worker record, or ``{"status": "ttl_refreshed"}``.
    """
    orchestrator = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        return json_response({"error": "worker_id is required in path"}, status=400)

    # The record lives for twice the configured health-check interval.
    ttl = orchestrator.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2

    update_data = None
    if request.can_read_body:
        try:
            update_data = await request.json(loads=loads)
        except Exception:
            # Deliberately lenient: an unparseable body is downgraded to a
            # plain heartbeat rather than rejected.
            logger.warning(
                f"Received PATCH from worker {worker_id} with non-JSON body. Treating as TTL-only heartbeat."
            )

    if not update_data:
        # Heartbeat only: extend the TTL and report whether the worker exists.
        refreshed = await orchestrator.storage.refresh_worker_ttl(worker_id, ttl)
        if not refreshed:
            return json_response({"error": "Worker not found"}, status=404)
        return json_response({"status": "ttl_refreshed"})

    updated_worker = await orchestrator.storage.update_worker_status(worker_id, update_data, ttl)
    if not updated_worker:
        return json_response({"error": "Worker not found"}, status=404)

    await orchestrator.history_storage.log_worker_event(
        {
            "worker_id": worker_id,
            "event_type": "status_update",
            "worker_info_snapshot": updated_worker,
        },
    )
    return json_response(updated_worker, status=200)
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
async def register_worker_handler(request: web.Request) -> web.Response:
    """Register a worker from data already attached to the request.

    The payload is read from ``request["worker_registration_data"]`` rather
    than from the body (presumably placed there by upstream middleware;
    confirm against the app setup). On success the worker is stored with a
    TTL and a ``registered`` event is written to history storage.

    Responds with:
        500 - no registration data is attached to the request;
        400 - the data lacks a ``worker_id``;
        200 - ``{"status": "registered"}``.
    """
    orchestrator = request.app[ENGINE_KEY]

    worker_data = request.get("worker_registration_data")
    if not worker_data:
        return json_response({"error": "Worker data not found in request"}, status=500)

    worker_id = worker_data.get("worker_id")
    if not worker_id:
        return json_response({"error": "Missing required field: worker_id"}, status=400)

    # The record lives for twice the configured health-check interval.
    ttl = orchestrator.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
    await orchestrator.storage.register_worker(worker_id, worker_data, ttl)

    logger.info(f"Worker '{worker_id}' registered with info: {worker_data}")

    registration_event = {
        "worker_id": worker_id,
        "event_type": "registered",
        "worker_info_snapshot": worker_data,
    }
    await orchestrator.history_storage.log_worker_event(registration_event)

    return json_response({"status": "registered"}, status=200)
|