avtomatika-worker 1.0b3__tar.gz → 1.0b5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/LICENSE +1 -1
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/PKG-INFO +82 -21
- avtomatika_worker-1.0b3/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b5/README.md +76 -48
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/pyproject.toml +11 -1
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/__init__.py +1 -1
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/config.py +6 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/s3.py +76 -48
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/task_files.py +60 -2
- avtomatika_worker-1.0b5/src/avtomatika_worker/types.py +46 -0
- avtomatika_worker-1.0b5/src/avtomatika_worker/worker.py +704 -0
- avtomatika_worker-1.0b3/README.md → avtomatika_worker-1.0b5/src/avtomatika_worker.egg-info/PKG-INFO +109 -20
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/SOURCES.txt +2 -3
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/requires.txt +1 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_dependency_injection.py +3 -4
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_parameter_typing.py +15 -15
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_s3.py +22 -12
- avtomatika_worker-1.0b5/tests/test_task_files_extended.py +60 -0
- avtomatika_worker-1.0b5/tests/test_validation.py +57 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_worker_logic.py +38 -25
- avtomatika_worker-1.0b5/tests/test_worker_more_logic.py +211 -0
- avtomatika_worker-1.0b5/tests/test_worker_sdk.py +281 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_wrr_logic.py +4 -3
- avtomatika_worker-1.0b3/src/avtomatika_worker/client.py +0 -93
- avtomatika_worker-1.0b3/src/avtomatika_worker/constants.py +0 -22
- avtomatika_worker-1.0b3/src/avtomatika_worker/types.py +0 -21
- avtomatika_worker-1.0b3/src/avtomatika_worker/worker.py +0 -526
- avtomatika_worker-1.0b3/tests/test_client.py +0 -52
- avtomatika_worker-1.0b3/tests/test_worker_more_logic.py +0 -310
- avtomatika_worker-1.0b3/tests/test_worker_sdk.py +0 -342
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/setup.cfg +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/py.typed +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/dependency_links.txt +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/top_level.txt +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_concurrency_limits.py +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_config.py +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_init.py +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_per_orchestrator_token.py +0 -0
- {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_types.py +0 -0
{avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025 Dmitrii Gagarin
+Copyright (c) 2025-2026 Dmitrii Gagarin aka madgagarin
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
{avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/PKG-INFO
@@ -1,16 +1,21 @@
 Metadata-Version: 2.4
 Name: avtomatika-worker
-Version: 1.0b3
+Version: 1.0b5
 Summary: Worker SDK for the Avtomatika orchestrator.
+Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
+Keywords: worker,sdk,orchestrator,distributed,task-queue,rxon,hln
 Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
+Classifier: Typing :: Typed
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: rxon==1.0b2
 Requires-Dist: aiohttp~=3.13.2
 Requires-Dist: python-json-logger~=4.0.0
 Requires-Dist: obstore>=0.1
@@ -28,7 +33,11 @@ Dynamic: license-file
 
 # Avtomatika Worker SDK
 
-
+[](https://opensource.org/licenses/MIT)
+[](https://www.python.org/downloads/release/python-3110/)
+[](https://github.com/astral-sh/ruff)
+
+This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It is built upon the **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)** and implements the **[HLN Protocol](https://github.com/avtomatika-ai/hln)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
 
 ## Installation
 
@@ -286,13 +295,26 @@ async def image_resizer(params: ResizeParams, **kwargs):
 
 ### 1. Task Handlers
 
-Each handler is
+Each handler is a function (either `async def` or `def`) that accepts two arguments:
 
 - `params` (`dict`, `dataclass`, or `pydantic.BaseModel`): The parameters for the task, automatically validated and instantiated based on your type hint.
 - `**kwargs`: Additional metadata about the task, including:
   - `task_id` (`str`): The unique ID of the task itself.
   - `job_id` (`str`): The ID of the parent `Job` to which the task belongs.
   - `priority` (`int`): The execution priority of the task.
+  - `send_progress` (`callable`): An async function `await send_progress(progress_float, message_string)` to report task execution progress (0.0 to 1.0) to the orchestrator.
+
+**Synchronous Handlers:**
+If you define your handler as a standard synchronous function (`def handler(...)`), the SDK will automatically execute it in a separate thread using `asyncio.to_thread`. This ensures that CPU-intensive operations (like model inference) do not block the worker's main event loop, allowing heartbeats and other background tasks to continue running smoothly.
+
+```python
+@worker.task("cpu_heavy_task")
+def heavy_computation(params: dict, **kwargs):
+    # This will run in a thread, not blocking the loop
+    import time
+    time.sleep(10)
+    return {"status": "success"}
+```
 
 ### 2. Concurrency Limiting
 
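The `send_progress` callable added in the hunk above is easiest to see in use. A minimal sketch, assuming the `worker` instance from the README's setup examples and a hypothetical `do_chunk()` unit of work:

```python
# Sketch only: `worker` is the SDK instance from the README's setup code,
# and do_chunk() is a hypothetical unit of work, not part of the SDK.
@worker.task("long_render")
async def long_render(params: dict, send_progress, **kwargs):
    total = 10
    for i in range(total):
        await do_chunk(i)  # hypothetical work item
        # Report fractional progress (0.0-1.0) plus a human-readable message
        await send_progress((i + 1) / total, f"chunk {i + 1}/{total} done")
    return {"status": "success"}
```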
@@ -383,7 +405,7 @@ return {
 
 #### Error Handling
 
-To control the orchestrator's fault tolerance mechanism, you can return standardized error types.
+To control the orchestrator's fault tolerance mechanism, you can return standardized error types. All error constants can be imported from `avtomatika_worker.typing`.
 
 - **Transient Error (`TRANSIENT_ERROR`)**: For issues that might be resolved on a retry (e.g., a network failure).
   ```python
@@ -396,17 +418,10 @@ To control the orchestrator's fault tolerance mechanism, you can return standard
       }
   }
   ```
-- **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format).
-
-
-
-      "status": "failure",
-      "error": {
-          "code": PERMANENT_ERROR,
-          "message": "Corrupted input file"
-      }
-  }
-  ```
+- **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format). Causes immediate quarantine.
+- **Security Error (`SECURITY_ERROR`)**: For security violations. Causes immediate quarantine.
+- **Dependency Error (`DEPENDENCY_ERROR`)**: For missing models or tools. Causes immediate quarantine.
+- **Resource Exhausted (`RESOURCE_EXHAUSTED_ERROR`)**: When resources are temporarily unavailable. Treated as transient (retried).
 
 ### 4. Failover and Load Balancing
 
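The new quarantine-vs-retry semantics above map directly onto handler return values. A minimal sketch, assuming the constants live in the new `src/avtomatika_worker/types.py` from the file list (the README text says `avtomatika_worker.typing`, so treat the exact import path as an assumption):

```python
# Sketch only: import path assumed from the new types.py in the file list;
# `worker` and read_input() are from the README's examples / hypothetical.
from avtomatika_worker.types import PERMANENT_ERROR, TRANSIENT_ERROR

@worker.task("convert")
async def convert(params: dict, **kwargs):
    try:
        data = read_input(params["path"])  # hypothetical helper
    except ValueError:
        # Unrecoverable input -> immediate quarantine, no retry
        return {"status": "failure",
                "error": {"code": PERMANENT_ERROR, "message": "Corrupted input file"}}
    except TimeoutError:
        # May succeed on retry -> the orchestrator re-schedules the task
        return {"status": "failure",
                "error": {"code": TRANSIENT_ERROR, "message": "Upstream timeout"}}
    return {"status": "success", "result": {"bytes": len(data)}}
```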
@@ -521,6 +536,48 @@ This only requires configuring environment variables for S3 access (see Full Con
 
 ### 7. WebSocket Support
 
+For real-time communication (e.g., immediate task cancellation), the worker supports WebSocket connections. This is enabled by setting `WORKER_ENABLE_WEBSOCKETS=true`. When connected, the orchestrator can push commands like `cancel_task` directly to the worker.
+
+### 8. Middleware
+
+The worker supports a middleware system, allowing you to wrap task executions with custom logic. This is particularly useful for resource management (e.g., acquiring GPU locks), logging, error handling, or **Dependency Injection**.
+
+Middleware functions wrap the execution of the task handler (and any subsequent middlewares). They receive a context dictionary and the next handler in the chain.
+
+The `context` dictionary contains:
+- `task_id`, `job_id`, `task_name`: Metadata.
+- `params`: The validated parameters object.
+- `handler_kwargs`: A dictionary of arguments that will be passed to the handler. **Middleware can modify this dictionary to inject dependencies.**
+
+**Example: GPU Resource Manager & Dependency Injection**
+
+```python
+async def gpu_lock_middleware(context: dict, next_handler: callable):
+    # Pre-processing: Acquire resource
+    print(f"Acquiring GPU for task {context['task_id']}...")
+    model_path = await resource_manager.allocate()
+
+    # Inject the model path into the handler's arguments
+    context["handler_kwargs"]["model_path"] = model_path
+
+    try:
+        # Execute the next handler in the chain
+        result = await next_handler()
+        return result
+    finally:
+        # Post-processing: Release resource
+        print(f"Releasing GPU for task {context['task_id']}...")
+        resource_manager.release()
+
+# Register the middleware
+worker.add_middleware(gpu_lock_middleware)
+
+# Handler now receives 'model_path' automatically
+@worker.task("generate")
+def generate(params, model_path, **kwargs):
+    print(f"Using model at: {model_path}")
+```
+
 ## Advanced Features
 
 ### Reporting Skill & Model Dependencies
@@ -577,8 +634,11 @@ The worker is fully configured via environment variables.
 | `WORKER_TYPE` | A string identifying the type of the worker. | `generic-cpu-worker` |
 | `WORKER_PORT` | The port for the worker's health check server. | `8083` |
 | `WORKER_TOKEN` | A common authentication token used to connect to orchestrators. | `your-secret-worker-token` |
-
-
+- **`WORKER_INDIVIDUAL_TOKEN`**: An individual token for this worker, which overrides `WORKER_TOKEN` if set.
+- **`TLS_CA_PATH`**: Path to the CA certificate to verify the orchestrator.
+- **`TLS_CERT_PATH`**: Path to the client certificate for mTLS.
+- **`TLS_KEY_PATH`**: Path to the client private key for mTLS.
+- **`ORCHESTRATOR_URL`**: The address of the Avtomatika orchestrator.
 | `ORCHESTRATORS_CONFIG` | A JSON string with a list of orchestrators for multi-orchestrator modes. | `[]` |
 | `MULTI_ORCHESTRATOR_MODE` | The mode for handling multiple orchestrators. Possible values: `FAILOVER`, `ROUND_ROBIN`. | `FAILOVER` |
 | `MAX_CONCURRENT_TASKS` | The maximum number of tasks the worker can execute simultaneously. | `10` |
@@ -605,8 +665,9 @@ The worker is fully configured via environment variables.
 
 ## Development
 
-To install the necessary dependencies for running tests
+To install the necessary dependencies for running tests (assuming you are in the package root):
 
-
-
-
+1. Install the worker in editable mode with test dependencies:
+   ```bash
+   pip install -e .[test]
+   ```
avtomatika_worker-1.0b3/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b5/README.md
RENAMED
@@ -1,34 +1,10 @@
-Metadata-Version: 2.4
-Name: avtomatika-worker
-Version: 1.0b3
-Summary: Worker SDK for the Avtomatika orchestrator.
-Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
-Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: aiohttp~=3.13.2
-Requires-Dist: python-json-logger~=4.0.0
-Requires-Dist: obstore>=0.1
-Requires-Dist: aiofiles~=25.1.0
-Provides-Extra: test
-Requires-Dist: pytest; extra == "test"
-Requires-Dist: pytest-asyncio; extra == "test"
-Requires-Dist: aioresponses; extra == "test"
-Requires-Dist: pytest-mock; extra == "test"
-Requires-Dist: pydantic; extra == "test"
-Requires-Dist: types-aiofiles; extra == "test"
-Provides-Extra: pydantic
-Requires-Dist: pydantic; extra == "pydantic"
-Dynamic: license-file
-
 # Avtomatika Worker SDK
 
-
+[](https://opensource.org/licenses/MIT)
+[](https://www.python.org/downloads/release/python-3110/)
+[](https://github.com/astral-sh/ruff)
+
+This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It is built upon the **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)** and implements the **[HLN Protocol](https://github.com/avtomatika-ai/hln)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
 
 ## Installation
 
[The remaining hunks of this renamed file repeat, verbatim, the README-body changes already shown in the PKG-INFO diff above (Task Handlers, Error Handling, WebSocket Support and Middleware, the configuration variables, and the Development section), differing only in hunk offsets.]
{avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/pyproject.toml
@@ -4,17 +4,24 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika-worker"
-version = "1.0b3"
+version = "1.0b5"
 description = "Worker SDK for the Avtomatika orchestrator."
 readme = "README.md"
 requires-python = ">=3.11"
+authors = [
+    {name = "Dmitrii Gagarin", email = "madgagarin@gmail.com"},
+]
+keywords = ["worker", "sdk", "orchestrator", "distributed", "task-queue", "rxon", "hln"]
 classifiers = [
     "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
+    "Typing :: Typed",
 ]
 dependencies = [
+    "rxon==1.0b2",
     "aiohttp~=3.13.2",
     "python-json-logger~=4.0.0",
     "obstore>=0.1",
@@ -39,6 +46,9 @@ pydantic = ["pydantic"]
 [tool.setuptools.packages.find]
 where = ["src"]
 
+[tool.setuptools.package-data]
+"avtomatika_worker" = ["py.typed"]
+
 [tool.pytest.ini_options]
 markers = [
     "e2e: marks tests as end-to-end tests",
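The `[tool.setuptools.package-data]` addition exists to ship the PEP 561 `py.typed` marker inside built distributions; without it, setuptools omits the non-Python file and type checkers ignore the package's inline annotations despite the new `Typing :: Typed` classifier. A quick check against an installed wheel, as a sketch:

```python
# Sketch: verify the PEP 561 marker ships with an installed avtomatika-worker.
from importlib.resources import files

marker = files("avtomatika_worker").joinpath("py.typed")
print("py.typed present:", marker.is_file())  # expected True for 1.0b5 wheels
```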
{avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/config.py
@@ -4,6 +4,8 @@ from os import getenv
 from typing import Any
 from uuid import uuid4
 
+from rxon.validators import validate_identifier
+
 
 class WorkerConfig:
     """A class for centralized management of worker configuration.
@@ -29,6 +31,9 @@ class WorkerConfig:
             "WORKER_INDIVIDUAL_TOKEN",
             getenv("WORKER_TOKEN", "your-secret-worker-token"),
         )
+        self.TLS_CA_PATH: str | None = getenv("TLS_CA_PATH")
+        self.TLS_CERT_PATH: str | None = getenv("TLS_CERT_PATH")
+        self.TLS_KEY_PATH: str | None = getenv("TLS_KEY_PATH")
 
         # --- Resources and performance ---
         self.COST_PER_SKILL: dict[str, float] = self._load_json_from_env("COST_PER_SKILL", default={})
@@ -73,6 +78,7 @@ class WorkerConfig:
 
     def validate(self) -> None:
         """Validates critical configuration parameters."""
+        validate_identifier(self.WORKER_ID, "WORKER_ID")
         if self.WORKER_TOKEN == "your-secret-worker-token":
             print("Warning: WORKER_TOKEN is set to the default value. Tasks might fail authentication.")
 
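The three new `TLS_*` settings are only read here; the diff does not show where they are consumed (presumably the HTTP/WebSocket client in the rewritten `worker.py`). As a sketch of how such paths typically become an mTLS-capable client context (illustrative, not the SDK's actual wiring):

```python
# Sketch only: illustrative mTLS context built from the new TLS_* settings.
import ssl

from avtomatika_worker.config import WorkerConfig

def build_ssl_context(cfg: WorkerConfig) -> ssl.SSLContext | None:
    if not cfg.TLS_CA_PATH:
        return None  # fall back to default certificate verification
    # Trust the orchestrator's CA when verifying its server certificate
    ctx = ssl.create_default_context(cafile=cfg.TLS_CA_PATH)
    if cfg.TLS_CERT_PATH and cfg.TLS_KEY_PATH:
        # Present a client certificate for mutual TLS
        ctx.load_cert_chain(certfile=cfg.TLS_CERT_PATH, keyfile=cfg.TLS_KEY_PATH)
    return ctx
```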
{avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/s3.py
@@ -4,13 +4,17 @@ from os import walk
 from os.path import basename, dirname, join, relpath
 from shutil import rmtree
 from typing import Any, cast
-from urllib.parse import urlparse
 
-import obstore
 from aiofiles import open as aio_open
 from aiofiles.os import makedirs
-from aiofiles.ospath import exists, isdir
+from aiofiles.ospath import exists, getsize, isdir
+from obstore import get as obstore_get
+from obstore import list as obstore_list
+from obstore import put as obstore_put
 from obstore.store import S3Store
+from rxon.blob import parse_uri
+from rxon.exceptions import IntegrityError
+from rxon.models import FileMetadata
 
 from .config import WorkerConfig
 
@@ -61,12 +65,12 @@
         if await exists(task_dir):
             await to_thread(lambda: rmtree(task_dir, ignore_errors=True))
 
-    async def _process_s3_uri(self, uri: str, task_id: str) -> str:
-        """Downloads a file or a folder
+    async def _process_s3_uri(self, uri: str, task_id: str, verify_meta: FileMetadata | None = None) -> str:
+        """Downloads a file or a folder from S3 and returns the local path.
+        If verify_meta is provided, performs integrity checks.
+        """
         try:
-            parsed_url = urlparse(uri)
-            bucket_name = parsed_url.netloc
-            object_key = parsed_url.path.lstrip("/")
+            bucket_name, object_key, is_directory = parse_uri(uri)
             store = self._get_store(bucket_name)
 
             # Use task-specific directory for isolation
@@ -76,36 +80,27 @@
             logger.info(f"Starting download from S3: {uri}")
 
             # Handle folder download (prefix)
-            if
+            if is_directory:
                 folder_name = object_key.rstrip("/").split("/")[-1]
                 local_folder_path = join(local_dir_root, folder_name)
-
-                # List objects with prefix
-                # obstore.list returns an async iterator of ObjectMeta
                 files_to_download = []
 
-
-                async for obj in obstore.list(store, prefix=object_key):
+                async for obj in obstore_list(store, prefix=object_key):
                     key = obj.key
-
                     if key.endswith("/"):
                         continue
-
-                    # Calculate relative path inside the folder
                     rel_path = key[len(object_key) :]
                     local_file_path = join(local_folder_path, rel_path)
-
                     await makedirs(dirname(local_file_path), exist_ok=True)
                     files_to_download.append((key, local_file_path))
 
                 async def _download_file(key: str, path: str) -> None:
                     async with self._semaphore:
-                        result = await
+                        result = await obstore_get(store, key)
                         async with aio_open(path, "wb") as f:
                             async for chunk in result.stream():
                                 await f.write(chunk)
 
-                # Execute downloads in parallel
                 if files_to_download:
                     await gather(*[_download_file(k, p) for k, p in files_to_download])
 
@@ -115,7 +110,20 @@
             # Handle single file download
             local_path = join(local_dir_root, basename(object_key))
 
-            result = await
+            result = await obstore_get(store, object_key)
+
+            # Integrity check before download
+            if verify_meta:
+                if verify_meta.size != result.meta.size:
+                    raise IntegrityError(
+                        f"Size mismatch for {uri}: expected {verify_meta.size}, got {result.meta.size}"
+                    )
+                if verify_meta.etag and result.meta.e_tag:
+                    actual_etag = result.meta.e_tag.strip('"')
+                    expected_etag = verify_meta.etag.strip('"')
+                    if actual_etag != expected_etag:
+                        raise IntegrityError(f"ETag mismatch for {uri}: expected {expected_etag}, got {actual_etag}")
+
             async with aio_open(local_path, "wb") as f:
                 async for chunk in result.stream():
                     await f.write(chunk)
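The `verify_meta` branch above compares the remote object's size and quote-normalized ETag against a `rxon` `FileMetadata` before streaming anything to disk. A sketch of what it enforces (field names follow the diff; `FileMetadata` construction details are assumed):

```python
# Sketch: the check the new verify_meta path performs. Field names follow
# the diff; FileMetadata construction details are assumed.
from rxon.models import FileMetadata

expected = FileMetadata(uri="s3://bucket/in.bin", size=1024, etag='"abc123"')

# S3 often returns ETags wrapped in double quotes, so both sides are
# normalized before comparison:
assert expected.etag.strip('"') == "abc123"

# If the object at the URI reports a different size or ETag,
# _process_s3_uri(..., verify_meta=expected) raises rxon's IntegrityError
# before the worker consumes a corrupted or swapped file.
```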
@@ -128,8 +136,8 @@
             logger.exception(f"Error during download of {uri}: {e}")
             raise
 
-    async def _upload_to_s3(self, local_path: str) ->
-        """Uploads a file or a folder to S3 and returns
+    async def _upload_to_s3(self, local_path: str) -> FileMetadata:
+        """Uploads a file or a folder to S3 and returns FileMetadata."""
         bucket_name = self._config.S3_DEFAULT_BUCKET
         store = self._get_store(bucket_name)
 
@@ -141,70 +149,90 @@
                 folder_name = basename(local_path.rstrip("/"))
                 s3_prefix = f"{folder_name}/"
 
-                # Use to_thread to avoid blocking event loop during file walk
                 def _get_files_to_upload():
+                    from os.path import getsize as std_getsize
+
                     files_to_upload = []
+                    total_size = 0
                     for root, _, files in walk(local_path):
                         for file in files:
                             f_path = join(root, file)
                             rel = relpath(f_path, local_path)
+                            total_size += std_getsize(f_path)
                            files_to_upload.append((f_path, f"{s3_prefix}{rel}"))
-                    return files_to_upload
+                    return files_to_upload, total_size
 
-                files_list = await to_thread(_get_files_to_upload)
+                files_list, total_size = await to_thread(_get_files_to_upload)
 
                 async def _upload_file(path: str, key: str) -> None:
                     async with self._semaphore:
-                        # obstore.put accepts bytes or file-like objects.
-                        # Since we are in async, reading small files is fine.
                        with open(path, "rb") as f:
-                            await
+                            await obstore_put(store, key, f)
 
                 if files_list:
-                    # Upload in parallel
                     await gather(*[_upload_file(f, k) for f, k in files_list])
 
                 s3_uri = f"s3://{bucket_name}/{s3_prefix}"
                 logger.info(f"Successfully uploaded folder to S3: {local_path} -> {s3_uri} ({len(files_list)} files)")
-                return s3_uri
+                return FileMetadata(uri=s3_uri, size=total_size)
 
             # Handle single file upload
             object_key = basename(local_path)
+            file_size = await getsize(local_path)
            with open(local_path, "rb") as f:
-                await
+                put_result = await obstore_put(store, object_key, f)
 
            s3_uri = f"s3://{bucket_name}/{object_key}"
-
-
+            etag = put_result.e_tag.strip('"') if put_result.e_tag else None
+            logger.info(f"Successfully uploaded file to S3: {local_path} -> {s3_uri} (ETag: {etag})")
+            return FileMetadata(uri=s3_uri, size=file_size, etag=etag)
 
         except Exception as e:
             logger.exception(f"Error during upload of {local_path}: {e}")
             raise
 
-    async def process_params(
-
+    async def process_params(
+        self, params: dict[str, Any], task_id: str, metadata: dict[str, FileMetadata] | None = None
+    ) -> dict[str, Any]:
+        """Recursively searches for S3 URIs in params and downloads the files.
+        Uses metadata for integrity verification if available.
+        """
         if not self._config.S3_ENDPOINT_URL:
             return params
 
-        async def _process(item: Any) -> Any:
+        async def _process(item: Any, key_path: str = "") -> Any:
             if isinstance(item, str) and item.startswith("s3://"):
-
+                verify_meta = metadata.get(key_path) if metadata else None
+                return await self._process_s3_uri(item, task_id, verify_meta=verify_meta)
             if isinstance(item, dict):
-                return {k: await _process(v) for k, v in item.items()}
-
+                return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+            if isinstance(item, list):
+                return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+            return item
 
         return cast(dict[str, Any], await _process(params))
 
-    async def process_result(self, result: dict[str, Any]) -> dict[str, Any]:
-        """Recursively searches for local file paths in the result and uploads them to S3.
+    async def process_result(self, result: dict[str, Any]) -> tuple[dict[str, Any], dict[str, FileMetadata]]:
+        """Recursively searches for local file paths in the result and uploads them to S3.
+        Returns a tuple of (updated_result, metadata_map).
+        """
         if not self._config.S3_ENDPOINT_URL:
-            return result
+            return result, {}
+
+        metadata_map = {}
 
-        async def _process(item: Any) -> Any:
+        async def _process(item: Any, key_path: str = "") -> Any:
             if isinstance(item, str) and item.startswith(self._config.TASK_FILES_DIR):
-
+                if await exists(item):
+                    meta = await self._upload_to_s3(item)
+                    metadata_map[key_path] = meta
+                    return meta.uri
+                return item
             if isinstance(item, dict):
-                return {k: await _process(v) for k, v in item.items()}
-
+                return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+            if isinstance(item, list):
+                return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+            return item
 
-
+        updated_result = cast(dict[str, Any], await _process(result))
+        return updated_result, metadata_map