avtomatika-worker 1.0b3.tar.gz → 1.0b5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/LICENSE +1 -1
  2. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/PKG-INFO +82 -21
  3. avtomatika_worker-1.0b3/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b5/README.md +76 -48
  4. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/pyproject.toml +11 -1
  5. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/__init__.py +1 -1
  6. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/config.py +6 -0
  7. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/s3.py +76 -48
  8. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/task_files.py +60 -2
  9. avtomatika_worker-1.0b5/src/avtomatika_worker/types.py +46 -0
  10. avtomatika_worker-1.0b5/src/avtomatika_worker/worker.py +704 -0
  11. avtomatika_worker-1.0b3/README.md → avtomatika_worker-1.0b5/src/avtomatika_worker.egg-info/PKG-INFO +109 -20
  12. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/SOURCES.txt +2 -3
  13. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/requires.txt +1 -0
  14. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_dependency_injection.py +3 -4
  15. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_parameter_typing.py +15 -15
  16. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_s3.py +22 -12
  17. avtomatika_worker-1.0b5/tests/test_task_files_extended.py +60 -0
  18. avtomatika_worker-1.0b5/tests/test_validation.py +57 -0
  19. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_worker_logic.py +38 -25
  20. avtomatika_worker-1.0b5/tests/test_worker_more_logic.py +211 -0
  21. avtomatika_worker-1.0b5/tests/test_worker_sdk.py +281 -0
  22. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_wrr_logic.py +4 -3
  23. avtomatika_worker-1.0b3/src/avtomatika_worker/client.py +0 -93
  24. avtomatika_worker-1.0b3/src/avtomatika_worker/constants.py +0 -22
  25. avtomatika_worker-1.0b3/src/avtomatika_worker/types.py +0 -21
  26. avtomatika_worker-1.0b3/src/avtomatika_worker/worker.py +0 -526
  27. avtomatika_worker-1.0b3/tests/test_client.py +0 -52
  28. avtomatika_worker-1.0b3/tests/test_worker_more_logic.py +0 -310
  29. avtomatika_worker-1.0b3/tests/test_worker_sdk.py +0 -342
  30. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/setup.cfg +0 -0
  31. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker/py.typed +0 -0
  32. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/dependency_links.txt +0 -0
  33. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/src/avtomatika_worker.egg-info/top_level.txt +0 -0
  34. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_concurrency_limits.py +0 -0
  35. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_config.py +0 -0
  36. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_init.py +0 -0
  37. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_per_orchestrator_token.py +0 -0
  38. {avtomatika_worker-1.0b3 → avtomatika_worker-1.0b5}/tests/test_types.py +0 -0
--- avtomatika_worker-1.0b3/LICENSE
+++ avtomatika_worker-1.0b5/LICENSE
@@ -1,6 +1,6 @@
  MIT License
 
- Copyright (c) 2025 Dmitrii Gagarin
+ Copyright (c) 2025-2026 Dmitrii Gagarin aka madgagarin
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
--- avtomatika_worker-1.0b3/PKG-INFO
+++ avtomatika_worker-1.0b5/PKG-INFO
@@ -1,16 +1,21 @@
  Metadata-Version: 2.4
  Name: avtomatika-worker
- Version: 1.0b3
+ Version: 1.0b5
  Summary: Worker SDK for the Avtomatika orchestrator.
+ Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
  Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
  Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
+ Keywords: worker,sdk,orchestrator,distributed,task-queue,rxon,hln
  Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
+ Classifier: Typing :: Typed
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  License-File: LICENSE
+ Requires-Dist: rxon==1.0b2
  Requires-Dist: aiohttp~=3.13.2
  Requires-Dist: python-json-logger~=4.0.0
  Requires-Dist: obstore>=0.1
@@ -28,7 +33,11 @@ Dynamic: license-file
 
  # Avtomatika Worker SDK
 
- This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It implements the **[RCA Protocol](https://github.com/avtomatika-ai/rca)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
+ [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
+
+ This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It is built upon the **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)** and implements the **[HLN Protocol](https://github.com/avtomatika-ai/hln)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
 
  ## Installation
 
@@ -286,13 +295,26 @@ async def image_resizer(params: ResizeParams, **kwargs):
 
  ### 1. Task Handlers
 
- Each handler is an asynchronous function that accepts two arguments:
+ Each handler is a function (either `async def` or `def`) that accepts two arguments:
 
  - `params` (`dict`, `dataclass`, or `pydantic.BaseModel`): The parameters for the task, automatically validated and instantiated based on your type hint.
  - `**kwargs`: Additional metadata about the task, including:
    - `task_id` (`str`): The unique ID of the task itself.
    - `job_id` (`str`): The ID of the parent `Job` to which the task belongs.
    - `priority` (`int`): The execution priority of the task.
+   - `send_progress` (`callable`): An async function `await send_progress(progress_float, message_string)` to report task execution progress (0.0 to 1.0) to the orchestrator.
+
+ **Synchronous Handlers:**
+ If you define your handler as a standard synchronous function (`def handler(...)`), the SDK will automatically execute it in a separate thread using `asyncio.to_thread`. This ensures that CPU-intensive operations (like model inference) do not block the worker's main event loop, allowing heartbeats and other background tasks to continue running smoothly.
+
+ ```python
+ @worker.task("cpu_heavy_task")
+ def heavy_computation(params: dict, **kwargs):
+     # This will run in a thread, not blocking the loop
+     import time
+     time.sleep(10)
+     return {"status": "success"}
+ ```
 
  ### 2. Concurrency Limiting
 
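The `send_progress` callable added in 1.0b5 is only described in prose in the hunk above. A minimal sketch of a handler using it, assuming `send_progress` can be declared as a named keyword argument like the other handler kwargs (the task name and work loop are hypothetical, not from the package):

```python
@worker.task("transcode")  # hypothetical task name
async def transcode(params: dict, send_progress, **kwargs):
    total_steps = 4
    for step in range(total_steps):
        ...  # one chunk of work
        # Contract from the README: a float in [0.0, 1.0] plus a message string.
        await send_progress((step + 1) / total_steps, f"step {step + 1}/{total_steps} done")
    return {"status": "success"}
```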
@@ -383,7 +405,7 @@ return {
 
  #### Error Handling
 
- To control the orchestrator's fault tolerance mechanism, you can return standardized error types.
+ To control the orchestrator's fault tolerance mechanism, you can return standardized error types. All error constants can be imported from `avtomatika_worker.typing`.
 
  - **Transient Error (`TRANSIENT_ERROR`)**: For issues that might be resolved on a retry (e.g., a network failure).
    ```python
@@ -396,17 +418,10 @@ To control the orchestrator's fault tolerance mechanism, you can return standard
        }
    }
    ```
- - **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format).
-   ```python
-   from avtomatika_worker.typing import PERMANENT_ERROR
-   return {
-       "status": "failure",
-       "error": {
-           "code": PERMANENT_ERROR,
-           "message": "Corrupted input file"
-       }
-   }
-   ```
+ - **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format). Causes immediate quarantine.
+ - **Security Error (`SECURITY_ERROR`)**: For security violations. Causes immediate quarantine.
+ - **Dependency Error (`DEPENDENCY_ERROR`)**: For missing models or tools. Causes immediate quarantine.
+ - **Resource Exhausted (`RESOURCE_EXHAUSTED_ERROR`)**: When resources are temporarily unavailable. Treated as transient (retried).
 
  ### 4. Failover and Load Balancing
 
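The per-error code samples were collapsed into one-line bullets in 1.0b5; only the transient-error sample remains in the README. For reference, the removed permanent-error sample had the following shape, and, assuming the other constants follow the same pattern, `SECURITY_ERROR`, `DEPENDENCY_ERROR`, and `RESOURCE_EXHAUSTED_ERROR` slot into the same `code` field:

```python
from avtomatika_worker.typing import PERMANENT_ERROR

# Returned from inside a task handler; PERMANENT_ERROR triggers immediate quarantine.
return {
    "status": "failure",
    "error": {
        "code": PERMANENT_ERROR,
        "message": "Corrupted input file",
    },
}
```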
@@ -521,6 +536,48 @@ This only requires configuring environment variables for S3 access (see Full Con
 
  ### 7. WebSocket Support
 
+ For real-time communication (e.g., immediate task cancellation), the worker supports WebSocket connections. This is enabled by setting `WORKER_ENABLE_WEBSOCKETS=true`. When connected, the orchestrator can push commands like `cancel_task` directly to the worker.
+
+ ### 8. Middleware
+
+ The worker supports a middleware system, allowing you to wrap task executions with custom logic. This is particularly useful for resource management (e.g., acquiring GPU locks), logging, error handling, or **Dependency Injection**.
+
+ Middleware functions wrap the execution of the task handler (and any subsequent middlewares). They receive a context dictionary and the next handler in the chain.
+
+ The `context` dictionary contains:
+ - `task_id`, `job_id`, `task_name`: Metadata.
+ - `params`: The validated parameters object.
+ - `handler_kwargs`: A dictionary of arguments that will be passed to the handler. **Middleware can modify this dictionary to inject dependencies.**
+
+ **Example: GPU Resource Manager & Dependency Injection**
+
+ ```python
+ async def gpu_lock_middleware(context: dict, next_handler: callable):
+     # Pre-processing: Acquire resource
+     print(f"Acquiring GPU for task {context['task_id']}...")
+     model_path = await resource_manager.allocate()
+
+     # Inject the model path into the handler's arguments
+     context["handler_kwargs"]["model_path"] = model_path
+
+     try:
+         # Execute the next handler in the chain
+         result = await next_handler()
+         return result
+     finally:
+         # Post-processing: Release resource
+         print(f"Releasing GPU for task {context['task_id']}...")
+         resource_manager.release()
+
+ # Register the middleware
+ worker.add_middleware(gpu_lock_middleware)
+
+ # Handler now receives 'model_path' automatically
+ @worker.task("generate")
+ def generate(params, model_path, **kwargs):
+     print(f"Using model at: {model_path}")
+ ```
+
  ## Advanced Features
 
  ### Reporting Skill & Model Dependencies
@@ -577,8 +634,11 @@ The worker is fully configured via environment variables.
  | `WORKER_TYPE` | A string identifying the type of the worker. | `generic-cpu-worker` |
  | `WORKER_PORT` | The port for the worker's health check server. | `8083` |
  | `WORKER_TOKEN` | A common authentication token used to connect to orchestrators. | `your-secret-worker-token` |
- | `WORKER_INDIVIDUAL_TOKEN` | An individual token for this worker, which overrides `WORKER_TOKEN` if set. | - |
- | `ORCHESTRATOR_URL` | The URL of a single orchestrator (used if `ORCHESTRATORS_CONFIG` is not set). | `http://localhost:8080` |
+ - **`WORKER_INDIVIDUAL_TOKEN`**: An individual token for this worker, which overrides `WORKER_TOKEN` if set.
+ - **`TLS_CA_PATH`**: Path to the CA certificate to verify the orchestrator.
+ - **`TLS_CERT_PATH`**: Path to the client certificate for mTLS.
+ - **`TLS_KEY_PATH`**: Path to the client private key for mTLS.
+ - **`ORCHESTRATOR_URL`**: The address of the Avtomatika orchestrator.
  | `ORCHESTRATORS_CONFIG` | A JSON string with a list of orchestrators for multi-orchestrator modes. | `[]` |
  | `MULTI_ORCHESTRATOR_MODE` | The mode for handling multiple orchestrators. Possible values: `FAILOVER`, `ROUND_ROBIN`. | `FAILOVER` |
  | `MAX_CONCURRENT_TASKS` | The maximum number of tasks the worker can execute simultaneously. | `10` |
@@ -605,8 +665,9 @@ The worker is fully configured via environment variables.
 
  ## Development
 
- To install the necessary dependencies for running tests, use the following command:
+ To install the necessary dependencies for running tests (assuming you are in the package root):
 
- ```bash
- pip install .[test]
- ```
+ 1. Install the worker in editable mode with test dependencies:
+    ```bash
+    pip install -e .[test]
+    ```
--- avtomatika_worker-1.0b3/src/avtomatika_worker.egg-info/PKG-INFO
+++ avtomatika_worker-1.0b5/README.md
@@ -1,34 +1,10 @@
- Metadata-Version: 2.4
- Name: avtomatika-worker
- Version: 1.0b3
- Summary: Worker SDK for the Avtomatika orchestrator.
- Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
- Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
- Classifier: Development Status :: 4 - Beta
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.11
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: aiohttp~=3.13.2
- Requires-Dist: python-json-logger~=4.0.0
- Requires-Dist: obstore>=0.1
- Requires-Dist: aiofiles~=25.1.0
- Provides-Extra: test
- Requires-Dist: pytest; extra == "test"
- Requires-Dist: pytest-asyncio; extra == "test"
- Requires-Dist: aioresponses; extra == "test"
- Requires-Dist: pytest-mock; extra == "test"
- Requires-Dist: pydantic; extra == "test"
- Requires-Dist: types-aiofiles; extra == "test"
- Provides-Extra: pydantic
- Requires-Dist: pydantic; extra == "pydantic"
- Dynamic: license-file
-
  # Avtomatika Worker SDK
 
- This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It implements the **[RCA Protocol](https://github.com/avtomatika-ai/rca)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
+ [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
+
+ This is the official SDK for creating workers compatible with the **[Avtomatika Orchestrator](https://github.com/avtomatika-ai/avtomatika)**. It is built upon the **[Avtomatika Protocol](https://github.com/avtomatika-ai/rxon)** and implements the **[HLN Protocol](https://github.com/avtomatika-ai/hln)**, handling all communication complexity (polling, heartbeats, S3 offloading) so you can focus on writing your business logic.
 
  ## Installation
 
@@ -286,13 +262,26 @@ async def image_resizer(params: ResizeParams, **kwargs):
 
  ### 1. Task Handlers
 
- Each handler is an asynchronous function that accepts two arguments:
+ Each handler is a function (either `async def` or `def`) that accepts two arguments:
 
  - `params` (`dict`, `dataclass`, or `pydantic.BaseModel`): The parameters for the task, automatically validated and instantiated based on your type hint.
  - `**kwargs`: Additional metadata about the task, including:
    - `task_id` (`str`): The unique ID of the task itself.
    - `job_id` (`str`): The ID of the parent `Job` to which the task belongs.
    - `priority` (`int`): The execution priority of the task.
+   - `send_progress` (`callable`): An async function `await send_progress(progress_float, message_string)` to report task execution progress (0.0 to 1.0) to the orchestrator.
+
+ **Synchronous Handlers:**
+ If you define your handler as a standard synchronous function (`def handler(...)`), the SDK will automatically execute it in a separate thread using `asyncio.to_thread`. This ensures that CPU-intensive operations (like model inference) do not block the worker's main event loop, allowing heartbeats and other background tasks to continue running smoothly.
+
+ ```python
+ @worker.task("cpu_heavy_task")
+ def heavy_computation(params: dict, **kwargs):
+     # This will run in a thread, not blocking the loop
+     import time
+     time.sleep(10)
+     return {"status": "success"}
+ ```
 
  ### 2. Concurrency Limiting
 
@@ -383,7 +372,7 @@ return {
 
  #### Error Handling
 
- To control the orchestrator's fault tolerance mechanism, you can return standardized error types.
+ To control the orchestrator's fault tolerance mechanism, you can return standardized error types. All error constants can be imported from `avtomatika_worker.typing`.
 
  - **Transient Error (`TRANSIENT_ERROR`)**: For issues that might be resolved on a retry (e.g., a network failure).
    ```python
@@ -396,17 +385,10 @@ To control the orchestrator's fault tolerance mechanism, you can return standard
        }
    }
    ```
- - **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format).
-   ```python
-   from avtomatika_worker.typing import PERMANENT_ERROR
-   return {
-       "status": "failure",
-       "error": {
-           "code": PERMANENT_ERROR,
-           "message": "Corrupted input file"
-       }
-   }
-   ```
+ - **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format). Causes immediate quarantine.
+ - **Security Error (`SECURITY_ERROR`)**: For security violations. Causes immediate quarantine.
+ - **Dependency Error (`DEPENDENCY_ERROR`)**: For missing models or tools. Causes immediate quarantine.
+ - **Resource Exhausted (`RESOURCE_EXHAUSTED_ERROR`)**: When resources are temporarily unavailable. Treated as transient (retried).
 
  ### 4. Failover and Load Balancing
 
@@ -521,6 +503,48 @@ This only requires configuring environment variables for S3 access (see Full Con
 
  ### 7. WebSocket Support
 
+ For real-time communication (e.g., immediate task cancellation), the worker supports WebSocket connections. This is enabled by setting `WORKER_ENABLE_WEBSOCKETS=true`. When connected, the orchestrator can push commands like `cancel_task` directly to the worker.
+
+ ### 8. Middleware
+
+ The worker supports a middleware system, allowing you to wrap task executions with custom logic. This is particularly useful for resource management (e.g., acquiring GPU locks), logging, error handling, or **Dependency Injection**.
+
+ Middleware functions wrap the execution of the task handler (and any subsequent middlewares). They receive a context dictionary and the next handler in the chain.
+
+ The `context` dictionary contains:
+ - `task_id`, `job_id`, `task_name`: Metadata.
+ - `params`: The validated parameters object.
+ - `handler_kwargs`: A dictionary of arguments that will be passed to the handler. **Middleware can modify this dictionary to inject dependencies.**
+
+ **Example: GPU Resource Manager & Dependency Injection**
+
+ ```python
+ async def gpu_lock_middleware(context: dict, next_handler: callable):
+     # Pre-processing: Acquire resource
+     print(f"Acquiring GPU for task {context['task_id']}...")
+     model_path = await resource_manager.allocate()
+
+     # Inject the model path into the handler's arguments
+     context["handler_kwargs"]["model_path"] = model_path
+
+     try:
+         # Execute the next handler in the chain
+         result = await next_handler()
+         return result
+     finally:
+         # Post-processing: Release resource
+         print(f"Releasing GPU for task {context['task_id']}...")
+         resource_manager.release()
+
+ # Register the middleware
+ worker.add_middleware(gpu_lock_middleware)
+
+ # Handler now receives 'model_path' automatically
+ @worker.task("generate")
+ def generate(params, model_path, **kwargs):
+     print(f"Using model at: {model_path}")
+ ```
+
  ## Advanced Features
 
  ### Reporting Skill & Model Dependencies
@@ -577,8 +601,11 @@ The worker is fully configured via environment variables.
  | `WORKER_TYPE` | A string identifying the type of the worker. | `generic-cpu-worker` |
  | `WORKER_PORT` | The port for the worker's health check server. | `8083` |
  | `WORKER_TOKEN` | A common authentication token used to connect to orchestrators. | `your-secret-worker-token` |
- | `WORKER_INDIVIDUAL_TOKEN` | An individual token for this worker, which overrides `WORKER_TOKEN` if set. | - |
- | `ORCHESTRATOR_URL` | The URL of a single orchestrator (used if `ORCHESTRATORS_CONFIG` is not set). | `http://localhost:8080` |
+ - **`WORKER_INDIVIDUAL_TOKEN`**: An individual token for this worker, which overrides `WORKER_TOKEN` if set.
+ - **`TLS_CA_PATH`**: Path to the CA certificate to verify the orchestrator.
+ - **`TLS_CERT_PATH`**: Path to the client certificate for mTLS.
+ - **`TLS_KEY_PATH`**: Path to the client private key for mTLS.
+ - **`ORCHESTRATOR_URL`**: The address of the Avtomatika orchestrator.
  | `ORCHESTRATORS_CONFIG` | A JSON string with a list of orchestrators for multi-orchestrator modes. | `[]` |
  | `MULTI_ORCHESTRATOR_MODE` | The mode for handling multiple orchestrators. Possible values: `FAILOVER`, `ROUND_ROBIN`. | `FAILOVER` |
  | `MAX_CONCURRENT_TASKS` | The maximum number of tasks the worker can execute simultaneously. | `10` |
@@ -605,8 +632,9 @@ The worker is fully configured via environment variables.
 
  ## Development
 
- To install the necessary dependencies for running tests, use the following command:
+ To install the necessary dependencies for running tests (assuming you are in the package root):
 
- ```bash
- pip install .[test]
- ```
+ 1. Install the worker in editable mode with test dependencies:
+    ```bash
+    pip install -e .[test]
+    ```
--- avtomatika_worker-1.0b3/pyproject.toml
+++ avtomatika_worker-1.0b5/pyproject.toml
@@ -4,17 +4,24 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "avtomatika-worker"
- version = "1.0.b3"
+ version = "1.0b5"
  description = "Worker SDK for the Avtomatika orchestrator."
  readme = "README.md"
  requires-python = ">=3.11"
+ authors = [
+     {name = "Dmitrii Gagarin", email = "madgagarin@gmail.com"},
+ ]
+ keywords = ["worker", "sdk", "orchestrator", "distributed", "task-queue", "rxon", "hln"]
  classifiers = [
      "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
      "Programming Language :: Python :: 3",
      "License :: OSI Approved :: MIT License",
      "Operating System :: OS Independent",
+     "Typing :: Typed",
  ]
  dependencies = [
+     "rxon==1.0b2",
      "aiohttp~=3.13.2",
      "python-json-logger~=4.0.0",
      "obstore>=0.1",
@@ -39,6 +46,9 @@ pydantic = ["pydantic"]
  [tool.setuptools.packages.find]
  where = ["src"]
 
+ [tool.setuptools.package-data]
+ "avtomatika_worker" = ["py.typed"]
+
  [tool.pytest.ini_options]
  markers = [
      "e2e: marks tests as end-to-end tests",
--- avtomatika_worker-1.0b3/src/avtomatika_worker/__init__.py
+++ avtomatika_worker-1.0b5/src/avtomatika_worker/__init__.py
@@ -1,4 +1,4 @@
- """A Python SDK for creating workers for the Py-Orchestrator."""
+ """A Python SDK for creating workers for the Avtomatika Orchestrator."""
 
  from importlib.metadata import PackageNotFoundError, version
 
--- avtomatika_worker-1.0b3/src/avtomatika_worker/config.py
+++ avtomatika_worker-1.0b5/src/avtomatika_worker/config.py
@@ -4,6 +4,8 @@ from os import getenv
  from typing import Any
  from uuid import uuid4
 
+ from rxon.validators import validate_identifier
+
 
  class WorkerConfig:
      """A class for centralized management of worker configuration.
@@ -29,6 +31,9 @@ class WorkerConfig:
              "WORKER_INDIVIDUAL_TOKEN",
              getenv("WORKER_TOKEN", "your-secret-worker-token"),
          )
+         self.TLS_CA_PATH: str | None = getenv("TLS_CA_PATH")
+         self.TLS_CERT_PATH: str | None = getenv("TLS_CERT_PATH")
+         self.TLS_KEY_PATH: str | None = getenv("TLS_KEY_PATH")
 
          # --- Resources and performance ---
          self.COST_PER_SKILL: dict[str, float] = self._load_json_from_env("COST_PER_SKILL", default={})
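The new `TLS_*` settings are read as plain paths; this diff does not show how the worker consumes them. A rough sketch of the kind of client-side mTLS context they imply, as an assumption about usage rather than code from the SDK:

```python
import ssl

def build_ssl_context(config) -> ssl.SSLContext | None:
    # Hypothetical helper: not part of avtomatika_worker.
    if not config.TLS_CA_PATH:
        return None
    # Verify the orchestrator against the configured CA.
    ctx = ssl.create_default_context(cafile=config.TLS_CA_PATH)
    if config.TLS_CERT_PATH and config.TLS_KEY_PATH:
        # Present a client certificate for mutual TLS.
        ctx.load_cert_chain(certfile=config.TLS_CERT_PATH, keyfile=config.TLS_KEY_PATH)
    return ctx
```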
@@ -73,6 +78,7 @@ class WorkerConfig:
 
      def validate(self) -> None:
          """Validates critical configuration parameters."""
+         validate_identifier(self.WORKER_ID, "WORKER_ID")
          if self.WORKER_TOKEN == "your-secret-worker-token":
              print("Warning: WORKER_TOKEN is set to the default value. Tasks might fail authentication.")
 
--- avtomatika_worker-1.0b3/src/avtomatika_worker/s3.py
+++ avtomatika_worker-1.0b5/src/avtomatika_worker/s3.py
@@ -4,13 +4,17 @@ from os import walk
  from os.path import basename, dirname, join, relpath
  from shutil import rmtree
  from typing import Any, cast
- from urllib.parse import urlparse
 
- import obstore
  from aiofiles import open as aio_open
  from aiofiles.os import makedirs
- from aiofiles.ospath import exists, isdir
+ from aiofiles.ospath import exists, getsize, isdir
+ from obstore import get as obstore_get
+ from obstore import list as obstore_list
+ from obstore import put as obstore_put
  from obstore.store import S3Store
+ from rxon.blob import parse_uri
+ from rxon.exceptions import IntegrityError
+ from rxon.models import FileMetadata
 
  from .config import WorkerConfig
 
@@ -61,12 +65,12 @@ class S3Manager:
          if await exists(task_dir):
              await to_thread(lambda: rmtree(task_dir, ignore_errors=True))
 
-     async def _process_s3_uri(self, uri: str, task_id: str) -> str:
-         """Downloads a file or a folder (if uri ends with /) from S3 and returns the local path."""
+     async def _process_s3_uri(self, uri: str, task_id: str, verify_meta: FileMetadata | None = None) -> str:
+         """Downloads a file or a folder from S3 and returns the local path.
+         If verify_meta is provided, performs integrity checks.
+         """
          try:
-             parsed_url = urlparse(uri)
-             bucket_name = parsed_url.netloc
-             object_key = parsed_url.path.lstrip("/")
+             bucket_name, object_key, is_directory = parse_uri(uri)
              store = self._get_store(bucket_name)
 
              # Use task-specific directory for isolation
@@ -76,36 +80,27 @@
              logger.info(f"Starting download from S3: {uri}")
 
              # Handle folder download (prefix)
-             if uri.endswith("/"):
+             if is_directory:
                  folder_name = object_key.rstrip("/").split("/")[-1]
                  local_folder_path = join(local_dir_root, folder_name)
-
-                 # List objects with prefix
-                 # obstore.list returns an async iterator of ObjectMeta
                  files_to_download = []
 
-                 # Note: obstore.list returns an async iterator.
-                 async for obj in obstore.list(store, prefix=object_key):
+                 async for obj in obstore_list(store, prefix=object_key):
                      key = obj.key
-
                      if key.endswith("/"):
                          continue
-
-                     # Calculate relative path inside the folder
                      rel_path = key[len(object_key) :]
                      local_file_path = join(local_folder_path, rel_path)
-
                      await makedirs(dirname(local_file_path), exist_ok=True)
                      files_to_download.append((key, local_file_path))
 
                  async def _download_file(key: str, path: str) -> None:
                      async with self._semaphore:
-                         result = await obstore.get(store, key)
+                         result = await obstore_get(store, key)
                          async with aio_open(path, "wb") as f:
                              async for chunk in result.stream():
                                  await f.write(chunk)
 
-                 # Execute downloads in parallel
                  if files_to_download:
                      await gather(*[_download_file(k, p) for k, p in files_to_download])
 
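`parse_uri` from `rxon.blob` replaces the manual `urlparse` handling above and also decides between file and prefix downloads. Its contract, inferred from this diff alone (the old code treated a trailing slash as a folder, and the new code branches on the third return value; rxon's own documentation is not part of this diff):

```python
from rxon.blob import parse_uri

# Inferred: a trailing slash marks a directory (prefix) download.
bucket, key, is_dir = parse_uri("s3://my-bucket/datasets/images/")
# -> ("my-bucket", "datasets/images/", True)

bucket, key, is_dir = parse_uri("s3://my-bucket/inputs/video.mp4")
# -> ("my-bucket", "inputs/video.mp4", False)
```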
@@ -115,7 +110,20 @@
              # Handle single file download
              local_path = join(local_dir_root, basename(object_key))
 
-             result = await obstore.get(store, object_key)
+             result = await obstore_get(store, object_key)
+
+             # Integrity check before download
+             if verify_meta:
+                 if verify_meta.size != result.meta.size:
+                     raise IntegrityError(
+                         f"Size mismatch for {uri}: expected {verify_meta.size}, got {result.meta.size}"
+                     )
+                 if verify_meta.etag and result.meta.e_tag:
+                     actual_etag = result.meta.e_tag.strip('"')
+                     expected_etag = verify_meta.etag.strip('"')
+                     if actual_etag != expected_etag:
+                         raise IntegrityError(f"ETag mismatch for {uri}: expected {expected_etag}, got {actual_etag}")
+
              async with aio_open(local_path, "wb") as f:
                  async for chunk in result.stream():
                      await f.write(chunk)
@@ -128,8 +136,8 @@
              logger.exception(f"Error during download of {uri}: {e}")
              raise
 
-     async def _upload_to_s3(self, local_path: str) -> str:
-         """Uploads a file or a folder to S3 and returns the S3 URI."""
+     async def _upload_to_s3(self, local_path: str) -> FileMetadata:
+         """Uploads a file or a folder to S3 and returns FileMetadata."""
          bucket_name = self._config.S3_DEFAULT_BUCKET
          store = self._get_store(bucket_name)
 
@@ -141,70 +149,90 @@
              folder_name = basename(local_path.rstrip("/"))
              s3_prefix = f"{folder_name}/"
 
-             # Use to_thread to avoid blocking event loop during file walk
              def _get_files_to_upload():
+                 from os.path import getsize as std_getsize
+
                  files_to_upload = []
+                 total_size = 0
                  for root, _, files in walk(local_path):
                      for file in files:
                          f_path = join(root, file)
                          rel = relpath(f_path, local_path)
+                         total_size += std_getsize(f_path)
                          files_to_upload.append((f_path, f"{s3_prefix}{rel}"))
-                 return files_to_upload
+                 return files_to_upload, total_size
 
-             files_list = await to_thread(_get_files_to_upload)
+             files_list, total_size = await to_thread(_get_files_to_upload)
 
              async def _upload_file(path: str, key: str) -> None:
                  async with self._semaphore:
-                     # obstore.put accepts bytes or file-like objects.
-                     # Since we are in async, reading small files is fine.
                      with open(path, "rb") as f:
-                         await obstore.put(store, key, f)
+                         await obstore_put(store, key, f)
 
              if files_list:
-                 # Upload in parallel
                  await gather(*[_upload_file(f, k) for f, k in files_list])
 
              s3_uri = f"s3://{bucket_name}/{s3_prefix}"
              logger.info(f"Successfully uploaded folder to S3: {local_path} -> {s3_uri} ({len(files_list)} files)")
-             return s3_uri
+             return FileMetadata(uri=s3_uri, size=total_size)
 
          # Handle single file upload
          object_key = basename(local_path)
+         file_size = await getsize(local_path)
          with open(local_path, "rb") as f:
-             await obstore.put(store, object_key, f)
+             put_result = await obstore_put(store, object_key, f)
 
          s3_uri = f"s3://{bucket_name}/{object_key}"
-         logger.info(f"Successfully uploaded file to S3: {local_path} -> {s3_uri}")
-         return s3_uri
+         etag = put_result.e_tag.strip('"') if put_result.e_tag else None
+         logger.info(f"Successfully uploaded file to S3: {local_path} -> {s3_uri} (ETag: {etag})")
+         return FileMetadata(uri=s3_uri, size=file_size, etag=etag)
 
      except Exception as e:
          logger.exception(f"Error during upload of {local_path}: {e}")
          raise
 
-     async def process_params(self, params: dict[str, Any], task_id: str) -> dict[str, Any]:
-         """Recursively searches for S3 URIs in params and downloads the files."""
+     async def process_params(
+         self, params: dict[str, Any], task_id: str, metadata: dict[str, FileMetadata] | None = None
+     ) -> dict[str, Any]:
+         """Recursively searches for S3 URIs in params and downloads the files.
+         Uses metadata for integrity verification if available.
+         """
          if not self._config.S3_ENDPOINT_URL:
              return params
 
-         async def _process(item: Any) -> Any:
+         async def _process(item: Any, key_path: str = "") -> Any:
              if isinstance(item, str) and item.startswith("s3://"):
-                 return await self._process_s3_uri(item, task_id)
+                 verify_meta = metadata.get(key_path) if metadata else None
+                 return await self._process_s3_uri(item, task_id, verify_meta=verify_meta)
              if isinstance(item, dict):
-                 return {k: await _process(v) for k, v in item.items()}
-             return [await _process(i) for i in item] if isinstance(item, list) else item
+                 return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+             if isinstance(item, list):
+                 return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+             return item
 
          return cast(dict[str, Any], await _process(params))
 
-     async def process_result(self, result: dict[str, Any]) -> dict[str, Any]:
-         """Recursively searches for local file paths in the result and uploads them to S3."""
+     async def process_result(self, result: dict[str, Any]) -> tuple[dict[str, Any], dict[str, FileMetadata]]:
+         """Recursively searches for local file paths in the result and uploads them to S3.
+         Returns a tuple of (updated_result, metadata_map).
+         """
          if not self._config.S3_ENDPOINT_URL:
-             return result
+             return result, {}
+
+         metadata_map = {}
 
-         async def _process(item: Any) -> Any:
+         async def _process(item: Any, key_path: str = "") -> Any:
              if isinstance(item, str) and item.startswith(self._config.TASK_FILES_DIR):
-                 return await self._upload_to_s3(item) if await exists(item) else item
+                 if await exists(item):
+                     meta = await self._upload_to_s3(item)
+                     metadata_map[key_path] = meta
+                     return meta.uri
+                 return item
              if isinstance(item, dict):
-                 return {k: await _process(v) for k, v in item.items()}
-             return [await _process(i) for i in item] if isinstance(item, list) else item
+                 return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+             if isinstance(item, list):
+                 return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+             return item
 
-         return cast(dict[str, Any], await _process(result))
+         updated_result = cast(dict[str, Any], await _process(result))
+         return updated_result, metadata_map
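Taken together, the reworked `process_result` and `process_params` share a key-path convention for the new metadata map: dict keys are joined with dots and list indices are appended in brackets. A small illustration with hypothetical values:

```python
# A handler returns local paths under TASK_FILES_DIR:
result = {"output": {"files": ["/task_files/t1/a.mp4", "/task_files/t1/b.mp4"]}}

# process_result uploads each file, swaps in the S3 URI, and records
# FileMetadata (uri, size, etag) under the file's key path:
#   updated_result == {"output": {"files": ["s3://bucket/a.mp4", "s3://bucket/b.mp4"]}}
#   metadata_map == {
#       "output.files[0]": FileMetadata(uri="s3://bucket/a.mp4", size=..., etag="..."),
#       "output.files[1]": FileMetadata(uri="s3://bucket/b.mp4", size=..., etag="..."),
#   }
# The same map, passed back into process_params on the next worker, lets it
# verify size and ETag before writing each download to disk.
```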