avtomatika-worker 1.0b1.tar.gz → 1.0b2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/PKG-INFO +86 -13
- avtomatika_worker-1.0b1/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b2/README.md +81 -35
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/pyproject.toml +6 -7
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/__init__.py +2 -1
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/config.py +13 -14
- avtomatika_worker-1.0b2/src/avtomatika_worker/s3.py +141 -0
- avtomatika_worker-1.0b2/src/avtomatika_worker/task_files.py +97 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/worker.py +35 -12
- avtomatika_worker-1.0b1/README.md → avtomatika_worker-1.0b2/src/avtomatika_worker.egg-info/PKG-INFO +108 -7
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/SOURCES.txt +2 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/requires.txt +2 -3
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_config.py +3 -0
- avtomatika_worker-1.0b2/tests/test_dependency_injection.py +117 -0
- avtomatika_worker-1.0b2/tests/test_s3.py +179 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_more_logic.py +19 -15
- avtomatika_worker-1.0b1/src/avtomatika_worker/s3.py +0 -75
- avtomatika_worker-1.0b1/tests/test_s3.py +0 -85
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/LICENSE +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/setup.cfg +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/types.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/dependency_links.txt +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/top_level.txt +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_concurrency_limits.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_init.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_parameter_typing.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_per_orchestrator_token.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_types.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_logic.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_sdk.py +0 -0
- {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_wrr_logic.py +0 -0
{avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/PKG-INFO
CHANGED

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: avtomatika-worker
-Version: 1.0b1
+Version: 1.0b2
 Summary: Worker SDK for the Avtomatika orchestrator.
-Project-URL: Homepage, https://github.com/avtomatila-ai/avtomatika-worker
-Project-URL: Bug Tracker, https://github.com/avtomatila-ai/avtomatika-worker/issues
+Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
+Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
@@ -13,15 +13,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aiohttp~=3.13.2
 Requires-Dist: python-json-logger~=4.0.0
-Requires-Dist: aioboto3~=13.0.0
+Requires-Dist: aioboto3~=15.5.0
+Requires-Dist: aiofiles~=25.1.0
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-asyncio; extra == "test"
 Requires-Dist: aioresponses; extra == "test"
 Requires-Dist: pytest-mock; extra == "test"
 Requires-Dist: pydantic; extra == "test"
-Requires-Dist: moto[server]; extra == "test"
-Requires-Dist: aiofiles; extra == "test"
 Provides-Extra: pydantic
 Requires-Dist: pydantic; extra == "pydantic"
 Dynamic: license-file
@@ -434,18 +433,92 @@ The `ORCHESTRATORS_CONFIG` variable must contain a JSON string. Each object in t
 
 
 
-### 5. Handling Large Files (S3 Payload Offloading)
+
+
+### 5. File System Helper (TaskFiles)
+
+To simplify working with temporary files and paths, the SDK provides a `TaskFiles` helper class. It automatically manages directory creation within the isolated task folder and provides an asynchronous interface for file operations. Just add an argument typed as `TaskFiles` to your handler:
+
+```python
+from avtomatika_worker import Worker, TaskFiles
+
+@worker.task("generate_report")
+async def generate_report(params: dict, files: TaskFiles, **kwargs):
+    # 1. Easy read/write
+    await files.write("data.json", '{"status": "ok"}')
+    content = await files.read("data.json")
+
+    # 2. Get path (directory is created automatically)
+    output_path = await files.path_to("report.pdf")
+
+    # 3. Check and list files
+    if await files.exists("input.jpg"):
+        file_list = await files.list()
+
+    return {"data": {"report": output_path}}
+```
+
+**Available Methods (all asynchronous):**
+- `await path_to(name)` — returns the full path to a file (ensures the task directory exists).
+- `await read(name, mode='r')` — reads the entire file.
+- `await write(name, data, mode='w')` — writes data to a file.
+- `await list()` — lists filenames in the task directory.
+- `await exists(name)` — checks if a file exists.
+- `async with open(name, mode)` — async context manager for advanced usage.
+
+> **Note: Automatic Cleanup**
+>
+> The SDK automatically deletes the entire task directory (including everything created via `TaskFiles`) immediately after the task completes and the result is sent.
+
+### 6. Handling Large Files (S3 Payload Offloading)
 
 The SDK supports working with large files "out of the box" via S3-compatible storage.
 
-- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
-- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `
+- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path. **If the URI ends with `/` (e.g., `s3://bucket/data/`), the SDK treats it as a folder prefix and recursively downloads all matching objects into a local directory.**
+- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `TASK_FILES_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result. **If the path is a directory, the SDK recursively uploads all files within it.**
+
+This functionality is transparent to your code.
+
+#### S3 Example
+
+Suppose the orchestrator sends a task with `{"input_image": "s3://my-bucket/photo.jpg"}`:
+
+```python
+import os
+from avtomatika_worker import Worker, TaskFiles
+
+worker = Worker(worker_type="image-worker")
+
+@worker.task("process_image")
+async def handle_image(params: dict, files: TaskFiles, **kwargs):
+    # SDK has already downloaded the file.
+    # 'input_image' now contains a local path like '/tmp/payloads/task-id/photo.jpg'
+    local_input = params["input_image"]
+    local_output = await files.path_to("processed.png")
+
+    # Your logic here (using local files)
+    # ... image processing ...
+
+    # Return the local path of the result.
+    # The SDK will upload it back to S3 automatically.
+    return {
+        "status": "success",
+        "data": {
+            "output_image": local_output
+        }
+    }
+```
 
-This
+This only requires configuring environment variables for S3 access (see Full Configuration Reference).
 
-
+> **Important: S3 Consistency**
+>
+> The SDK **does not validate** that the Worker and Orchestrator share the same storage backend. You must ensure that:
+> 1. The Worker can reach the `S3_ENDPOINT_URL` used by the Orchestrator.
+> 2. The Worker's credentials allow reading from the buckets referenced in the incoming `s3://` URIs.
+> 3. The Worker's credentials allow writing to the `S3_DEFAULT_BUCKET`.
 
-### 6. WebSocket Support
+### 7. WebSocket Support
 
 ## Advanced Features
 
@@ -522,7 +595,7 @@ The worker is fully configured via environment variables.
 | `TASK_POLL_TIMEOUT` | The timeout in seconds for polling for new tasks. | `30` |
 | `TASK_POLL_ERROR_DELAY` | The delay in seconds before retrying after a polling error. | `5.0` |
 | `IDLE_POLL_DELAY` | The delay in seconds between polls when the worker is idle. | `0.01` |
-| `
+| `TASK_FILES_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
 | `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
 | `S3_ACCESS_KEY` | The access key for S3. | - |
 | `S3_SECRET_KEY` | The secret key for S3. | - |
avtomatika_worker-1.0b1/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b2/README.md
RENAMED

@@ -1,31 +1,3 @@
-Metadata-Version: 2.4
-Name: avtomatika-worker
-Version: 1.0b1
-Summary: Worker SDK for the Avtomatika orchestrator.
-Project-URL: Homepage, https://github.com/avtomatila-ai/avtomatika-worker
-Project-URL: Bug Tracker, https://github.com/avtomatila-ai/avtomatika-worker/issues
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: aiohttp~=3.13.2
-Requires-Dist: python-json-logger~=4.0.0
-Requires-Dist: aioboto3~=13.0.0
-Provides-Extra: test
-Requires-Dist: pytest; extra == "test"
-Requires-Dist: pytest-asyncio; extra == "test"
-Requires-Dist: aioresponses; extra == "test"
-Requires-Dist: pytest-mock; extra == "test"
-Requires-Dist: pydantic; extra == "test"
-Requires-Dist: moto[server]; extra == "test"
-Requires-Dist: aiofiles; extra == "test"
-Provides-Extra: pydantic
-Requires-Dist: pydantic; extra == "pydantic"
-Dynamic: license-file
-
 # Avtomatika Worker SDK
 
 This is an SDK for creating workers compatible with the **Avtomatika** orchestrator. The SDK handles all the complexity of interacting with the orchestrator, allowing you to focus on writing your business logic.
@@ -434,18 +406,92 @@ The `ORCHESTRATORS_CONFIG` variable must contain a JSON string. Each object in t
 
 
 
-### 5. Handling Large Files (S3 Payload Offloading)
+
+
+### 5. File System Helper (TaskFiles)
+
+To simplify working with temporary files and paths, the SDK provides a `TaskFiles` helper class. It automatically manages directory creation within the isolated task folder and provides an asynchronous interface for file operations. Just add an argument typed as `TaskFiles` to your handler:
+
+```python
+from avtomatika_worker import Worker, TaskFiles
+
+@worker.task("generate_report")
+async def generate_report(params: dict, files: TaskFiles, **kwargs):
+    # 1. Easy read/write
+    await files.write("data.json", '{"status": "ok"}')
+    content = await files.read("data.json")
+
+    # 2. Get path (directory is created automatically)
+    output_path = await files.path_to("report.pdf")
+
+    # 3. Check and list files
+    if await files.exists("input.jpg"):
+        file_list = await files.list()
+
+    return {"data": {"report": output_path}}
+```
+
+**Available Methods (all asynchronous):**
+- `await path_to(name)` — returns the full path to a file (ensures the task directory exists).
+- `await read(name, mode='r')` — reads the entire file.
+- `await write(name, data, mode='w')` — writes data to a file.
+- `await list()` — lists filenames in the task directory.
+- `await exists(name)` — checks if a file exists.
+- `async with open(name, mode)` — async context manager for advanced usage.
+
+> **Note: Automatic Cleanup**
+>
+> The SDK automatically deletes the entire task directory (including everything created via `TaskFiles`) immediately after the task completes and the result is sent.
+
+### 6. Handling Large Files (S3 Payload Offloading)
 
 The SDK supports working with large files "out of the box" via S3-compatible storage.
 
-- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
-- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `
+- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path. **If the URI ends with `/` (e.g., `s3://bucket/data/`), the SDK treats it as a folder prefix and recursively downloads all matching objects into a local directory.**
+- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `TASK_FILES_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result. **If the path is a directory, the SDK recursively uploads all files within it.**
+
+This functionality is transparent to your code.
+
+#### S3 Example
+
+Suppose the orchestrator sends a task with `{"input_image": "s3://my-bucket/photo.jpg"}`:
+
+```python
+import os
+from avtomatika_worker import Worker, TaskFiles
+
+worker = Worker(worker_type="image-worker")
+
+@worker.task("process_image")
+async def handle_image(params: dict, files: TaskFiles, **kwargs):
+    # SDK has already downloaded the file.
+    # 'input_image' now contains a local path like '/tmp/payloads/task-id/photo.jpg'
+    local_input = params["input_image"]
+    local_output = await files.path_to("processed.png")
+
+    # Your logic here (using local files)
+    # ... image processing ...
+
+    # Return the local path of the result.
+    # The SDK will upload it back to S3 automatically.
+    return {
+        "status": "success",
+        "data": {
+            "output_image": local_output
+        }
+    }
+```
 
-This
+This only requires configuring environment variables for S3 access (see Full Configuration Reference).
 
-
+> **Important: S3 Consistency**
+>
+> The SDK **does not validate** that the Worker and Orchestrator share the same storage backend. You must ensure that:
+> 1. The Worker can reach the `S3_ENDPOINT_URL` used by the Orchestrator.
+> 2. The Worker's credentials allow reading from the buckets referenced in the incoming `s3://` URIs.
+> 3. The Worker's credentials allow writing to the `S3_DEFAULT_BUCKET`.
 
-### 6. WebSocket Support
+### 7. WebSocket Support
 
 ## Advanced Features
 
@@ -522,7 +568,7 @@ The worker is fully configured via environment variables.
 | `TASK_POLL_TIMEOUT` | The timeout in seconds for polling for new tasks. | `30` |
 | `TASK_POLL_ERROR_DELAY` | The delay in seconds before retrying after a polling error. | `5.0` |
 | `IDLE_POLL_DELAY` | The delay in seconds between polls when the worker is idle. | `0.01` |
-| `
+| `TASK_FILES_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
 | `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
 | `S3_ACCESS_KEY` | The access key for S3. | - |
 | `S3_SECRET_KEY` | The secret key for S3. | - |
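
For readers following the folder-prefix rule in the "Automatic Download" bullet above, here is a small standalone sketch of the path mapping it describes; the bucket, keys, and task directory are illustrative only, not SDK API:

```python
# Sketch of how an s3:// folder URI (trailing slash) maps onto local paths.
from os.path import join
from urllib.parse import urlparse

uri = "s3://my-bucket/data/"                  # trailing slash => folder prefix
task_root = "/tmp/payloads/task-123"          # hypothetical task directory

parsed = urlparse(uri)
prefix = parsed.path.lstrip("/")              # "data/"
folder = prefix.rstrip("/").split("/")[-1]    # "data"

for key in ["data/a.csv", "data/sub/b.csv"]:  # keys listed under the prefix
    rel = key[len(prefix):]                   # path relative to the prefix
    print(join(task_root, folder, rel))       # .../task-123/data/a.csv, .../data/sub/b.csv
```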
{avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/pyproject.toml
CHANGED

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika-worker"
-version = "1.0.b1"
+version = "1.0.b2"
 description = "Worker SDK for the Avtomatika orchestrator."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -17,7 +17,8 @@ classifiers = [
 dependencies = [
     "aiohttp~=3.13.2",
     "python-json-logger~=4.0.0",
-    "aioboto3~=13.0.0",
+    "aioboto3~=15.5.0",
+    "aiofiles~=25.1.0",
 ]
 
 [project.optional-dependencies]
@@ -27,14 +28,12 @@ test = [
     "aioresponses",
     "pytest-mock",
     "pydantic",
-    "moto[server]",
-    "aiofiles",
 ]
 pydantic = ["pydantic"]
 
 [project.urls]
-"Homepage" = "https://github.com/avtomatila-ai/avtomatika-worker"
-"Bug Tracker" = "https://github.com/avtomatila-ai/avtomatika-worker/issues"
+"Homepage" = "https://github.com/avtomatika-ai/avtomatika-worker"
+"Bug Tracker" = "https://github.com/avtomatika-ai/avtomatika-worker/issues"
 
 [tool.setuptools.packages.find]
 where = ["src"]
@@ -47,7 +46,7 @@ pythonpath = "src"
 filterwarnings = [
     "ignore:'asyncio.iscoroutinefunction' is deprecated:DeprecationWarning",
     "ignore:coroutine 'AsyncMockMixin._execute_mock_call' was never awaited:RuntimeWarning",
-    "ignore
+    "ignore::pytest.PytestUnraisableExceptionWarning"
 ]
 
 [tool.ruff]
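
The `~=` pins above are PEP 440 compatible-release specifiers: patch updates are allowed, minor bumps are not. A quick way to see what they accept, assuming the third-party `packaging` library is available (it is not a dependency of this package):

```python
# Illustrative check of the "~=" compatible-release operator used in the pins.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=15.5.0")   # equivalent to >=15.5.0, ==15.5.*
print("15.5.2" in spec)           # True  - patch releases satisfy the pin
print("15.6.0" in spec)           # False - minor bumps do not
```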
{avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/config.py
CHANGED

@@ -49,7 +49,7 @@ class WorkerConfig:
         )
 
         # --- S3 Settings for payload offloading ---
-        self.
+        self.TASK_FILES_DIR: str = getenv("TASK_FILES_DIR", "/tmp/payloads")
         self.S3_ENDPOINT_URL: str | None = getenv("S3_ENDPOINT_URL")
         self.S3_ACCESS_KEY: str | None = getenv("S3_ACCESS_KEY")
         self.S3_SECRET_KEY: str | None = getenv("S3_SECRET_KEY")
@@ -75,8 +75,7 @@ class WorkerConfig:
         Loads orchestrator configuration from the ORCHESTRATORS_CONFIG environment variable.
         For backward compatibility, if it is not set, it uses ORCHESTRATOR_URL.
         """
-        orchestrators_json = getenv("ORCHESTRATORS_CONFIG")
-        if orchestrators_json:
+        if orchestrators_json := getenv("ORCHESTRATORS_CONFIG"):
             try:
                 orchestrators = loads(orchestrators_json)
                 if getenv("ORCHESTRATOR_URL"):
@@ -94,23 +93,23 @@ class WorkerConfig:
         orchestrator_url = getenv("ORCHESTRATOR_URL", "http://localhost:8080")
         return [{"url": orchestrator_url, "priority": 1, "weight": 1}]
 
-    def _get_gpu_info(self) -> dict[str, Any] | None:
+    @staticmethod
+    def _get_gpu_info() -> dict[str, Any] | None:
         """Collects GPU information from environment variables.
         Returns None if GPU is not configured.
         """
-        gpu_model = getenv("GPU_MODEL")
-        if not gpu_model:
+        if gpu_model := getenv("GPU_MODEL"):
+            return {
+                "model": gpu_model,
+                "vram_gb": int(getenv("GPU_VRAM_GB", "0")),
+            }
+        else:
             return None
 
-        return {
-            "model": gpu_model,
-            "vram_gb": int(getenv("GPU_VRAM_GB", "0")),
-        }
-
-    def _load_json_from_env(self, key: str, default: Any) -> Any:
+    @staticmethod
+    def _load_json_from_env(key: str, default: Any) -> Any:
         """Safely loads a JSON string from an environment variable."""
-        value = getenv(key)
-        if value:
+        if value := getenv(key):
             try:
                 return loads(value)
             except JSONDecodeError:
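
The config changes above replace assign-then-test with the walrus operator and turn the helpers into static methods. A simplified standalone sketch of the same pattern, mirroring `_load_json_from_env` rather than importing it:

```python
# Minimal sketch of the assign-and-test ("walrus") pattern adopted in config.py.
from json import JSONDecodeError, loads
from os import getenv
from typing import Any

def load_json_from_env(key: str, default: Any) -> Any:
    if value := getenv(key):      # bind and truth-test in a single expression
        try:
            return loads(value)
        except JSONDecodeError:
            pass
    return default

print(load_json_from_env("ORCHESTRATORS_CONFIG", []))
```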
avtomatika_worker-1.0b2/src/avtomatika_worker/s3.py
ADDED

@@ -0,0 +1,141 @@
+from asyncio import gather, to_thread
+from os import walk
+from os.path import basename, dirname, join, relpath
+from shutil import rmtree
+from typing import Any
+from urllib.parse import urlparse
+
+from aioboto3 import Session
+from aiofiles.os import makedirs
+from aiofiles.ospath import exists, isdir
+from botocore.client import Config
+
+from .config import WorkerConfig
+
+
+class S3Manager:
+    """Handles S3 payload offloading."""
+
+    def __init__(self, config: WorkerConfig):
+        self._config = config
+        self._session = Session()
+
+    def _get_client_args(self) -> dict[str, Any]:
+        """Returns standard arguments for S3 client creation."""
+        return {
+            "service_name": "s3",
+            "endpoint_url": self._config.S3_ENDPOINT_URL,
+            "aws_access_key_id": self._config.S3_ACCESS_KEY,
+            "aws_secret_access_key": self._config.S3_SECRET_KEY,
+            "config": Config(signature_version="s3v4"),
+        }
+
+    async def cleanup(self, task_id: str):
+        """Removes the task-specific payload directory."""
+        task_dir = join(self._config.TASK_FILES_DIR, task_id)
+        if await exists(task_dir):
+            await to_thread(lambda: rmtree(task_dir, ignore_errors=True))
+
+    async def _process_s3_uri(self, uri: str, task_id: str) -> str:
+        """Downloads a file or a folder (if uri ends with /) from S3 and returns the local path."""
+        parsed_url = urlparse(uri)
+        bucket_name = parsed_url.netloc
+        object_key = parsed_url.path.lstrip("/")
+
+        # Use task-specific directory for isolation
+        local_dir_root = join(self._config.TASK_FILES_DIR, task_id)
+        await makedirs(local_dir_root, exist_ok=True)
+
+        async with self._session.client(**self._get_client_args()) as s3:
+            # Handle folder download (prefix)
+            if uri.endswith("/"):
+                folder_name = object_key.rstrip("/").split("/")[-1]
+                local_folder_path = join(local_dir_root, folder_name)
+
+                paginator = s3.get_paginator("list_objects_v2")
+                tasks = []
+                async for page in paginator.paginate(Bucket=bucket_name, Prefix=object_key):
+                    for obj in page.get("Contents", []):
+                        key = obj["Key"]
+                        if key.endswith("/"):
+                            continue
+
+                        # Calculate relative path inside the folder
+                        rel_path = key[len(object_key) :]
+                        local_file_path = join(local_folder_path, rel_path)
+
+                        await makedirs(dirname(local_file_path), exist_ok=True)
+                        tasks.append(s3.download_file(bucket_name, key, local_file_path))
+
+                if tasks:
+                    await gather(*tasks)
+                return local_folder_path
+
+            # Handle single file download
+            local_path = join(local_dir_root, basename(object_key))
+            await s3.download_file(bucket_name, object_key, local_path)
+            return local_path
+
+    async def _upload_to_s3(self, local_path: str) -> str:
+        """Uploads a file or a folder to S3 and returns the S3 URI."""
+        bucket_name = self._config.S3_DEFAULT_BUCKET
+
+        async with self._session.client(**self._get_client_args()) as s3:
+            # Handle folder upload
+            if await isdir(local_path):
+                folder_name = basename(local_path.rstrip("/"))
+                s3_prefix = f"{folder_name}/"
+                tasks = []
+
+                # Use to_thread to avoid blocking event loop during file walk
+                def _get_files_to_upload():
+                    files_to_upload = []
+                    for root, _, files in walk(local_path):
+                        for file in files:
+                            f_path = join(root, file)
+                            rel = relpath(f_path, local_path)
+                            files_to_upload.append((f_path, f"{s3_prefix}{rel}"))
+                    return files_to_upload
+
+                files_list = await to_thread(_get_files_to_upload)
+
+                for full_path, key in files_list:
+                    tasks.append(s3.upload_file(full_path, bucket_name, key))
+
+                if tasks:
+                    await gather(*tasks)
+
+                return f"s3://{bucket_name}/{s3_prefix}"
+
+            # Handle single file upload
+            object_key = basename(local_path)
+            await s3.upload_file(local_path, bucket_name, object_key)
+            return f"s3://{bucket_name}/{object_key}"
+
+    async def process_params(self, params: dict[str, Any], task_id: str) -> dict[str, Any]:
+        """Recursively searches for S3 URIs in params and downloads the files."""
+        if not self._config.S3_ENDPOINT_URL:
+            return params
+
+        async def _process(item: Any) -> Any:
+            if isinstance(item, str) and item.startswith("s3://"):
+                return await self._process_s3_uri(item, task_id)
+            if isinstance(item, dict):
+                return {k: await _process(v) for k, v in item.items()}
+            return [await _process(i) for i in item] if isinstance(item, list) else item
+
+        return await _process(params)
+
+    async def process_result(self, result: dict[str, Any]) -> dict[str, Any]:
+        """Recursively searches for local file paths in the result and uploads them to S3."""
+        if not self._config.S3_ENDPOINT_URL:
+            return result
+
+        async def _process(item: Any) -> Any:
+            if isinstance(item, str) and item.startswith(self._config.TASK_FILES_DIR):
+                return await self._upload_to_s3(item) if await exists(item) else item
+            if isinstance(item, dict):
+                return {k: await _process(v) for k, v in item.items()}
+            return [await _process(i) for i in item] if isinstance(item, list) else item
+
+        return await _process(result)
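
For orientation, a hypothetical direct use of the new `S3Manager`; in the package the `Worker` drives it around each task, and constructing `WorkerConfig()` with no arguments is an assumption here. It requires the `S3_*` environment variables to point at reachable storage:

```python
# Hypothetical standalone driver for S3Manager; normally the Worker calls it.
import asyncio

from avtomatika_worker.config import WorkerConfig  # no-arg construction assumed
from avtomatika_worker.s3 import S3Manager

async def main() -> None:
    manager = S3Manager(WorkerConfig())
    # Download: s3:// URIs in params are replaced with local paths.
    params = await manager.process_params(
        {"input_image": "s3://my-bucket/photo.jpg"}, task_id="demo-task"
    )
    print(params)  # e.g. {'input_image': '/tmp/payloads/demo-task/photo.jpg'}
    # Upload: local paths under TASK_FILES_DIR are replaced with s3:// URIs.
    result = await manager.process_result({"data": {"out": params["input_image"]}})
    print(result)
    await manager.cleanup("demo-task")  # remove the task's staging directory

asyncio.run(main())
```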
avtomatika_worker-1.0b2/src/avtomatika_worker/task_files.py
ADDED

@@ -0,0 +1,97 @@
+from contextlib import asynccontextmanager
+from os.path import dirname, join
+from typing import AsyncGenerator
+
+from aiofiles import open as aiopen
+from aiofiles.os import listdir, makedirs
+from aiofiles.ospath import exists as aio_exists
+
+
+class TaskFiles:
+    """
+    A helper class for managing task-specific files.
+    Provides asynchronous lazy directory creation and high-level file operations
+    within an isolated workspace for each task.
+    """
+
+    def __init__(self, task_dir: str):
+        """
+        Initializes TaskFiles with a specific task directory.
+        The directory is not created until needed.
+        """
+        self._task_dir = task_dir
+
+    async def get_root(self) -> str:
+        """
+        Asynchronously returns the root directory for the task.
+        Creates the directory on disk if it doesn't exist.
+        """
+        await makedirs(self._task_dir, exist_ok=True)
+        return self._task_dir
+
+    async def path_to(self, filename: str) -> str:
+        """
+        Asynchronously returns an absolute path for a file within the task directory.
+        Guarantees that the task root directory exists.
+        """
+        root = await self.get_root()
+        return join(root, filename)
+
+    @asynccontextmanager
+    async def open(self, filename: str, mode: str = "r") -> AsyncGenerator:
+        """
+        An asynchronous context manager to open a file within the task directory.
+        Automatically creates the task root and any necessary subdirectories.
+
+        Args:
+            filename: Name or relative path of the file.
+            mode: File opening mode (e.g., 'r', 'w', 'a', 'rb', 'wb').
+        """
+        path = await self.path_to(filename)
+        # Ensure directory for the file itself exists if filename contains subdirectories
+        file_dir = dirname(path)
+        if file_dir != self._task_dir:
+            await makedirs(file_dir, exist_ok=True)
+
+        async with aiopen(path, mode) as f:
+            yield f
+
+    async def read(self, filename: str, mode: str = "r") -> str | bytes:
+        """
+        Asynchronously reads the entire content of a file.
+
+        Args:
+            filename: Name of the file to read.
+            mode: Mode to open the file in (defaults to 'r').
+        """
+        async with self.open(filename, mode) as f:
+            return await f.read()
+
+    async def write(self, filename: str, data: str | bytes, mode: str = "w") -> None:
+        """
+        Asynchronously writes data to a file. Creates or overwrites the file by default.
+
+        Args:
+            filename: Name of the file to write.
+            data: Content to write (string or bytes).
+            mode: Mode to open the file in (defaults to 'w').
+        """
+        async with self.open(filename, mode) as f:
+            await f.write(data)
+
+    async def list(self) -> list[str]:
+        """
+        Asynchronously lists all file and directory names within the task root.
+        """
+        root = await self.get_root()
+        return await listdir(root)
+
+    async def exists(self, filename: str) -> bool:
+        """
+        Asynchronously checks if a specific file or directory exists in the task root.
+        """
+        path = join(self._task_dir, filename)
+        return await aio_exists(path)
+
+    def __repr__(self):
+        return f"<TaskFiles root='{self._task_dir}'>"