avtomatika-worker 1.0b1.tar.gz → 1.0b2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/PKG-INFO +86 -13
  2. avtomatika_worker-1.0b1/src/avtomatika_worker.egg-info/PKG-INFO → avtomatika_worker-1.0b2/README.md +81 -35
  3. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/pyproject.toml +6 -7
  4. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/__init__.py +2 -1
  5. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/config.py +13 -14
  6. avtomatika_worker-1.0b2/src/avtomatika_worker/s3.py +141 -0
  7. avtomatika_worker-1.0b2/src/avtomatika_worker/task_files.py +97 -0
  8. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/worker.py +35 -12
  9. avtomatika_worker-1.0b1/README.md → avtomatika_worker-1.0b2/src/avtomatika_worker.egg-info/PKG-INFO +108 -7
  10. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/SOURCES.txt +2 -0
  11. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/requires.txt +2 -3
  12. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_config.py +3 -0
  13. avtomatika_worker-1.0b2/tests/test_dependency_injection.py +117 -0
  14. avtomatika_worker-1.0b2/tests/test_s3.py +179 -0
  15. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_more_logic.py +19 -15
  16. avtomatika_worker-1.0b1/src/avtomatika_worker/s3.py +0 -75
  17. avtomatika_worker-1.0b1/tests/test_s3.py +0 -85
  18. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/LICENSE +0 -0
  19. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/setup.cfg +0 -0
  20. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker/types.py +0 -0
  21. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/dependency_links.txt +0 -0
  22. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/src/avtomatika_worker.egg-info/top_level.txt +0 -0
  23. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_concurrency_limits.py +0 -0
  24. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_init.py +0 -0
  25. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_parameter_typing.py +0 -0
  26. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_per_orchestrator_token.py +0 -0
  27. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_types.py +0 -0
  28. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_logic.py +0 -0
  29. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_worker_sdk.py +0 -0
  30. {avtomatika_worker-1.0b1 → avtomatika_worker-1.0b2}/tests/test_wrr_logic.py +0 -0
@@ -1,9 +1,9 @@
  Metadata-Version: 2.4
  Name: avtomatika-worker
- Version: 1.0b1
+ Version: 1.0b2
  Summary: Worker SDK for the Avtomatika orchestrator.
- Project-URL: Homepage, https://github.com/avtomatila-ai/avtomatika-worker
- Project-URL: Bug Tracker, https://github.com/avtomatila-ai/avtomatika-worker/issues
+ Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
+ Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
  Classifier: Development Status :: 4 - Beta
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
@@ -13,15 +13,14 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: aiohttp~=3.13.2
  Requires-Dist: python-json-logger~=4.0.0
- Requires-Dist: aioboto3~=13.0.0
+ Requires-Dist: aioboto3~=15.5.0
+ Requires-Dist: aiofiles~=25.1.0
  Provides-Extra: test
  Requires-Dist: pytest; extra == "test"
  Requires-Dist: pytest-asyncio; extra == "test"
  Requires-Dist: aioresponses; extra == "test"
  Requires-Dist: pytest-mock; extra == "test"
  Requires-Dist: pydantic; extra == "test"
- Requires-Dist: moto[server]; extra == "test"
- Requires-Dist: aiofiles; extra == "test"
  Provides-Extra: pydantic
  Requires-Dist: pydantic; extra == "pydantic"
  Dynamic: license-file
@@ -434,18 +433,92 @@ The `ORCHESTRATORS_CONFIG` variable must contain a JSON string. Each object in t
 
 
 
- ### 5. Handling Large Files (S3 Payload Offloading)
+
+
+ ### 5. File System Helper (TaskFiles)
+
+ To simplify working with temporary files and paths, the SDK provides a `TaskFiles` helper class. It automatically manages directory creation within the isolated task folder and provides an asynchronous interface for file operations. Just add an argument typed as `TaskFiles` to your handler:
+
+ ```python
+ from avtomatika_worker import Worker, TaskFiles
+
+ @worker.task("generate_report")
+ async def generate_report(params: dict, files: TaskFiles, **kwargs):
+     # 1. Easy read/write
+     await files.write("data.json", '{"status": "ok"}')
+     content = await files.read("data.json")
+
+     # 2. Get path (directory is created automatically)
+     output_path = await files.path_to("report.pdf")
+
+     # 3. Check and list files
+     if await files.exists("input.jpg"):
+         file_list = await files.list()
+
+     return {"data": {"report": output_path}}
+ ```
+
+ **Available Methods (all asynchronous):**
+ - `await path_to(name)` — returns the full path to a file (ensures the task directory exists).
+ - `await read(name, mode='r')` — reads the entire file.
+ - `await write(name, data, mode='w')` — writes data to a file.
+ - `await list()` — lists filenames in the task directory.
+ - `await exists(name)` — checks if a file exists.
+ - `async with open(name, mode)` — async context manager for advanced usage (see the sketch below).
+
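The `open()` escape hatch is useful when a payload is too large to pass through `read()`/`write()` in one piece. A minimal sketch of chunked streaming through the task workspace, assuming only the `TaskFiles` API added in this release (the filenames and chunk size are illustrative):

```python
from avtomatika_worker import TaskFiles

async def copy_in_chunks(files: TaskFiles) -> None:
    # Stream one workspace file into another, 1 MiB at a time,
    # without ever holding the whole payload in memory.
    async with files.open("input.bin", "rb") as src:
        async with files.open("copy.bin", "wb") as dst:
            while chunk := await src.read(1 << 20):
                await dst.write(chunk)
```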
+ > **Note: Automatic Cleanup**
+ >
+ > The SDK automatically deletes the entire task directory (including everything created via `TaskFiles`) immediately after the task completes and the result is sent.
+
+ ### 6. Handling Large Files (S3 Payload Offloading)
 
  The SDK supports working with large files "out of the box" via S3-compatible storage.
 
- - **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
- - **Automatic Upload**: If your handler returns a local file path in `data` (located within the `WORKER_PAYLOAD_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result.
+ - **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path. **If the URI ends with `/` (e.g., `s3://bucket/data/`), the SDK treats it as a folder prefix and recursively downloads all matching objects into a local directory.**
+ - **Automatic Upload**: If your handler returns a local file path in `data` (located within the `TASK_FILES_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result. **If the path is a directory, the SDK recursively uploads all files within it.**
+
+ This functionality is transparent to your code.
+
+ #### S3 Example
+
+ Suppose the orchestrator sends a task with `{"input_image": "s3://my-bucket/photo.jpg"}`:
+
+ ```python
+ import os
+ from avtomatika_worker import Worker, TaskFiles
+
+ worker = Worker(worker_type="image-worker")
+
+ @worker.task("process_image")
+ async def handle_image(params: dict, files: TaskFiles, **kwargs):
+     # SDK has already downloaded the file.
+     # 'input_image' now contains a local path like '/tmp/payloads/task-id/photo.jpg'
+     local_input = params["input_image"]
+     local_output = await files.path_to("processed.png")
+
+     # Your logic here (using local files)
+     # ... image processing ...
+
+     # Return the local path of the result.
+     # The SDK will upload it back to S3 automatically.
+     return {
+         "status": "success",
+         "data": {
+             "output_image": local_output
+         }
+     }
+ ```
 
- This functionality is transparent to your code and only requires configuring environment variables for S3 access.
+ This only requires configuring environment variables for S3 access (see Full Configuration Reference).
 
- ### 6. WebSocket Support
+ > **Important: S3 Consistency**
+ >
+ > The SDK **does not validate** that the Worker and Orchestrator share the same storage backend. You must ensure that:
+ > 1. The Worker can reach the `S3_ENDPOINT_URL` used by the Orchestrator.
+ > 2. The Worker's credentials allow reading from the buckets referenced in the incoming `s3://` URIs.
+ > 3. The Worker's credentials allow writing to the `S3_DEFAULT_BUCKET`.
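As a concrete reference, a worker-side environment satisfying these three points could look like the following sketch, where every value is a placeholder to be replaced by your actual deployment settings:

```python
import os

# All values are hypothetical; they must mirror the Orchestrator's storage setup.
os.environ["S3_ENDPOINT_URL"] = "http://minio.internal:9000"  # 1. reachable from the worker
os.environ["S3_ACCESS_KEY"] = "worker-access-key"             # 2. grants read on incoming buckets
os.environ["S3_SECRET_KEY"] = "worker-secret-key"
os.environ["S3_DEFAULT_BUCKET"] = "avtomatika-results"        # 3. grants write for results
```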
 
- If enabled, the SDK establishes a persistent WebSocket connection with the orchestrator to receive real-time commands, such as canceling an ongoing task.
+ ### 7. WebSocket Support
 
  ## Advanced Features
 
@@ -522,7 +595,7 @@ The worker is fully configured via environment variables.
  | `TASK_POLL_TIMEOUT` | The timeout in seconds for polling for new tasks. | `30` |
  | `TASK_POLL_ERROR_DELAY` | The delay in seconds before retrying after a polling error. | `5.0` |
  | `IDLE_POLL_DELAY` | The delay in seconds between polls when the worker is idle. | `0.01` |
- | `WORKER_PAYLOAD_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
+ | `TASK_FILES_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
  | `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
  | `S3_ACCESS_KEY` | The access key for S3. | - |
  | `S3_SECRET_KEY` | The secret key for S3. | - |
@@ -1,31 +1,3 @@
- Metadata-Version: 2.4
- Name: avtomatika-worker
- Version: 1.0b1
- Summary: Worker SDK for the Avtomatika orchestrator.
- Project-URL: Homepage, https://github.com/avtomatila-ai/avtomatika-worker
- Project-URL: Bug Tracker, https://github.com/avtomatila-ai/avtomatika-worker/issues
- Classifier: Development Status :: 4 - Beta
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.11
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: aiohttp~=3.13.2
- Requires-Dist: python-json-logger~=4.0.0
- Requires-Dist: aioboto3~=13.0.0
- Provides-Extra: test
- Requires-Dist: pytest; extra == "test"
- Requires-Dist: pytest-asyncio; extra == "test"
- Requires-Dist: aioresponses; extra == "test"
- Requires-Dist: pytest-mock; extra == "test"
- Requires-Dist: pydantic; extra == "test"
- Requires-Dist: moto[server]; extra == "test"
- Requires-Dist: aiofiles; extra == "test"
- Provides-Extra: pydantic
- Requires-Dist: pydantic; extra == "pydantic"
- Dynamic: license-file
-
  # Avtomatika Worker SDK
 
  This is an SDK for creating workers compatible with the **Avtomatika** orchestrator. The SDK handles all the complexity of interacting with the orchestrator, allowing you to focus on writing your business logic.
@@ -434,18 +406,92 @@ The `ORCHESTRATORS_CONFIG` variable must contain a JSON string. Each object in t
 
 
 
- ### 5. Handling Large Files (S3 Payload Offloading)
+
+
+ ### 5. File System Helper (TaskFiles)
+
+ To simplify working with temporary files and paths, the SDK provides a `TaskFiles` helper class. It automatically manages directory creation within the isolated task folder and provides an asynchronous interface for file operations. Just add an argument typed as `TaskFiles` to your handler:
+
+ ```python
+ from avtomatika_worker import Worker, TaskFiles
+
+ @worker.task("generate_report")
+ async def generate_report(params: dict, files: TaskFiles, **kwargs):
+     # 1. Easy read/write
+     await files.write("data.json", '{"status": "ok"}')
+     content = await files.read("data.json")
+
+     # 2. Get path (directory is created automatically)
+     output_path = await files.path_to("report.pdf")
+
+     # 3. Check and list files
+     if await files.exists("input.jpg"):
+         file_list = await files.list()
+
+     return {"data": {"report": output_path}}
+ ```
+
+ **Available Methods (all asynchronous):**
+ - `await path_to(name)` — returns the full path to a file (ensures the task directory exists).
+ - `await read(name, mode='r')` — reads the entire file.
+ - `await write(name, data, mode='w')` — writes data to a file.
+ - `await list()` — lists filenames in the task directory.
+ - `await exists(name)` — checks if a file exists.
+ - `async with open(name, mode)` — async context manager for advanced usage.
+
+ > **Note: Automatic Cleanup**
+ >
+ > The SDK automatically deletes the entire task directory (including everything created via `TaskFiles`) immediately after the task completes and the result is sent.
+
+ ### 6. Handling Large Files (S3 Payload Offloading)
 
  The SDK supports working with large files "out of the box" via S3-compatible storage.
 
- - **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
- - **Automatic Upload**: If your handler returns a local file path in `data` (located within the `WORKER_PAYLOAD_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result.
+ - **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path. **If the URI ends with `/` (e.g., `s3://bucket/data/`), the SDK treats it as a folder prefix and recursively downloads all matching objects into a local directory.**
+ - **Automatic Upload**: If your handler returns a local file path in `data` (located within the `TASK_FILES_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result. **If the path is a directory, the SDK recursively uploads all files within it.**
+
+ This functionality is transparent to your code.
+
+ #### S3 Example
+
+ Suppose the orchestrator sends a task with `{"input_image": "s3://my-bucket/photo.jpg"}`:
+
+ ```python
+ import os
+ from avtomatika_worker import Worker, TaskFiles
+
+ worker = Worker(worker_type="image-worker")
+
+ @worker.task("process_image")
+ async def handle_image(params: dict, files: TaskFiles, **kwargs):
+     # SDK has already downloaded the file.
+     # 'input_image' now contains a local path like '/tmp/payloads/task-id/photo.jpg'
+     local_input = params["input_image"]
+     local_output = await files.path_to("processed.png")
+
+     # Your logic here (using local files)
+     # ... image processing ...
+
+     # Return the local path of the result.
+     # The SDK will upload it back to S3 automatically.
+     return {
+         "status": "success",
+         "data": {
+             "output_image": local_output
+         }
+     }
+ ```
 
- This functionality is transparent to your code and only requires configuring environment variables for S3 access.
+ This only requires configuring environment variables for S3 access (see Full Configuration Reference).
 
- ### 6. WebSocket Support
+ > **Important: S3 Consistency**
+ >
+ > The SDK **does not validate** that the Worker and Orchestrator share the same storage backend. You must ensure that:
+ > 1. The Worker can reach the `S3_ENDPOINT_URL` used by the Orchestrator.
+ > 2. The Worker's credentials allow reading from the buckets referenced in the incoming `s3://` URIs.
+ > 3. The Worker's credentials allow writing to the `S3_DEFAULT_BUCKET`.
 
- If enabled, the SDK establishes a persistent WebSocket connection with the orchestrator to receive real-time commands, such as canceling an ongoing task.
+ ### 7. WebSocket Support
 
  ## Advanced Features
 
@@ -522,7 +568,7 @@ The worker is fully configured via environment variables.
  | `TASK_POLL_TIMEOUT` | The timeout in seconds for polling for new tasks. | `30` |
  | `TASK_POLL_ERROR_DELAY` | The delay in seconds before retrying after a polling error. | `5.0` |
  | `IDLE_POLL_DELAY` | The delay in seconds between polls when the worker is idle. | `0.01` |
- | `WORKER_PAYLOAD_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
+ | `TASK_FILES_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
  | `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
  | `S3_ACCESS_KEY` | The access key for S3. | - |
  | `S3_SECRET_KEY` | The secret key for S3. | - |
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "avtomatika-worker"
- version = "1.0.b1"
+ version = "1.0.b2"
  description = "Worker SDK for the Avtomatika orchestrator."
  readme = "README.md"
  requires-python = ">=3.11"
@@ -17,7 +17,8 @@ classifiers = [
  dependencies = [
      "aiohttp~=3.13.2",
      "python-json-logger~=4.0.0",
-     "aioboto3~=13.0.0",
+     "aioboto3~=15.5.0",
+     "aiofiles~=25.1.0",
  ]
 
  [project.optional-dependencies]
@@ -27,14 +28,12 @@ test = [
      "aioresponses",
      "pytest-mock",
      "pydantic",
-     "moto[server]",
-     "aiofiles",
  ]
  pydantic = ["pydantic"]
 
  [project.urls]
- "Homepage" = "https://github.com/avtomatila-ai/avtomatika-worker"
- "Bug Tracker" = "https://github.com/avtomatila-ai/avtomatika-worker/issues"
+ "Homepage" = "https://github.com/avtomatika-ai/avtomatika-worker"
+ "Bug Tracker" = "https://github.com/avtomatika-ai/avtomatika-worker/issues"
 
  [tool.setuptools.packages.find]
  where = ["src"]
@@ -47,7 +46,7 @@ pythonpath = "src"
  filterwarnings = [
      "ignore:'asyncio.iscoroutinefunction' is deprecated:DeprecationWarning",
      "ignore:coroutine 'AsyncMockMixin._execute_mock_call' was never awaited:RuntimeWarning",
-     "ignore:datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version.:DeprecationWarning",
+     "ignore::pytest.PytestUnraisableExceptionWarning"
  ]
 
  [tool.ruff]
@@ -2,9 +2,10 @@
 
  from importlib.metadata import PackageNotFoundError, version
 
+ from .task_files import TaskFiles
  from .worker import Worker
 
- __all__ = ["Worker"]
+ __all__ = ["Worker", "TaskFiles"]
 
  try:
      __version__ = version("avtomatika-worker")
@@ -49,7 +49,7 @@ class WorkerConfig:
          )
 
          # --- S3 Settings for payload offloading ---
-         self.WORKER_PAYLOAD_DIR: str = getenv("WORKER_PAYLOAD_DIR", "/tmp/payloads")
+         self.TASK_FILES_DIR: str = getenv("TASK_FILES_DIR", "/tmp/payloads")
          self.S3_ENDPOINT_URL: str | None = getenv("S3_ENDPOINT_URL")
          self.S3_ACCESS_KEY: str | None = getenv("S3_ACCESS_KEY")
          self.S3_SECRET_KEY: str | None = getenv("S3_SECRET_KEY")
@@ -75,8 +75,7 @@ class WorkerConfig:
          Loads orchestrator configuration from the ORCHESTRATORS_CONFIG environment variable.
          For backward compatibility, if it is not set, it uses ORCHESTRATOR_URL.
          """
-         orchestrators_json = getenv("ORCHESTRATORS_CONFIG")
-         if orchestrators_json:
+         if orchestrators_json := getenv("ORCHESTRATORS_CONFIG"):
              try:
                  orchestrators = loads(orchestrators_json)
                  if getenv("ORCHESTRATOR_URL"):
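For orientation, the JSON consumed by this branch is a list of orchestrator objects. A plausible value, using the same keys as the single-orchestrator fallback constructed at the top of the next hunk (hosts and numbers are illustrative):

```python
import os

# Hypothetical two-orchestrator setup; keys match the fallback entry
# [{"url": ..., "priority": 1, "weight": 1}] built by _load_orchestrators.
os.environ["ORCHESTRATORS_CONFIG"] = (
    '[{"url": "http://orch-a:8080", "priority": 1, "weight": 3},'
    ' {"url": "http://orch-b:8080", "priority": 2, "weight": 1}]'
)
```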
@@ -94,23 +93,23 @@ class WorkerConfig:
          orchestrator_url = getenv("ORCHESTRATOR_URL", "http://localhost:8080")
          return [{"url": orchestrator_url, "priority": 1, "weight": 1}]
 
-     def _get_gpu_info(self) -> dict[str, Any] | None:
+     @staticmethod
+     def _get_gpu_info() -> dict[str, Any] | None:
          """Collects GPU information from environment variables.
          Returns None if GPU is not configured.
          """
-         gpu_model = getenv("GPU_MODEL")
-         if not gpu_model:
+         if gpu_model := getenv("GPU_MODEL"):
+             return {
+                 "model": gpu_model,
+                 "vram_gb": int(getenv("GPU_VRAM_GB", "0")),
+             }
+         else:
              return None
 
-         return {
-             "model": gpu_model,
-             "vram_gb": int(getenv("GPU_VRAM_GB", "0")),
-         }
-
-     def _load_json_from_env(self, key: str, default: Any) -> Any:
+     @staticmethod
+     def _load_json_from_env(key: str, default: Any) -> Any:
          """Safely loads a JSON string from an environment variable."""
-         value = getenv(key)
-         if value:
+         if value := getenv(key):
              try:
                  return loads(value)
              except JSONDecodeError:
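To see what the refactored GPU helper produces, a small illustration under the assumption that only the two variables it reads are set (values are made up):

```python
import os

os.environ["GPU_MODEL"] = "RTX 4090"  # hypothetical hardware
os.environ["GPU_VRAM_GB"] = "24"

# WorkerConfig._get_gpu_info() now evaluates to:
#   {"model": "RTX 4090", "vram_gb": 24}
# and returns None when GPU_MODEL is unset.
```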
@@ -0,0 +1,141 @@
+ from asyncio import gather, to_thread
+ from os import walk
+ from os.path import basename, dirname, join, relpath
+ from shutil import rmtree
+ from typing import Any
+ from urllib.parse import urlparse
+
+ from aioboto3 import Session
+ from aiofiles.os import makedirs
+ from aiofiles.ospath import exists, isdir
+ from botocore.client import Config
+
+ from .config import WorkerConfig
+
+
+ class S3Manager:
+     """Handles S3 payload offloading."""
+
+     def __init__(self, config: WorkerConfig):
+         self._config = config
+         self._session = Session()
+
+     def _get_client_args(self) -> dict[str, Any]:
+         """Returns standard arguments for S3 client creation."""
+         return {
+             "service_name": "s3",
+             "endpoint_url": self._config.S3_ENDPOINT_URL,
+             "aws_access_key_id": self._config.S3_ACCESS_KEY,
+             "aws_secret_access_key": self._config.S3_SECRET_KEY,
+             "config": Config(signature_version="s3v4"),
+         }
+
+     async def cleanup(self, task_id: str):
+         """Removes the task-specific payload directory."""
+         task_dir = join(self._config.TASK_FILES_DIR, task_id)
+         if await exists(task_dir):
+             await to_thread(lambda: rmtree(task_dir, ignore_errors=True))
+
+     async def _process_s3_uri(self, uri: str, task_id: str) -> str:
+         """Downloads a file or a folder (if uri ends with /) from S3 and returns the local path."""
+         parsed_url = urlparse(uri)
+         bucket_name = parsed_url.netloc
+         object_key = parsed_url.path.lstrip("/")
+
+         # Use task-specific directory for isolation
+         local_dir_root = join(self._config.TASK_FILES_DIR, task_id)
+         await makedirs(local_dir_root, exist_ok=True)
+
+         async with self._session.client(**self._get_client_args()) as s3:
+             # Handle folder download (prefix)
+             if uri.endswith("/"):
+                 folder_name = object_key.rstrip("/").split("/")[-1]
+                 local_folder_path = join(local_dir_root, folder_name)
+
+                 paginator = s3.get_paginator("list_objects_v2")
+                 tasks = []
+                 async for page in paginator.paginate(Bucket=bucket_name, Prefix=object_key):
+                     for obj in page.get("Contents", []):
+                         key = obj["Key"]
+                         if key.endswith("/"):
+                             continue
+
+                         # Calculate relative path inside the folder
+                         rel_path = key[len(object_key) :]
+                         local_file_path = join(local_folder_path, rel_path)
+
+                         await makedirs(dirname(local_file_path), exist_ok=True)
+                         tasks.append(s3.download_file(bucket_name, key, local_file_path))
+
+                 if tasks:
+                     await gather(*tasks)
+                 return local_folder_path
+
+             # Handle single file download
+             local_path = join(local_dir_root, basename(object_key))
+             await s3.download_file(bucket_name, object_key, local_path)
+             return local_path
+
+     async def _upload_to_s3(self, local_path: str) -> str:
+         """Uploads a file or a folder to S3 and returns the S3 URI."""
+         bucket_name = self._config.S3_DEFAULT_BUCKET
+
+         async with self._session.client(**self._get_client_args()) as s3:
+             # Handle folder upload
+             if await isdir(local_path):
+                 folder_name = basename(local_path.rstrip("/"))
+                 s3_prefix = f"{folder_name}/"
+                 tasks = []
+
+                 # Use to_thread to avoid blocking event loop during file walk
+                 def _get_files_to_upload():
+                     files_to_upload = []
+                     for root, _, files in walk(local_path):
+                         for file in files:
+                             f_path = join(root, file)
+                             rel = relpath(f_path, local_path)
+                             files_to_upload.append((f_path, f"{s3_prefix}{rel}"))
+                     return files_to_upload
+
+                 files_list = await to_thread(_get_files_to_upload)
+
+                 for full_path, key in files_list:
+                     tasks.append(s3.upload_file(full_path, bucket_name, key))
+
+                 if tasks:
+                     await gather(*tasks)
+
+                 return f"s3://{bucket_name}/{s3_prefix}"
+
+             # Handle single file upload
+             object_key = basename(local_path)
+             await s3.upload_file(local_path, bucket_name, object_key)
+             return f"s3://{bucket_name}/{object_key}"
+
+     async def process_params(self, params: dict[str, Any], task_id: str) -> dict[str, Any]:
+         """Recursively searches for S3 URIs in params and downloads the files."""
+         if not self._config.S3_ENDPOINT_URL:
+             return params
+
+         async def _process(item: Any) -> Any:
+             if isinstance(item, str) and item.startswith("s3://"):
+                 return await self._process_s3_uri(item, task_id)
+             if isinstance(item, dict):
+                 return {k: await _process(v) for k, v in item.items()}
+             return [await _process(i) for i in item] if isinstance(item, list) else item
+
+         return await _process(params)
+
+     async def process_result(self, result: dict[str, Any]) -> dict[str, Any]:
+         """Recursively searches for local file paths in the result and uploads them to S3."""
+         if not self._config.S3_ENDPOINT_URL:
+             return result
+
+         async def _process(item: Any) -> Any:
+             if isinstance(item, str) and item.startswith(self._config.TASK_FILES_DIR):
+                 return await self._upload_to_s3(item) if await exists(item) else item
+             if isinstance(item, dict):
+                 return {k: await _process(v) for k, v in item.items()}
+             return [await _process(i) for i in item] if isinstance(item, list) else item
+
+         return await _process(result)
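Taken together, the new module plugs into the task lifecycle roughly as follows. The driver function below is a sketch under the assumption that the worker wires these calls itself; only `S3Manager` and `WorkerConfig` come from the package, the rest is illustrative:

```python
from avtomatika_worker.config import WorkerConfig
from avtomatika_worker.s3 import S3Manager

async def run_with_offloading(handler, task_id: str, params: dict) -> dict:
    s3 = S3Manager(WorkerConfig())
    try:
        # s3:// URIs in params become local paths under TASK_FILES_DIR/<task_id>
        local_params = await s3.process_params(params, task_id)
        result = await handler(local_params)
        # Local paths under TASK_FILES_DIR in the result become s3:// URIs
        return await s3.process_result(result)
    finally:
        # Remove TASK_FILES_DIR/<task_id> regardless of handler outcome
        await s3.cleanup(task_id)
```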
@@ -0,0 +1,97 @@
+ from contextlib import asynccontextmanager
+ from os.path import dirname, join
+ from typing import AsyncGenerator
+
+ from aiofiles import open as aiopen
+ from aiofiles.os import listdir, makedirs
+ from aiofiles.ospath import exists as aio_exists
+
+
+ class TaskFiles:
+     """
+     A helper class for managing task-specific files.
+     Provides asynchronous lazy directory creation and high-level file operations
+     within an isolated workspace for each task.
+     """
+
+     def __init__(self, task_dir: str):
+         """
+         Initializes TaskFiles with a specific task directory.
+         The directory is not created until needed.
+         """
+         self._task_dir = task_dir
+
+     async def get_root(self) -> str:
+         """
+         Asynchronously returns the root directory for the task.
+         Creates the directory on disk if it doesn't exist.
+         """
+         await makedirs(self._task_dir, exist_ok=True)
+         return self._task_dir
+
+     async def path_to(self, filename: str) -> str:
+         """
+         Asynchronously returns an absolute path for a file within the task directory.
+         Guarantees that the task root directory exists.
+         """
+         root = await self.get_root()
+         return join(root, filename)
+
+     @asynccontextmanager
+     async def open(self, filename: str, mode: str = "r") -> AsyncGenerator:
+         """
+         An asynchronous context manager to open a file within the task directory.
+         Automatically creates the task root and any necessary subdirectories.
+
+         Args:
+             filename: Name or relative path of the file.
+             mode: File opening mode (e.g., 'r', 'w', 'a', 'rb', 'wb').
+         """
+         path = await self.path_to(filename)
+         # Ensure directory for the file itself exists if filename contains subdirectories
+         file_dir = dirname(path)
+         if file_dir != self._task_dir:
+             await makedirs(file_dir, exist_ok=True)
+
+         async with aiopen(path, mode) as f:
+             yield f
+
+     async def read(self, filename: str, mode: str = "r") -> str | bytes:
+         """
+         Asynchronously reads the entire content of a file.
+
+         Args:
+             filename: Name of the file to read.
+             mode: Mode to open the file in (defaults to 'r').
+         """
+         async with self.open(filename, mode) as f:
+             return await f.read()
+
+     async def write(self, filename: str, data: str | bytes, mode: str = "w") -> None:
+         """
+         Asynchronously writes data to a file. Creates or overwrites the file by default.
+
+         Args:
+             filename: Name of the file to write.
+             data: Content to write (string or bytes).
+             mode: Mode to open the file in (defaults to 'w').
+         """
+         async with self.open(filename, mode) as f:
+             await f.write(data)
+
+     async def list(self) -> list[str]:
+         """
+         Asynchronously lists all file and directory names within the task root.
+         """
+         root = await self.get_root()
+         return await listdir(root)
+
+     async def exists(self, filename: str) -> bool:
+         """
+         Asynchronously checks if a specific file or directory exists in the task root.
+         """
+         path = join(self._task_dir, filename)
+         return await aio_exists(path)
+
+     def __repr__(self):
+         return f"<TaskFiles root='{self._task_dir}'>"
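Worth highlighting from the implementation above: `open()` creates intermediate directories whenever the filename contains a path separator, so handlers can write relative subpaths directly. A small sketch with made-up names:

```python
from avtomatika_worker import TaskFiles

async def demo() -> None:
    # The SDK normally constructs this per task; the path is illustrative.
    files = TaskFiles("/tmp/payloads/task-123")
    # The "frames/" subdirectory is created on the fly by open().
    async with files.open("frames/0001.png", "wb") as f:
        await f.write(b"\x89PNG\r\n")
    print(await files.list())  # -> ["frames"]
```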