avtomatika-worker 1.0b3-py3-none-any.whl → 1.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika_worker/__init__.py +1 -1
- avtomatika_worker/config.py +6 -0
- avtomatika_worker/s3.py +76 -48
- avtomatika_worker/task_files.py +60 -2
- avtomatika_worker/types.py +9 -4
- avtomatika_worker/worker.py +333 -155
- {avtomatika_worker-1.0b3.dist-info → avtomatika_worker-1.0b4.dist-info}/METADATA +76 -9
- avtomatika_worker-1.0b4.dist-info/RECORD +12 -0
- {avtomatika_worker-1.0b3.dist-info → avtomatika_worker-1.0b4.dist-info}/WHEEL +1 -1
- {avtomatika_worker-1.0b3.dist-info → avtomatika_worker-1.0b4.dist-info}/licenses/LICENSE +1 -1
- avtomatika_worker/client.py +0 -93
- avtomatika_worker/constants.py +0 -22
- avtomatika_worker-1.0b3.dist-info/RECORD +0 -14
- {avtomatika_worker-1.0b3.dist-info → avtomatika_worker-1.0b4.dist-info}/top_level.txt +0 -0
avtomatika_worker/__init__.py
CHANGED
avtomatika_worker/config.py
CHANGED
@@ -4,6 +4,8 @@ from os import getenv
 from typing import Any
 from uuid import uuid4
 
+from rxon.validators import validate_identifier
+
 
 class WorkerConfig:
     """A class for centralized management of worker configuration.
@@ -29,6 +31,9 @@ class WorkerConfig:
             "WORKER_INDIVIDUAL_TOKEN",
             getenv("WORKER_TOKEN", "your-secret-worker-token"),
         )
+        self.TLS_CA_PATH: str | None = getenv("TLS_CA_PATH")
+        self.TLS_CERT_PATH: str | None = getenv("TLS_CERT_PATH")
+        self.TLS_KEY_PATH: str | None = getenv("TLS_KEY_PATH")
 
         # --- Resources and performance ---
         self.COST_PER_SKILL: dict[str, float] = self._load_json_from_env("COST_PER_SKILL", default={})
@@ -73,6 +78,7 @@ class WorkerConfig:
 
     def validate(self) -> None:
         """Validates critical configuration parameters."""
+        validate_identifier(self.WORKER_ID, "WORKER_ID")
        if self.WORKER_TOKEN == "your-secret-worker-token":
             print("Warning: WORKER_TOKEN is set to the default value. Tasks might fail authentication.")
 
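In short, 1.0b4 adds three optional TLS certificate paths and validates WORKER_ID at startup. A minimal usage sketch, assuming WORKER_ID is read from the environment like the other settings above (all values and file paths below are illustrative, not part of the package):

import os

# Illustrative values; any identifier accepted by rxon.validators.validate_identifier works.
os.environ["WORKER_ID"] = "gpu-worker-01"
os.environ["WORKER_INDIVIDUAL_TOKEN"] = "a-real-secret"
os.environ["TLS_CA_PATH"] = "/etc/avtomatika/ca.pem"
os.environ["TLS_CERT_PATH"] = "/etc/avtomatika/worker.crt"
os.environ["TLS_KEY_PATH"] = "/etc/avtomatika/worker.key"

from avtomatika_worker.config import WorkerConfig

config = WorkerConfig()
config.validate()  # now also runs validate_identifier on WORKER_ID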
avtomatika_worker/s3.py
CHANGED
@@ -4,13 +4,17 @@ from os import walk
 from os.path import basename, dirname, join, relpath
 from shutil import rmtree
 from typing import Any, cast
-from urllib.parse import urlparse
 
-import obstore
 from aiofiles import open as aio_open
 from aiofiles.os import makedirs
-from aiofiles.ospath import exists, isdir
+from aiofiles.ospath import exists, getsize, isdir
+from obstore import get as obstore_get
+from obstore import list as obstore_list
+from obstore import put as obstore_put
 from obstore.store import S3Store
+from rxon.blob import parse_uri
+from rxon.exceptions import IntegrityError
+from rxon.models import FileMetadata
 
 from .config import WorkerConfig
 
@@ -61,12 +65,12 @@ class S3Manager:
         if await exists(task_dir):
             await to_thread(lambda: rmtree(task_dir, ignore_errors=True))
 
-    async def _process_s3_uri(self, uri: str, task_id: str) -> str:
-        """Downloads a file or a folder
+    async def _process_s3_uri(self, uri: str, task_id: str, verify_meta: FileMetadata | None = None) -> str:
+        """Downloads a file or a folder from S3 and returns the local path.
+        If verify_meta is provided, performs integrity checks.
+        """
         try:
-
-            bucket_name = parsed_url.netloc
-            object_key = parsed_url.path.lstrip("/")
+            bucket_name, object_key, is_directory = parse_uri(uri)
             store = self._get_store(bucket_name)
 
             # Use task-specific directory for isolation
@@ -76,36 +80,27 @@
             logger.info(f"Starting download from S3: {uri}")
 
             # Handle folder download (prefix)
-            if
+            if is_directory:
                 folder_name = object_key.rstrip("/").split("/")[-1]
                 local_folder_path = join(local_dir_root, folder_name)
-
-                # List objects with prefix
-                # obstore.list returns an async iterator of ObjectMeta
                 files_to_download = []
 
-
-                async for obj in obstore.list(store, prefix=object_key):
+                async for obj in obstore_list(store, prefix=object_key):
                     key = obj.key
-
                     if key.endswith("/"):
                         continue
-
-                    # Calculate relative path inside the folder
                     rel_path = key[len(object_key) :]
                     local_file_path = join(local_folder_path, rel_path)
-
                     await makedirs(dirname(local_file_path), exist_ok=True)
                     files_to_download.append((key, local_file_path))
 
                 async def _download_file(key: str, path: str) -> None:
                     async with self._semaphore:
-                        result = await
+                        result = await obstore_get(store, key)
                         async with aio_open(path, "wb") as f:
                             async for chunk in result.stream():
                                 await f.write(chunk)
 
-                # Execute downloads in parallel
                 if files_to_download:
                     await gather(*[_download_file(k, p) for k, p in files_to_download])
 
@@ -115,7 +110,20 @@
             # Handle single file download
             local_path = join(local_dir_root, basename(object_key))
 
-            result = await
+            result = await obstore_get(store, object_key)
+
+            # Integrity check before download
+            if verify_meta:
+                if verify_meta.size != result.meta.size:
+                    raise IntegrityError(
+                        f"Size mismatch for {uri}: expected {verify_meta.size}, got {result.meta.size}"
+                    )
+                if verify_meta.etag and result.meta.e_tag:
+                    actual_etag = result.meta.e_tag.strip('"')
+                    expected_etag = verify_meta.etag.strip('"')
+                    if actual_etag != expected_etag:
+                        raise IntegrityError(f"ETag mismatch for {uri}: expected {expected_etag}, got {actual_etag}")
+
             async with aio_open(local_path, "wb") as f:
                 async for chunk in result.stream():
                     await f.write(chunk)
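For orientation, verify_meta is an rxon FileMetadata holding the size and ETag recorded when the object was uploaded. A minimal sketch of a verified single-file download, using the internal helper directly for illustration (the bucket, URI, size, and ETag values are made up, and s3_manager is assumed to be an already-constructed S3Manager):

from rxon.exceptions import IntegrityError
from rxon.models import FileMetadata


async def fetch_verified(s3_manager) -> str:
    # Expected values normally come from the metadata map produced at upload time.
    expected = FileMetadata(uri="s3://my-bucket/input.bin", size=1_048_576, etag="9b2cf535f27731c9")
    try:
        return await s3_manager._process_s3_uri(expected.uri, "task-123", verify_meta=expected)
    except IntegrityError:
        # Raised when the stored object's size or ETag differs from `expected`.
        raise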
@@ -128,8 +136,8 @@
             logger.exception(f"Error during download of {uri}: {e}")
             raise
 
-    async def _upload_to_s3(self, local_path: str) ->
-        """Uploads a file or a folder to S3 and returns
+    async def _upload_to_s3(self, local_path: str) -> FileMetadata:
+        """Uploads a file or a folder to S3 and returns FileMetadata."""
         bucket_name = self._config.S3_DEFAULT_BUCKET
         store = self._get_store(bucket_name)
 
@@ -141,70 +149,90 @@
                 folder_name = basename(local_path.rstrip("/"))
                 s3_prefix = f"{folder_name}/"
 
-                # Use to_thread to avoid blocking event loop during file walk
                 def _get_files_to_upload():
+                    from os.path import getsize as std_getsize
+
                     files_to_upload = []
+                    total_size = 0
                     for root, _, files in walk(local_path):
                         for file in files:
                             f_path = join(root, file)
                             rel = relpath(f_path, local_path)
+                            total_size += std_getsize(f_path)
                             files_to_upload.append((f_path, f"{s3_prefix}{rel}"))
-                    return files_to_upload
+                    return files_to_upload, total_size
 
-                files_list = await to_thread(_get_files_to_upload)
+                files_list, total_size = await to_thread(_get_files_to_upload)
 
                 async def _upload_file(path: str, key: str) -> None:
                     async with self._semaphore:
-                        # obstore.put accepts bytes or file-like objects.
-                        # Since we are in async, reading small files is fine.
                         with open(path, "rb") as f:
-                            await
+                            await obstore_put(store, key, f)
 
                 if files_list:
-                    # Upload in parallel
                     await gather(*[_upload_file(f, k) for f, k in files_list])
 
                 s3_uri = f"s3://{bucket_name}/{s3_prefix}"
                 logger.info(f"Successfully uploaded folder to S3: {local_path} -> {s3_uri} ({len(files_list)} files)")
-                return s3_uri
+                return FileMetadata(uri=s3_uri, size=total_size)
 
             # Handle single file upload
             object_key = basename(local_path)
+            file_size = await getsize(local_path)
             with open(local_path, "rb") as f:
-                await
+                put_result = await obstore_put(store, object_key, f)
 
             s3_uri = f"s3://{bucket_name}/{object_key}"
-
-
+            etag = put_result.e_tag.strip('"') if put_result.e_tag else None
+            logger.info(f"Successfully uploaded file to S3: {local_path} -> {s3_uri} (ETag: {etag})")
+            return FileMetadata(uri=s3_uri, size=file_size, etag=etag)
 
         except Exception as e:
             logger.exception(f"Error during upload of {local_path}: {e}")
             raise
 
-    async def process_params(
-
+    async def process_params(
+        self, params: dict[str, Any], task_id: str, metadata: dict[str, FileMetadata] | None = None
+    ) -> dict[str, Any]:
+        """Recursively searches for S3 URIs in params and downloads the files.
+        Uses metadata for integrity verification if available.
+        """
         if not self._config.S3_ENDPOINT_URL:
             return params
 
-        async def _process(item: Any) -> Any:
+        async def _process(item: Any, key_path: str = "") -> Any:
             if isinstance(item, str) and item.startswith("s3://"):
-
+                verify_meta = metadata.get(key_path) if metadata else None
+                return await self._process_s3_uri(item, task_id, verify_meta=verify_meta)
             if isinstance(item, dict):
-                return {k: await _process(v) for k, v in item.items()}
-
+                return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+            if isinstance(item, list):
+                return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+            return item
 
         return cast(dict[str, Any], await _process(params))
 
-    async def process_result(self, result: dict[str, Any]) -> dict[str, Any]:
-        """Recursively searches for local file paths in the result and uploads them to S3.
+    async def process_result(self, result: dict[str, Any]) -> tuple[dict[str, Any], dict[str, FileMetadata]]:
+        """Recursively searches for local file paths in the result and uploads them to S3.
+        Returns a tuple of (updated_result, metadata_map).
+        """
         if not self._config.S3_ENDPOINT_URL:
-            return result
+            return result, {}
+
+        metadata_map = {}
 
-        async def _process(item: Any) -> Any:
+        async def _process(item: Any, key_path: str = "") -> Any:
             if isinstance(item, str) and item.startswith(self._config.TASK_FILES_DIR):
-
+                if await exists(item):
+                    meta = await self._upload_to_s3(item)
+                    metadata_map[key_path] = meta
+                    return meta.uri
+                return item
             if isinstance(item, dict):
-                return {k: await _process(v) for k, v in item.items()}
-
+                return {k: await _process(v, f"{key_path}.{k}" if key_path else k) for k, v in item.items()}
+            if isinstance(item, list):
+                return [await _process(v, f"{key_path}[{i}]") for i, v in enumerate(item)]
+            return item
 
-
+        updated_result = cast(dict[str, Any], await _process(result))
+        return updated_result, metadata_map
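Taken together, process_result and process_params now carry a metadata map between the producing and consuming side of a task. A hedged round-trip sketch, assuming an already-configured S3Manager instance and an illustrative task result (the key paths in the metadata map are the dotted/indexed locations built by _process above):

from typing import Any


async def round_trip(s3_manager, result: dict[str, Any], task_id: str) -> dict[str, Any]:
    # Producer side: local paths under TASK_FILES_DIR are uploaded and replaced with
    # s3:// URIs; metadata maps each key path (e.g. "report" or "frames[0]") to a
    # FileMetadata carrying the uploaded size and ETag.
    updated_result, metadata = await s3_manager.process_result(result)

    # Consumer side: the same key paths select which FileMetadata to verify against
    # the stored object; a size or ETag mismatch raises rxon.exceptions.IntegrityError.
    return await s3_manager.process_params(updated_result, task_id, metadata=metadata)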
avtomatika_worker/task_files.py
CHANGED
@@ -1,11 +1,17 @@
 from contextlib import asynccontextmanager
+from json import dumps, loads
 from os.path import dirname, join
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING, Any, AsyncGenerator
 
 from aiofiles import open as aiopen
 from aiofiles.os import listdir, makedirs
 from aiofiles.ospath import exists as aio_exists
 
+if TYPE_CHECKING:
+    from rxon.models import FileMetadata
+
+    from .s3 import S3Manager
+
 
 
 class TaskFiles:
@@ -14,12 +20,13 @@ class TaskFiles:
     within an isolated workspace for each task.
     """
 
-    def __init__(self, task_dir: str):
+    def __init__(self, task_dir: str, s3_manager: "S3Manager" = None):
         """
         Initializes TaskFiles with a specific task directory.
         The directory is not created until needed.
         """
         self._task_dir = task_dir
+        self._s3_manager = s3_manager
 
     async def get_root(self) -> str:
         """
@@ -37,6 +44,24 @@ class TaskFiles:
         root = await self.get_root()
         return join(root, filename)
 
+    def get_root_sync(self) -> str:
+        """
+        Synchronously returns the root directory for the task.
+        Creates the directory on disk if it doesn't exist.
+        """
+        import os
+
+        os.makedirs(self._task_dir, exist_ok=True)
+        return self._task_dir
+
+    def path_to_sync(self, filename: str) -> str:
+        """
+        Synchronously returns an absolute path for a file within the task directory.
+        Guarantees that the task root directory exists.
+        """
+        root = self.get_root_sync()
+        return join(root, filename)
+
     @asynccontextmanager
     async def open(self, filename: str, mode: str = "r") -> AsyncGenerator:
         """
@@ -79,6 +104,39 @@ class TaskFiles:
         async with self.open(filename, mode) as f:
             await f.write(data)
 
+    async def write_json(self, filename: str, data: Any) -> "FileMetadata | None":
+        """Writes data as JSON and optionally uploads to S3 if manager is available."""
+        content = dumps(data, indent=2)
+        await self.write(filename, content)
+        if self._s3_manager:
+            return await self.upload_file(filename)
+        return None
+
+    async def read_json(self, filename: str) -> Any:
+        """Reads a file and parses it as JSON."""
+        content = await self.read(filename)
+        return loads(content)
+
+    async def upload_file(self, filename: str) -> "FileMetadata":
+        """Uploads a specific file to S3 and returns its metadata."""
+        if not self._s3_manager:
+            raise RuntimeError("S3Manager not configured for this TaskFiles instance.")
+        path = await self.path_to(filename)
+        return await self._s3_manager._upload_to_s3(path)
+
+    async def upload_dir(self, dirname: str = "") -> "FileMetadata":
+        """Uploads the entire task directory or a subdirectory to S3."""
+        if not self._s3_manager:
+            raise RuntimeError("S3Manager not configured for this TaskFiles instance.")
+        path = join(self._task_dir, dirname) if dirname else self._task_dir
+        return await self._s3_manager._upload_to_s3(path)
+
+    async def download_file(self, uri: str, filename: str, verify_meta: "FileMetadata" = None) -> str:
+        """Downloads a file from S3 to the task directory with optional integrity check."""
+        if not self._s3_manager:
+            raise RuntimeError("S3Manager not configured for this TaskFiles instance.")
+        return await self._s3_manager._process_s3_uri(uri, self._task_dir.split("/")[-1], verify_meta=verify_meta)
+
     async def list(self) -> list[str]:
         """
         Asynchronously lists all file and directory names within the task root.
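For skill authors, the new TaskFiles helpers wrap the same S3 machinery. A hedged usage sketch, assuming the instance was created with an S3Manager attached and that the file and directory names are illustrative:

async def produce_and_fetch(files) -> None:
    # write_json persists locally and, because an S3Manager is attached, also uploads
    # the file and returns its FileMetadata.
    result_meta = await files.write_json("result.json", {"status": "ok"})

    # Plain JSON read from the task workspace.
    params = await files.read_json("input.json")

    # Upload a single artifact, then re-download it elsewhere with integrity
    # verification driven by the returned metadata.
    artifact_meta = await files.upload_file("render.png")
    local_path = await files.download_file(artifact_meta.uri, "render.png", verify_meta=artifact_meta)

    # Whole subdirectories can be uploaded in one call as well.
    frames_meta = await files.upload_dir("frames")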
avtomatika_worker/types.py
CHANGED
@@ -1,16 +1,21 @@
-from
+from typing import Any, Awaitable, Callable, Dict
+
+from rxon.constants import (
     ERROR_CODE_INVALID_INPUT as INVALID_INPUT_ERROR,
 )
-from .constants import (
+from rxon.constants import (
     ERROR_CODE_PERMANENT as PERMANENT_ERROR,
 )
-from .constants import (
+from rxon.constants import (
     ERROR_CODE_TRANSIENT as TRANSIENT_ERROR,
 )
 
+Middleware = Callable[[Dict[str, Any], Callable[[], Awaitable[Any]]], Awaitable[Any]]
+CapacityChecker = Callable[[str], bool]
+
 
 class ParamValidationError(Exception):
-
+    pass
 
 
 __all__ = [