avtomatika-worker 1.0a2__py3-none-any.whl → 1.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika_worker/__init__.py +2 -1
- avtomatika_worker/config.py +46 -36
- avtomatika_worker/s3.py +141 -0
- avtomatika_worker/task_files.py +97 -0
- avtomatika_worker/types.py +4 -0
- avtomatika_worker/worker.py +211 -78
- avtomatika_worker-1.0b2.dist-info/METADATA +610 -0
- avtomatika_worker-1.0b2.dist-info/RECORD +11 -0
- avtomatika_worker-1.0a2.dist-info/METADATA +0 -307
- avtomatika_worker-1.0a2.dist-info/RECORD +0 -9
- {avtomatika_worker-1.0a2.dist-info → avtomatika_worker-1.0b2.dist-info}/WHEEL +0 -0
- {avtomatika_worker-1.0a2.dist-info → avtomatika_worker-1.0b2.dist-info}/licenses/LICENSE +0 -0
- {avtomatika_worker-1.0a2.dist-info → avtomatika_worker-1.0b2.dist-info}/top_level.txt +0 -0
|
@@ -1,307 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: avtomatika-worker
|
|
3
|
-
Version: 1.0a2
|
|
4
|
-
Summary: Worker SDK for the Avtomatika orchestrator.
|
|
5
|
-
Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika-worker
|
|
6
|
-
Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika-worker/issues
|
|
7
|
-
Classifier: Development Status :: 3 - Alpha
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.11
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
Requires-Dist: aiohttp~=3.13.2
|
|
15
|
-
Requires-Dist: python-json-logger~=4.0.0
|
|
16
|
-
Provides-Extra: test
|
|
17
|
-
Requires-Dist: pytest; extra == "test"
|
|
18
|
-
Requires-Dist: pytest-asyncio; extra == "test"
|
|
19
|
-
Requires-Dist: aioresponses; extra == "test"
|
|
20
|
-
Requires-Dist: pytest-mock; extra == "test"
|
|
21
|
-
Dynamic: license-file
|
|
22
|
-
|
|
23
|
-
# Avtomatika Worker SDK
|
|
24
|
-
|
|
25
|
-
This is an SDK for creating workers compatible with the **Avtomatika** orchestrator. The SDK handles all the complexity of interacting with the orchestrator, allowing you to focus on writing your business logic.
|
|
26
|
-
|
|
27
|
-
## Installation
|
|
28
|
-
|
|
29
|
-
```bash
|
|
30
|
-
pip install avtomatika-worker
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
## Quick Start
|
|
34
|
-
|
|
35
|
-
Creating a worker is simple. You instantiate the `Worker` class and then register your task-handling functions using the `@worker.task` decorator.
|
|
36
|
-
|
|
37
|
-
```python
|
|
38
|
-
import asyncio
|
|
39
|
-
from avtomatika_worker import Worker
|
|
40
|
-
|
|
41
|
-
# 1. Create a worker instance
|
|
42
|
-
worker = Worker(
|
|
43
|
-
worker_type="image-processing",
|
|
44
|
-
skill_dependencies={
|
|
45
|
-
"resize_image": ["pillow"],
|
|
46
|
-
"add_watermark": ["pillow", "numpy"],
|
|
47
|
-
}
|
|
48
|
-
)
|
|
49
|
-
|
|
50
|
-
# 2. Register a task handler using the decorator
|
|
51
|
-
@worker.task("resize_image")
|
|
52
|
-
async def image_resizer(params: dict, **kwargs):
|
|
53
|
-
"""
|
|
54
|
-
An example handler that receives task parameters,
|
|
55
|
-
performs the work, and returns the result.
|
|
56
|
-
"""
|
|
57
|
-
task_id = kwargs.get("task_id")
|
|
58
|
-
job_id = kwargs.get("job_id")
|
|
59
|
-
|
|
60
|
-
print(f"Task {task_id} (Job: {job_id}): resizing image...")
|
|
61
|
-
print(f"Parameters: {params}")
|
|
62
|
-
|
|
63
|
-
# ... your business logic here ...
|
|
64
|
-
await asyncio.sleep(1) # Simulate I/O-bound work
|
|
65
|
-
|
|
66
|
-
# Return the result
|
|
67
|
-
return {
|
|
68
|
-
"status": "success",
|
|
69
|
-
"data": {
|
|
70
|
-
"resized_path": f"/path/to/resized_{params.get('filename')}"
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# 3. Run the worker
|
|
75
|
-
if __name__ == "__main__":
|
|
76
|
-
# The SDK will automatically connect to the orchestrator,
|
|
77
|
-
# register itself, and start polling for tasks.
|
|
78
|
-
worker.run_with_health_check()
|
|
79
|
-
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
## Key Features
|
|
83
|
-
|
|
84
|
-
### 1. Task Handlers
|
|
85
|
-
|
|
86
|
-
Each handler is an asynchronous function that accepts two arguments:
|
|
87
|
-
|
|
88
|
-
- `params` (`dict`): A dictionary with the parameters that the orchestrator passed for this task.
|
|
89
|
-
- `**kwargs`: Additional metadata about the task, including:
|
|
90
|
-
- `task_id` (`str`): The unique ID of the task itself.
|
|
91
|
-
- `job_id` (`str`): The ID of the parent `Job` to which the task belongs.
|
|
92
|
-
- `priority` (`int`): The execution priority of the task.
|
|
93
|
-
|
|
94
|
-
### 2. Concurrency Limiting
|
|
95
|
-
|
|
96
|
-
The worker allows you to control how many tasks are executed in parallel. This can be configured at two levels:
|
|
97
|
-
|
|
98
|
-
- **Global Limit**: A maximum number of tasks that the worker can execute simultaneously, regardless of their type.
|
|
99
|
-
- **Per-Type Limit**: A specific limit for a group of tasks that share a common resource (e.g., a GPU, a specific API).
|
|
100
|
-
|
|
101
|
-
The worker dynamically reports its available capacity to the orchestrator. When a limit is reached, the worker informs the orchestrator that it can no longer accept tasks of that type until a slot becomes free.
|
|
102
|
-
|
|
103
|
-
**Example:**
|
|
104
|
-
|
|
105
|
-
Let's configure a worker that can run up to **10 tasks in total**, but no more than **1 video processing task** and **4 audio transcription tasks** at the same time.
|
|
106
|
-
|
|
107
|
-
```python
|
|
108
|
-
import asyncio
|
|
109
|
-
from avtomatika_worker import Worker
|
|
110
|
-
|
|
111
|
-
# 1. Configure limits during initialization
|
|
112
|
-
worker = Worker(
|
|
113
|
-
worker_type="media-processor",
|
|
114
|
-
max_concurrent_tasks=10,
|
|
115
|
-
task_type_limits={
|
|
116
|
-
"video_processing": 1,
|
|
117
|
-
"audio_processing": 4,
|
|
118
|
-
}
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
# 2. Assign a type to each task using the decorator
|
|
122
|
-
@worker.task("upscale_video", task_type="video_processing")
|
|
123
|
-
async def upscale_video(params: dict, **kwargs):
|
|
124
|
-
# This task uses the 'video_processing' slot
|
|
125
|
-
print("Upscaling video...")
|
|
126
|
-
await asyncio.sleep(5)
|
|
127
|
-
return {"status": "success"}
|
|
128
|
-
|
|
129
|
-
@worker.task("blur_video_faces", task_type="video_processing")
|
|
130
|
-
async def blur_video_faces(params: dict, **kwargs):
|
|
131
|
-
# This task also uses the 'video_processing' slot
|
|
132
|
-
print("Blurring faces in video...")
|
|
133
|
-
await asyncio.sleep(5)
|
|
134
|
-
return {"status": "success"}
|
|
135
|
-
|
|
136
|
-
@worker.task("transcribe_audio", task_type="audio_processing")
|
|
137
|
-
async def transcribe_audio(params: dict, **kwargs):
|
|
138
|
-
# This task uses one of the four 'audio_processing' slots
|
|
139
|
-
print("Transcribing audio...")
|
|
140
|
-
await asyncio.sleep(2)
|
|
141
|
-
return {"status": "success"}
|
|
142
|
-
|
|
143
|
-
@worker.task("generate_report")
|
|
144
|
-
async def generate_report(params: dict, **kwargs):
|
|
145
|
-
# This task has no specific type and is only limited by the global limit
|
|
146
|
-
print("Generating report...")
|
|
147
|
-
await asyncio.sleep(1)
|
|
148
|
-
return {"status": "success"}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if __name__ == "__main__":
|
|
152
|
-
worker.run_with_health_check()
|
|
153
|
-
```
|
|
154
|
-
In this example, even though the global limit is 10, the orchestrator will only ever send one task (`upscale_video` or `blur_video_faces`) to this worker at a time, because they both share the single "video_processing" slot.
|
|
155
|
-
|
|
156
|
-
### 3. Returning Results and Handling Errors
|
|
157
|
-
|
|
158
|
-
The result returned by a handler directly influences the subsequent flow of the pipeline in the orchestrator.
|
|
159
|
-
|
|
160
|
-
#### Successful Execution
|
|
161
|
-
|
|
162
|
-
```python
|
|
163
|
-
return {
|
|
164
|
-
"status": "success",
|
|
165
|
-
"data": {"output": "some_value"}
|
|
166
|
-
}
|
|
167
|
-
```
|
|
168
|
-
- The orchestrator will receive this data and use the `"success"` key in the `transitions` dictionary to determine the next step.
|
|
169
|
-
|
|
170
|
-
#### Custom Statuses
|
|
171
|
-
|
|
172
|
-
You can return custom statuses to implement complex branching logic in the orchestrator.
|
|
173
|
-
```python
|
|
174
|
-
return {
|
|
175
|
-
"status": "needs_manual_review",
|
|
176
|
-
"data": {"reason": "Low confidence score"}
|
|
177
|
-
}
|
|
178
|
-
```
|
|
179
|
-
- The orchestrator will look for the `"needs_manual_review"` key in `transitions`.
|
|
180
|
-
|
|
181
|
-
#### Error Handling
|
|
182
|
-
|
|
183
|
-
To control the orchestrator's fault tolerance mechanism, you can return standardized error types.
|
|
184
|
-
|
|
185
|
-
- **Transient Error (`TRANSIENT_ERROR`)**: For issues that might be resolved on a retry (e.g., a network failure).
|
|
186
|
-
```python
|
|
187
|
-
from avtomatika_worker.types import TRANSIENT_ERROR
|
|
188
|
-
return {
|
|
189
|
-
"status": "failure",
|
|
190
|
-
"error": {
|
|
191
|
-
"code": TRANSIENT_ERROR,
|
|
192
|
-
"message": "External API timeout"
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
```
|
|
196
|
-
- **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format).
|
|
197
|
-
```python
|
|
198
|
-
from avtomatika_worker.types import PERMANENT_ERROR
|
|
199
|
-
return {
|
|
200
|
-
"status": "failure",
|
|
201
|
-
"error": {
|
|
202
|
-
"code": PERMANENT_ERROR,
|
|
203
|
-
"message": "Corrupted input file"
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
### 4. Failover and Load Balancing
|
|
209
|
-
|
|
210
|
-
The SDK supports connecting to multiple orchestrator instances to ensure high availability (`FAILOVER`) and load balancing (`ROUND_ROBIN`).
|
|
211
|
-
|
|
212
|
-
- **Configuration**: Set via the `ORCHESTRATORS_CONFIG` environment variable, which must contain a JSON string.
|
|
213
|
-
- **Mode**: Controlled by the `MULTI_ORCHESTRATOR_MODE` variable.
|
|
214
|
-
|
|
215
|
-
**Example `ORCHESTRATORS_CONFIG`:**
|
|
216
|
-
```json
|
|
217
|
-
[
|
|
218
|
-
{"url": "http://orchestrator-1.my-domain.com:8080", "weight": 100},
|
|
219
|
-
{"url": "http://orchestrator-2.my-domain.com:8080", "weight": 100}
|
|
220
|
-
]
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
- **`FAILOVER` (default):** The worker will connect to the first orchestrator. If it becomes unavailable, it will automatically switch to the next one in the list.
|
|
224
|
-
- **`ROUND_ROBIN`:** The worker will send requests to fetch tasks to each orchestrator in turn.
|
|
225
|
-
|
|
226
|
-
### 5. Handling Large Files (S3 Payload Offloading)
|
|
227
|
-
|
|
228
|
-
The SDK supports working with large files "out of the box" via S3-compatible storage.
|
|
229
|
-
|
|
230
|
-
- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
|
|
231
|
-
- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `WORKER_PAYLOAD_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result.
|
|
232
|
-
|
|
233
|
-
This functionality is transparent to your code and only requires configuring environment variables for S3 access.
|
|
234
|
-
|
|
235
|
-
### 6. WebSocket Support
|
|
236
|
-
|
|
237
|
-
If enabled, the SDK establishes a persistent WebSocket connection with the orchestrator to receive real-time commands, such as canceling an ongoing task.
|
|
238
|
-
|
|
239
|
-
## Advanced Features
|
|
240
|
-
|
|
241
|
-
### Reporting Skill & Model Dependencies
|
|
242
|
-
|
|
243
|
-
For more advanced scheduling, the worker can report detailed information about its skills and their dependencies on specific models. This allows the orchestrator to make smarter decisions, such as dispatching tasks to workers that already have the required models loaded in memory.
|
|
244
|
-
|
|
245
|
-
This is configured via the `skill_dependencies` argument in the `Worker` constructor.
|
|
246
|
-
|
|
247
|
-
- **`skill_dependencies`**: A dictionary where keys are skill names (as registered with `@worker.task`) and values are lists of model names required by that skill.
|
|
248
|
-
|
|
249
|
-
Based on this configuration and the current state of the worker's `hot_cache` (the set of models currently loaded in memory), the worker will automatically include two new fields in its heartbeat messages:
|
|
250
|
-
|
|
251
|
-
- **`skill_dependencies`**: The same dictionary provided during initialization.
|
|
252
|
-
- **`hot_skills`**: A dynamically calculated list of skills that are ready for immediate execution (i.e., all of their dependent models are in the `hot_cache`).
|
|
253
|
-
|
|
254
|
-
**Example:**
|
|
255
|
-
|
|
256
|
-
Consider a worker configured like this:
|
|
257
|
-
```python
|
|
258
|
-
worker = Worker(
|
|
259
|
-
worker_type="ai-processor",
|
|
260
|
-
skill_dependencies={
|
|
261
|
-
"image_generation": ["stable_diffusion_v1.5", "vae-ft-mse"],
|
|
262
|
-
"upscale": ["realesrgan_x4"],
|
|
263
|
-
}
|
|
264
|
-
)
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
- Initially, `hot_cache` is empty. The worker's heartbeat will include `skill_dependencies` but not `hot_skills`.
|
|
268
|
-
- A task handler calls `add_to_hot_cache("stable_diffusion_v1.5")`. The next heartbeat will still not include `hot_skills` because the `image_generation` skill is only partially loaded.
|
|
269
|
-
- The handler then calls `add_to_hot_cache("vae-ft-mse")`. Now, all dependencies for `image_generation` are met. The next heartbeat will include:
|
|
270
|
-
```json
|
|
271
|
-
{
|
|
272
|
-
"hot_skills": ["image_generation"],
|
|
273
|
-
"skill_dependencies": {
|
|
274
|
-
"image_generation": ["stable_diffusion_v1.5", "vae-ft-mse"],
|
|
275
|
-
"upscale": ["realesrgan_x4"]
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
```
|
|
279
|
-
This information is sent automatically. Your task handlers are only responsible for managing the `hot_cache` by calling `add_to_hot_cache()` and `remove_from_hot_cache()`, which are passed as arguments to the handler.
|
|
280
|
-
|
|
281
|
-
## Configuration
|
|
282
|
-
|
|
283
|
-
The worker is fully configured via environment variables.
|
|
284
|
-
|
|
285
|
-
| Variable | Description | Default |
|
|
286
|
-
| --- | --- | --- |
|
|
287
|
-
| `ORCHESTRATOR_URL` | The URL of a single orchestrator (used if `ORCHESTRATORS_CONFIG` is not set). | `http://localhost:8080` |
|
|
288
|
-
| `ORCHESTRATORS_CONFIG`| A JSON string with a list of orchestrators for `FAILOVER` or `ROUND_ROBIN` modes. | `[]` |
|
|
289
|
-
| `MULTI_ORCHESTRATOR_MODE` | The mode for handling multiple orchestrators. Possible values: `FAILOVER`, `ROUND_ROBIN`. | `FAILOVER` |
|
|
290
|
-
| `WORKER_ID` | **(Required)** A unique identifier for the worker. | - |
|
|
291
|
-
| `WORKER_TOKEN` | A common authentication token for all workers. | `default-token` |
|
|
292
|
-
| `WORKER_INDIVIDUAL_TOKEN` | An individual token for this worker (overrides `WORKER_TOKEN`). | - |
|
|
293
|
-
| `WORKER_ENABLE_WEBSOCKETS` | Enable (`true`) or disable (`false`) WebSocket support. | `false` |
|
|
294
|
-
| `WORKER_HEARTBEAT_DEBOUNCE_DELAY` | The delay in seconds for debouncing immediate heartbeats. | `0.1` |
|
|
295
|
-
| `WORKER_PAYLOAD_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
|
|
296
|
-
| `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
|
|
297
|
-
| `S3_ACCESS_KEY` | The access key for S3. | - |
|
|
298
|
-
| `S3_SECRET_KEY` | The secret key for S3. | - |
|
|
299
|
-
| `S3_DEFAULT_BUCKET`| The default bucket name for uploading results. | `avtomatika-payloads` |
|
|
300
|
-
|
|
301
|
-
## Development
|
|
302
|
-
|
|
303
|
-
To install the necessary dependencies for running tests, use the following command:
|
|
304
|
-
|
|
305
|
-
```bash
|
|
306
|
-
pip install .[test]
|
|
307
|
-
```
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
avtomatika_worker/__init__.py,sha256=j0up34aVy7xyI67xg04TVbXSSSKGdO49vsBKhtH_D0M,287
|
|
2
|
-
avtomatika_worker/config.py,sha256=oEQMpmP4AkGKdgEE1BJxojdQkK7LrogmRKJ7ib-M9xs,4555
|
|
3
|
-
avtomatika_worker/types.py,sha256=2YL6MRG2LImCUKcb0G-B3757n7zWrrUc8NXnoCLKJlo,154
|
|
4
|
-
avtomatika_worker/worker.py,sha256=lyKvIPVcokQrd6qagit_BbMoZyyqivCdNyV4fwSJTY0,18421
|
|
5
|
-
avtomatika_worker-1.0a2.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
|
|
6
|
-
avtomatika_worker-1.0a2.dist-info/METADATA,sha256=uJHXVdHzcJBdfQ-1rowBEYg488kbwifkvpDwvgocMqs,12288
|
|
7
|
-
avtomatika_worker-1.0a2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
avtomatika_worker-1.0a2.dist-info/top_level.txt,sha256=d3b5BUeUrHM1Cn-cbStz-hpucikEBlPOvtcmQ_j3qAs,18
|
|
9
|
-
avtomatika_worker-1.0a2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|