avtomatika-1.0b3-py3-none-any.whl → avtomatika-1.0b5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika/__init__.py +2 -2
- avtomatika/api.html +0 -11
- avtomatika/blueprint.py +9 -11
- avtomatika/config.py +7 -0
- avtomatika/context.py +18 -18
- avtomatika/data_types.py +6 -7
- avtomatika/datastore.py +2 -2
- avtomatika/dispatcher.py +20 -21
- avtomatika/engine.py +107 -68
- avtomatika/executor.py +168 -148
- avtomatika/history/base.py +7 -7
- avtomatika/history/noop.py +7 -7
- avtomatika/history/postgres.py +7 -9
- avtomatika/history/sqlite.py +7 -10
- avtomatika/logging_config.py +1 -1
- avtomatika/storage/__init__.py +2 -2
- avtomatika/storage/base.py +31 -20
- avtomatika/storage/memory.py +36 -43
- avtomatika/storage/redis.py +124 -60
- avtomatika/worker_config_loader.py +2 -2
- avtomatika/ws_manager.py +1 -2
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/METADATA +44 -9
- avtomatika-1.0b5.dist-info/RECORD +37 -0
- avtomatika-1.0b3.dist-info/RECORD +0 -37
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/WHEEL +0 -0
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/licenses/LICENSE +0 -0
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/top_level.txt +0 -0
avtomatika/storage/redis.py
CHANGED
@@ -1,8 +1,10 @@
 from asyncio import CancelledError, get_running_loop
 from logging import getLogger
-from
+from os import getenv
+from socket import gethostname
+from typing import Any

-from
+from msgpack import packb, unpackb
 from redis import Redis, WatchError
 from redis.exceptions import NoScriptError, ResponseError

@@ -14,23 +16,41 @@ logger = getLogger(__name__)
 class RedisStorage(StorageBackend):
     """Implementation of the state store based on Redis."""

-    def __init__(
+    def __init__(
+        self,
+        redis_client: Redis,
+        prefix: str = "orchestrator:job",
+        group_name: str = "orchestrator_group",
+        consumer_name: str | None = None,
+        min_idle_time_ms: int = 60000,
+    ):
         self._redis = redis_client
         self._prefix = prefix
+        self._stream_key = "orchestrator:job_stream"
+        self._group_name = group_name
+        self._consumer_name = consumer_name or getenv("INSTANCE_ID", gethostname())
+        self._group_created = False
+        self._min_idle_time_ms = min_idle_time_ms

     def _get_key(self, job_id: str) -> str:
         return f"{self._prefix}:{job_id}"

-
+    @staticmethod
+    def _pack(data: Any) -> bytes:
+        return packb(data, use_bin_type=True)
+
+    @staticmethod
+    def _unpack(data: bytes) -> Any:
+        return unpackb(data, raw=False)
+
+    async def get_job_state(self, job_id: str) -> dict[str, Any] | None:
         """Get the job state from Redis."""
         key = self._get_key(job_id)
         data = await self._redis.get(key)
-        return
+        return self._unpack(data) if data else None

-    async def get_priority_queue_stats(self, task_type: str) ->
+    async def get_priority_queue_stats(self, task_type: str) -> dict[str, Any]:
         """Gets statistics for the priority queue (Sorted Set) for a given task type."""
-        # In our implementation, the queue is tied to the worker type, not the task type.
-        # For simplicity, we assume that the task type corresponds to the worker type.
         worker_type = task_type
         key = f"orchestrator:task_queue:{worker_type}"

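The new `_pack`/`_unpack` helpers replace the previous serialization (truncated on the old side of this diff) with msgpack. A minimal round-trip sketch of what these helpers do, using the same `packb`/`unpackb` flags:

```python
from typing import Any

from msgpack import packb, unpackb


def pack(data: Any) -> bytes:
    # use_bin_type=True keeps Python bytes and str distinct on the wire.
    return packb(data, use_bin_type=True)


def unpack(data: bytes) -> Any:
    # raw=False decodes msgpack raw fields back into Python str.
    return unpackb(data, raw=False)


state = {"job_id": "j-1", "status": "running", "attempts": 2}
assert unpack(pack(state)) == state
```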
@@ -63,15 +83,15 @@ class RedisStorage(StorageBackend):
         key = f"orchestrator:task_cancel:{task_id}"
         await self._redis.set(key, "1", ex=3600)

-    async def save_job_state(self, job_id: str, state:
+    async def save_job_state(self, job_id: str, state: dict[str, Any]) -> None:
         """Save the job state to Redis."""
         key = self._get_key(job_id)
-        await self._redis.set(key,
+        await self._redis.set(key, self._pack(state))

     async def update_job_state(
         self,
         job_id: str,
-        update_data:
+        update_data: dict[str, Any],
     ) -> dict[Any, Any] | None | Any:
         """Atomically update the job state in Redis using a transaction."""
         key = self._get_key(job_id)
@@ -81,13 +101,13 @@ class RedisStorage(StorageBackend):
            try:
                 await pipe.watch(key)
                 current_state_raw = await pipe.get(key)
-                current_state =
+                current_state = self._unpack(current_state_raw) if current_state_raw else {}

                 # Simple dictionary merge. For nested structures, a deep merge may be required.
                 current_state.update(update_data)

                 pipe.multi()
-                pipe.set(key,
+                pipe.set(key, self._pack(current_state))
                 await pipe.execute()
                 return current_state
             except WatchError:
@@ -96,35 +116,29 @@ class RedisStorage(StorageBackend):
     async def register_worker(
         self,
         worker_id: str,
-        worker_info:
+        worker_info: dict[str, Any],
         ttl: int,
     ) -> None:
-        """Registers a worker in Redis.
-
-        Note: The 'address' key in `worker_info` is no longer used,
-        as in the PULL model, workers initiate the connection with the
-        orchestrator themselves.
-        """
-        # Set default reputation for new workers
+        """Registers a worker in Redis."""
         worker_info.setdefault("reputation", 1.0)
         key = f"orchestrator:worker:info:{worker_id}"
-        await self._redis.set(key,
+        await self._redis.set(key, self._pack(worker_info), ex=ttl)

     async def enqueue_task_for_worker(
         self,
         worker_id: str,
-        task_payload:
+        task_payload: dict[str, Any],
         priority: float,
     ) -> None:
         """Adds a task to the priority queue (Sorted Set) for a worker."""
         key = f"orchestrator:task_queue:{worker_id}"
-        await self._redis.zadd(key, {
+        await self._redis.zadd(key, {self._pack(task_payload): priority})

     async def dequeue_task_for_worker(
         self,
         worker_id: str,
         timeout: int,
-    ) ->
+    ) -> dict[str, Any] | None:
         """Retrieves the highest priority task from the queue (Sorted Set),
         using the blocking BZPOPMAX operation.
         """
@@ -132,7 +146,7 @@ class RedisStorage(StorageBackend):
         try:
             # BZPOPMAX returns a tuple (key, member, score)
             result = await self._redis.bzpopmax([key], timeout=timeout)
-            return
+            return self._unpack(result[1]) if result else None
         except CancelledError:
             return None
         except ResponseError as e:
@@ -145,7 +159,7 @@ class RedisStorage(StorageBackend):
             # Non-blocking fallback for tests
             res = await self._redis.zpopmax(key)
             if res:
-                return
+                return self._unpack(res[0][0])
             raise e

     async def refresh_worker_ttl(self, worker_id: str, ttl: int) -> bool:
@@ -158,9 +172,9 @@ class RedisStorage(StorageBackend):
     async def update_worker_status(
         self,
         worker_id: str,
-        status_update:
+        status_update: dict[str, Any],
         ttl: int,
-    ) ->
+    ) -> dict[str, Any] | None:
         key = f"orchestrator:worker:info:{worker_id}"
         async with self._redis.pipeline(transaction=True) as pipe:
             try:
@@ -169,7 +183,7 @@ class RedisStorage(StorageBackend):
                 if not current_state_raw:
                     return None

-                current_state =
+                current_state = self._unpack(current_state_raw)

                 # Create a potential new state to compare against the current one
                 new_state = current_state.copy()
@@ -179,7 +193,7 @@ class RedisStorage(StorageBackend):

                 # Only write to Redis if the state has actually changed.
                 if new_state != current_state:
-                    pipe.set(key,
+                    pipe.set(key, self._pack(new_state), ex=ttl)
                     current_state = new_state  # Update the state to be returned
                 else:
                     # If nothing changed, just refresh the TTL to keep the worker alive.
@@ -195,8 +209,8 @@ class RedisStorage(StorageBackend):
     async def update_worker_data(
         self,
         worker_id: str,
-        update_data:
-    ) ->
+        update_data: dict[str, Any],
+    ) -> dict[str, Any] | None:
         key = f"orchestrator:worker:info:{worker_id}"
         async with self._redis.pipeline(transaction=True) as pipe:
             try:
@@ -205,12 +219,12 @@ class RedisStorage(StorageBackend):
                 if not current_state_raw:
                     return None

-                current_state =
+                current_state = self._unpack(current_state_raw)
                 current_state.update(update_data)

                 pipe.multi()
                 # Do not set TTL, as this is a data update, not a heartbeat
-                pipe.set(key,
+                pipe.set(key, self._pack(current_state))
                 await pipe.execute()
                 return current_state
             except WatchError:
@@ -229,7 +243,7 @@ class RedisStorage(StorageBackend):
             return []

         worker_data_list = await self._redis.mget(worker_keys)
-        return [
+        return [self._unpack(data) for data in worker_data_list if data]

     async def add_job_to_watch(self, job_id: str, timeout_at: float) -> None:
         """Adds a job to a Redis sorted set.
@@ -259,18 +273,74 @@ class RedisStorage(StorageBackend):
         return []

     async def enqueue_job(self, job_id: str) -> None:
-        """Adds a job to the Redis
-        await self._redis.
+        """Adds a job to the Redis stream."""
+        await self._redis.xadd(self._stream_key, {"job_id": job_id})
+
+    async def dequeue_job(self) -> tuple[str, str] | None:
+        """Retrieves a job from the Redis stream using consumer groups.
+        Implements a recovery strategy: checks for pending messages first.
+        """
+        if not self._group_created:
+            try:
+                await self._redis.xgroup_create(self._stream_key, self._group_name, id="0", mkstream=True)
+            except ResponseError as e:
+                if "BUSYGROUP" not in str(e):
+                    raise e
+            self._group_created = True

-    async def dequeue_job(self) -> str | None:
-        """Retrieves a job from the Redis queue (list) with blocking."""
         try:
-
-
-
+            try:
+                autoclaim_result = await self._redis.xautoclaim(
+                    self._stream_key,
+                    self._group_name,
+                    self._consumer_name,
+                    min_idle_time=self._min_idle_time_ms,
+                    start_id="0-0",
+                    count=1,
+                )
+                if autoclaim_result and autoclaim_result[1]:
+                    messages = autoclaim_result[1]
+                    message_id, data = messages[0]
+                    if data:
+                        job_id = data[b"job_id"].decode("utf-8")
+                        logger.info(f"Reclaimed pending message {message_id} for consumer {self._consumer_name}")
+                        return job_id, message_id.decode("utf-8")
+            except Exception as e:
+                if "unknown command" in str(e).lower() or isinstance(e, ResponseError):
+                    pending_result = await self._redis.xreadgroup(
+                        self._group_name,
+                        self._consumer_name,
+                        {self._stream_key: "0"},
+                        count=1,
+                    )
+                    if pending_result:
+                        stream_name, messages = pending_result[0]
+                        if messages:
+                            message_id, data = messages[0]
+                            job_id = data[b"job_id"].decode("utf-8")
+                            return job_id, message_id.decode("utf-8")
+                else:
+                    raise e
+
+            result = await self._redis.xreadgroup(
+                self._group_name,
+                self._consumer_name,
+                {self._stream_key: ">"},
+                count=1,
+            )
+            if result:
+                stream_name, messages = result[0]
+                message_id, data = messages[0]
+                job_id = data[b"job_id"].decode("utf-8")
+                return job_id, message_id.decode("utf-8")
+            return None
         except CancelledError:
             return None

+    async def ack_job(self, message_id: str) -> None:
+        """Acknowledges a message in the Redis stream."""
+        await self._redis.xack(self._stream_key, self._group_name, message_id)
+
     async def quarantine_job(self, job_id: str) -> None:
         """Moves the job ID to the 'quarantine' list in Redis."""
         await self._redis.lpush("orchestrator:quarantine_queue", job_id)  # type: ignore[arg-type]
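Taken together, `enqueue_job`, `dequeue_job`, and the new `ack_job` move job distribution from a blocking Redis list to a Redis Stream with a consumer group, so unacknowledged messages from a crashed instance can be reclaimed via `XAUTOCLAIM`. A consumer-loop sketch of how these methods fit together; `handle_job` and the `redis.asyncio` wiring are illustrative assumptions, not part of the package API:

```python
import asyncio

from redis.asyncio import Redis

from avtomatika.storage.redis import RedisStorage


async def handle_job(job_id: str) -> None:
    # Stand-in for real job processing (assumption for this sketch).
    print(f"processing {job_id}")


async def consume_forever(storage: RedisStorage) -> None:
    """Illustrative loop: pop a job, process it, then acknowledge it."""
    while True:
        dequeued = await storage.dequeue_job()
        if dequeued is None:
            await asyncio.sleep(0.1)  # stream empty; avoid a busy loop
            continue
        job_id, message_id = dequeued
        try:
            await handle_job(job_id)
        finally:
            # XACK clears the message from the pending entries list; without
            # it, another consumer would eventually reclaim the message.
            await storage.ack_job(message_id)


async def main() -> None:
    redis = Redis.from_url("redis://localhost:6379/0")
    await consume_forever(RedisStorage(redis, consumer_name="orchestrator-1"))


if __name__ == "__main__":
    asyncio.run(main())
```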
@@ -290,31 +360,27 @@ class RedisStorage(StorageBackend):
         using a Lua script for atomicity.
         Returns the new value of the counter.
         """
-        # Note: This implementation is simplified for fakeredis compatibility,
-        # which does not support Lua scripting well. In a production Redis,
-        # a Lua script would be more efficient to set the EXPIRE only once.
-        # This version resets the TTL on every call, which is acceptable for tests.
         async with self._redis.pipeline(transaction=True) as pipe:
             pipe.incr(key)
             pipe.expire(key, ttl)
             results = await pipe.execute()
             return results[0]

-    async def save_client_config(self, token: str, config:
+    async def save_client_config(self, token: str, config: dict[str, Any]) -> None:
         """Saves the static client configuration as a hash."""
         key = f"orchestrator:client_config:{token}"
-        # Convert all values to strings for storage in a Redis hash
-        str_config = {k:
+        # Convert all values to binary strings for storage in a Redis hash
+        str_config = {k: self._pack(v) for k, v in config.items()}
         await self._redis.hset(key, mapping=str_config)

-    async def get_client_config(self, token: str) ->
+    async def get_client_config(self, token: str) -> dict[str, Any] | None:
         """Gets the static client configuration."""
         key = f"orchestrator:client_config:{token}"
         config_raw = await self._redis.hgetall(key)  # type: ignore[misc]
         if not config_raw:
             return None
-        # Decode keys and values, parse
-        return {k.decode("utf-8"):
+        # Decode keys and values, parse binary
+        return {k.decode("utf-8"): self._unpack(v) for k, v in config_raw.items()}

     async def initialize_client_quota(self, token: str, quota: int) -> None:
         """Sets or resets the quota counter."""
@@ -370,8 +436,8 @@ class RedisStorage(StorageBackend):
         await self._redis.flushdb()

     async def get_job_queue_length(self) -> int:
-        """Returns the length of the job
-        return await self._redis.
+        """Returns the length of the job stream."""
+        return await self._redis.xlen(self._stream_key)

     async def get_active_worker_count(self) -> int:
         """Returns the number of active worker keys."""
@@ -385,22 +451,21 @@ class RedisStorage(StorageBackend):
         key = f"orchestrator:worker:token:{worker_id}"
         await self._redis.set(key, token)

-    async def get_worker_token(self, worker_id: str) ->
+    async def get_worker_token(self, worker_id: str) -> str | None:
         """Retrieves the individual token for a specific worker."""
         key = f"orchestrator:worker:token:{worker_id}"
         token = await self._redis.get(key)
         return token.decode("utf-8") if token else None

-    async def get_worker_info(self, worker_id: str) ->
+    async def get_worker_info(self, worker_id: str) -> dict[str, Any] | None:
         """Gets the full info for a worker by its ID."""
         key = f"orchestrator:worker:info:{worker_id}"
         data = await self._redis.get(key)
-        return
+        return self._unpack(data) if data else None

     async def acquire_lock(self, key: str, holder_id: str, ttl: int) -> bool:
         """Attempts to acquire a lock using Redis SET NX."""
         redis_key = f"orchestrator:lock:{key}"
-        # Returns True if set was successful (key didn't exist), None otherwise
         result = await self._redis.set(redis_key, holder_id, nx=True, ex=ttl)
         return bool(result)

@@ -419,7 +484,6 @@ class RedisStorage(StorageBackend):
             result = await self._redis.eval(LUA_RELEASE_SCRIPT, 1, redis_key, holder_id)
             return bool(result)
         except ResponseError as e:
-            # Fallback for fakeredis if needed, though fakeredis usually supports eval
             if "unknown command" in str(e):
                 current_val = await self._redis.get(redis_key)
                 if current_val and current_val.decode("utf-8") == holder_id:
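`acquire_lock` and the `LUA_RELEASE_SCRIPT` call above follow the standard SET NX plus compare-and-delete locking pattern. A self-contained sketch of that pattern; the Lua body shown is the textbook compare-and-delete script, an assumption since the actual `LUA_RELEASE_SCRIPT` is not visible in this diff:

```python
from redis.asyncio import Redis

# Textbook compare-and-delete release: only the current holder may delete.
LUA_RELEASE = """
if redis.call("get", KEYS[1]) == ARGV[1] then
    return redis.call("del", KEYS[1])
else
    return 0
end
"""


async def acquire(redis: Redis, key: str, holder_id: str, ttl: int) -> bool:
    # NX: set only if the key does not exist; EX: auto-expire after ttl seconds.
    return bool(await redis.set(key, holder_id, nx=True, ex=ttl))


async def release(redis: Redis, key: str, holder_id: str) -> bool:
    # numkeys=1, so KEYS[1] is `key` and ARGV[1] is `holder_id`.
    return bool(await redis.eval(LUA_RELEASE, 1, key, holder_id))
```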
avtomatika/worker_config_loader.py
CHANGED
@@ -2,7 +2,7 @@ from hashlib import sha256
 from logging import getLogger
 from os.path import exists
 from tomllib import load
-from typing import Any
+from typing import Any

 from .storage.base import StorageBackend

@@ -25,7 +25,7 @@ async def load_worker_configs_to_redis(storage: StorageBackend, config_path: str

     try:
         with open(config_path, "rb") as f:
-            workers_config:
+            workers_config: dict[str, Any] = load(f)
     except Exception as e:
         logger.error(f"Failed to load or parse worker config file '{config_path}': {e}")
         raise ValueError(f"Invalid worker configuration file: {e}") from e
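The loader's error handling above reflects the fail-fast policy described in the METADATA changes below: any parse error aborts startup. A standalone sketch of the same pattern with `tomllib` (the function name and file layout are illustrative):

```python
from tomllib import TOMLDecodeError, load
from typing import Any


def read_worker_config(path: str) -> dict[str, Any]:
    """Parse a workers.toml-style file, failing fast on any error."""
    try:
        with open(path, "rb") as f:  # tomllib requires binary mode
            return load(f)
    except (OSError, TOMLDecodeError) as e:
        raise ValueError(f"Invalid worker configuration file: {e}") from e
```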
avtomatika/ws_manager.py
CHANGED
@@ -1,6 +1,5 @@
 from asyncio import Lock
 from logging import getLogger
-from typing import Dict

 from aiohttp import web

@@ -11,7 +10,7 @@ class WebSocketManager:
     """Manages active WebSocket connections from workers."""

     def __init__(self):
-        self._connections:
+        self._connections: dict[str, web.WebSocketResponse] = {}
         self._lock = Lock()

     async def register(self, worker_id: str, ws: web.WebSocketResponse):
{avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b3
+Version: 1.0b5
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
 Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
@@ -17,13 +17,13 @@ Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
 Requires-Dist: zstandard~=0.24
 Requires-Dist: aioprometheus~=23.12
+Requires-Dist: msgpack~=1.1
+Requires-Dist: orjson~=3.11
 Provides-Extra: redis
 Requires-Dist: redis~=7.1; extra == "redis"
-Requires-Dist: orjson~=3.11; extra == "redis"
 Provides-Extra: history
 Requires-Dist: aiosqlite~=0.22; extra == "history"
 Requires-Dist: asyncpg~=0.30; extra == "history"
-Requires-Dist: orjson~=3.11; extra == "history"
 Provides-Extra: telemetry
 Requires-Dist: opentelemetry-api~=1.39; extra == "telemetry"
 Requires-Dist: opentelemetry-sdk~=1.39; extra == "telemetry"
@@ -334,6 +334,24 @@ The orchestrator's behavior can be configured through environment variables. Add

 **Important:** The system employs **strict validation** for configuration files (`clients.toml`, `workers.toml`) at startup. If a configuration file is invalid (e.g., malformed TOML, missing required fields), the application will **fail fast** and exit with an error, rather than starting in a partially broken state. This ensures the security and integrity of the deployment.

+### Configuration Files
+
+To manage access and worker settings securely, Avtomatika uses TOML configuration files.
+
+- **`clients.toml`**: Defines API clients, their tokens, plans, and quotas.
+  ```toml
+  [client_premium]
+  token = "secret-token-123"
+  plan = "premium"
+  ```
+- **`workers.toml`**: Defines individual tokens for workers to enhance security.
+  ```toml
+  [gpu-worker-01]
+  token = "worker-secret-456"
+  ```
+
+For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
+
 ### Fault Tolerance

 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
@@ -342,18 +360,25 @@ The orchestrator has built-in mechanisms for handling failures based on the `err
 * **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.

+### Concurrency & Performance
+
+To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
+
+* **`EXECUTOR_MAX_CONCURRENT_JOBS`**: Limits the number of job handlers running simultaneously within the Orchestrator process (default: `100`). If this limit is reached, new jobs remain in the Redis queue until a slot becomes available. This ensures the event loop remains responsive even with a massive backlog of pending jobs.
+
 ### High Availability & Distributed Locking

 The architecture supports horizontal scaling. Multiple Orchestrator instances can run behind a load balancer.

 * **Stateless API:** The API is stateless; all state is persisted in Redis.
+* **Instance Identity:** Each instance should have a unique `INSTANCE_ID` (defaults to hostname) for correct handling of Redis Streams consumer groups.
 * **Distributed Locking:** Background processes (`Watcher`, `ReputationCalculator`) use distributed locks (via Redis `SET NX`) to coordinate and prevent race conditions when multiple instances are active.

 ### Storage Backend

 By default, the engine uses in-memory storage. For production, you must configure persistent storage via environment variables.

-* **Redis (StorageBackend)**: For storing current job states.
+* **Redis (StorageBackend)**: For storing current job states (serialized with `msgpack`) and managing task queues (using Redis Streams with consumer groups).
     * Install:
     ```bash
     pip install "avtomatika[redis]"
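The `EXECUTOR_MAX_CONCURRENT_JOBS` backpressure added above is the classic bounded-concurrency pattern. A minimal sketch of how such a limit can be enforced with `asyncio.Semaphore`; this is illustrative only, not the package's actual executor code:

```python
import asyncio

MAX_CONCURRENT_JOBS = 100  # mirrors the EXECUTOR_MAX_CONCURRENT_JOBS default


async def process(job_id: str) -> None:
    # Stand-in for real job handling (assumption for this sketch).
    await asyncio.sleep(0.01)


async def run_with_backpressure(job_ids: list[str]) -> None:
    semaphore = asyncio.Semaphore(MAX_CONCURRENT_JOBS)

    async def run_one(job_id: str) -> None:
        # Handlers beyond the limit wait here; the backlog drains as slots free up.
        async with semaphore:
            await process(job_id)

    await asyncio.gather(*(run_one(j) for j in job_ids))


if __name__ == "__main__":
    asyncio.run(run_with_backpressure([f"job-{i}" for i in range(1000)]))
```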
@@ -418,11 +443,21 @@ To run the `avtomatika` test suite:
 pytest avtomatika/tests/
 ```

+### Interactive API Documentation
+
+Avtomatika provides a built-in interactive API documentation page (similar to Swagger UI) that is automatically generated based on your registered blueprints.
+
+* **Endpoint:** `/_public/docs`
+* **Features:**
+    * **List of all system endpoints:** Detailed documentation for Public, Protected, and Worker API groups.
+    * **Dynamic Blueprint Documentation:** Automatically generates and lists documentation for all blueprints registered in the engine, including their specific API endpoints.
+    * **Interactive Testing:** Allows you to test API calls directly from the browser. You can provide authentication tokens, parameters, and request bodies to see real server responses.
+
 ## Detailed Documentation

-For a deeper dive into the system, please refer to the following documents
+For a deeper dive into the system, please refer to the following documents:

-- [**Architecture Guide**](docs/architecture.md): A detailed overview of the system components and their interactions.
-- [**API Reference**](docs/api_reference.md): Full specification of the HTTP API.
-- [**Deployment Guide**](docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
-- [**Cookbook**](docs/cookbook/README.md): Examples and best practices for creating blueprints.
+- [**Architecture Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/architecture.md): A detailed overview of the system components and their interactions.
+- [**API Reference**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/api_reference.md): Full specification of the HTTP API.
+- [**Deployment Guide**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/deployment.md): Instructions for deploying with Gunicorn/Uvicorn and NGINX.
+- [**Cookbook**](https://github.com/avtomatika-ai/avtomatika/blob/main/docs/cookbook/README.md): Examples and best practices for creating blueprints.
avtomatika-1.0b5.dist-info/RECORD
ADDED
@@ -0,0 +1,37 @@
+avtomatika/__init__.py,sha256=D5r3L-H06uxsY_wgfh7u9YR29QvZMer1BlvzjW9Umfo,701
+avtomatika/api.html,sha256=RLx-D1uFCSAXIf_2WgFlSTWrWPcmonNYM-9oNanKXBg,32835
+avtomatika/blueprint.py,sha256=jU1Un9yiIUbZ9I7k20XyAhnMbXyrnkC0AakMX3GbK6k,9207
+avtomatika/client_config_loader.py,sha256=zVVHZlxSqZUaNpZ4zoU0T1CFYXdxy-3vKSmPcaFuHSY,2772
+avtomatika/compression.py,sha256=bhA1kw4YrCR3I3kdquZSY0fAzCrRrjtz55uepzLUDKI,2498
+avtomatika/config.py,sha256=_XMLdg-iIuXxbmnns17DC8xe8DeLH4xTOCddqUeYxV4,2337
+avtomatika/context.py,sha256=MT_RMMxSCAVEhlo5HUQdQ1uaR86D9d4s_3nrd6O5xAg,4241
+avtomatika/data_types.py,sha256=VWWbTcJaQt43JaD09qZL1spwNdzCR9vPXVthlM133gM,1362
+avtomatika/datastore.py,sha256=gJjhZ5kxjF8pmbbPQb_qu3HPUpfy2c6T75KZ-smb_zg,545
+avtomatika/dispatcher.py,sha256=Tsye9zOcLN2c7O0AalbX9Il5l3XdT64UDV7iQKiLIJE,9627
+avtomatika/engine.py,sha256=tHYmOCb74gBN-YhxBNVmsE-e5i88wmudLYfK4LWX8Bk,39041
+avtomatika/executor.py,sha256=-SKqLs-DVtMw5P7bZInK57f0M4zRFoVFr1fU5MUrtiY,22543
+avtomatika/health_checker.py,sha256=WXwvRJ-3cZC2Udc_ogsyIQp7VzcvJjq_IaqzkTdE0TE,1265
+avtomatika/logging_config.py,sha256=7RkcdFyhyiCz8MIipDO689mTQVofUJEv-k59QmtqYgc,1368
+avtomatika/metrics.py,sha256=7XDhr_xMJ9JpElpZmBG7R0ml7AMdAp9UYp_W-i7tyLg,1858
+avtomatika/py.typed,sha256=CT_L7gw2MLcQY-X0vs-xB5Vr0wzvGo7GuQYPI_qwJE8,65
+avtomatika/quota.py,sha256=DNcaL6k0J1REeP8sVqbY9FprY_3BSr2SxM2Vf4mEqdw,1612
+avtomatika/ratelimit.py,sha256=hFGW5oN9G6_W_jnHmopXW8bRjjzlvanY19MLghsNLE8,1306
+avtomatika/reputation.py,sha256=IHcaIAILWZftPPmXj5En28OSDNK7U8ivQ-w30zIF8fk,3748
+avtomatika/security.py,sha256=afj28O3xB20EmA75DAQCQm_QKzx_tX2Qv9zE9TlcFvM,4441
+avtomatika/telemetry.py,sha256=ZBt1_xJ36PzDSz-zdCXeNp58NiezUgbqvMctTG25PT0,2352
+avtomatika/watcher.py,sha256=IHaqSqp3XSGXjRY-LEeTG9BJpq2nqJSnmjY_Vdvk3jo,3493
+avtomatika/worker_config_loader.py,sha256=n0j8gfuJDacWONr8744RsHTCWpc_1ZTRMC-rJZh6P6A,2249
+avtomatika/ws_manager.py,sha256=pi5xe0ivsCjRZw08ri5N-gAChMH2I2YPLpl3E2tP89k,3057
+avtomatika/history/base.py,sha256=Gfw0Gb4Mt9wQrMlYLugZwey_6-cDej5OUctiMTCWg7Q,1668
+avtomatika/history/noop.py,sha256=ETVtPiTfkaMpzhGD8c0_4Iu6pWD89dnPrrRrSIjmc8s,970
+avtomatika/history/postgres.py,sha256=CtwupdKGV_gmxcUlbv2xSFadaF8OX2Gul_FYz0K05SE,7551
+avtomatika/history/sqlite.py,sha256=NuuSW9HhzHJcz7_S2otKkudGF4nRVUU2m28ZgVgqMro,8675
+avtomatika/storage/__init__.py,sha256=mGRj_40dWZ7R7uYbqC6gCsUWCKHAbZz4ZVIhYg5dT_E,262
+avtomatika/storage/base.py,sha256=NGQNLPL5z1AX7TzZkQTHAq3gOmLLhhIr8pO-u0VLrBg,10824
+avtomatika/storage/memory.py,sha256=x1FI33KfY6wpGVMY5dGetZR-GAyAajQTg1J2T8slO3U,10938
+avtomatika/storage/redis.py,sha256=of4K5qC_jKMMSj-oyP6rYsAPvTuGLdsLCl4pqfrlhGA,20341
+avtomatika-1.0b5.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
+avtomatika-1.0b5.dist-info/METADATA,sha256=t1mcWWtHdh-SnsTymYpxYNztsQ50mvSz3QxZA4FBWrI,21644
+avtomatika-1.0b5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+avtomatika-1.0b5.dist-info/top_level.txt,sha256=gLDWhA_wxHj0I6fG5X8vw9fE0HSN4hTE2dEJzeVS2x8,11
+avtomatika-1.0b5.dist-info/RECORD,,
avtomatika-1.0b3.dist-info/RECORD
DELETED
@@ -1,37 +0,0 @@
-avtomatika/__init__.py,sha256=nlk59j7YcK1gapRUVfHjvFZVAD_PZoamgHEptchP3TA,698
-avtomatika/api.html,sha256=Z-Ikqrle7YPXagx2D-C5ylVZicLQFSsIzPsHCQgqMHM,33628
-avtomatika/blueprint.py,sha256=Hx5h0upr_IYbCy1ebUTpXw4bnt5yYhgWtdPLVE1_h48,9403
-avtomatika/client_config_loader.py,sha256=zVVHZlxSqZUaNpZ4zoU0T1CFYXdxy-3vKSmPcaFuHSY,2772
-avtomatika/compression.py,sha256=bhA1kw4YrCR3I3kdquZSY0fAzCrRrjtz55uepzLUDKI,2498
-avtomatika/config.py,sha256=0vlMfVMjxwVUC8m_NglGocC_EoklzAc0qmt3UJbxm10,2087
-avtomatika/context.py,sha256=rnF09jqQGkaKlax8P5ku9USwijSm6dommDGZbeVrzLk,4295
-avtomatika/data_types.py,sha256=g-g5hPnCpzeATgOn5v7EvDm5ps314owFJD5iWJ6IPR0,1425
-avtomatika/datastore.py,sha256=ERMyiFYQpAhVYijxzTrrdm6jtIPFf4dngWIa0qod3Wc,551
-avtomatika/dispatcher.py,sha256=a_7DjJwSXbW-ZzqcjZG0ZXMYDD2JLZxpQRIzHOrjeow,9688
-avtomatika/engine.py,sha256=zwouopyGjHkyiE3dMndxu1uAIMOFvnV1h8-ZIFHIH-k,37507
-avtomatika/executor.py,sha256=JHwT2DR-Hbrb_-Le1-mVaXiiQ7z-PkMsuIYB9ciiVo0,21201
-avtomatika/health_checker.py,sha256=WXwvRJ-3cZC2Udc_ogsyIQp7VzcvJjq_IaqzkTdE0TE,1265
-avtomatika/logging_config.py,sha256=e0-eEEGHw1zz9ZshzXaxfavV0uZfamRNdcAeHnrgBYQ,1370
-avtomatika/metrics.py,sha256=7XDhr_xMJ9JpElpZmBG7R0ml7AMdAp9UYp_W-i7tyLg,1858
-avtomatika/py.typed,sha256=CT_L7gw2MLcQY-X0vs-xB5Vr0wzvGo7GuQYPI_qwJE8,65
-avtomatika/quota.py,sha256=DNcaL6k0J1REeP8sVqbY9FprY_3BSr2SxM2Vf4mEqdw,1612
-avtomatika/ratelimit.py,sha256=hFGW5oN9G6_W_jnHmopXW8bRjjzlvanY19MLghsNLE8,1306
-avtomatika/reputation.py,sha256=IHcaIAILWZftPPmXj5En28OSDNK7U8ivQ-w30zIF8fk,3748
-avtomatika/security.py,sha256=afj28O3xB20EmA75DAQCQm_QKzx_tX2Qv9zE9TlcFvM,4441
-avtomatika/telemetry.py,sha256=ZBt1_xJ36PzDSz-zdCXeNp58NiezUgbqvMctTG25PT0,2352
-avtomatika/watcher.py,sha256=IHaqSqp3XSGXjRY-LEeTG9BJpq2nqJSnmjY_Vdvk3jo,3493
-avtomatika/worker_config_loader.py,sha256=Ir8jbZ_07U8NAcu3r_EXM1jQvNpVEvHRP0k9vsq3mio,2255
-avtomatika/ws_manager.py,sha256=v3nz-w4AhoV_vqs3y8twXaMxm7s52wg2wzCMzTkPd8M,3081
-avtomatika/history/base.py,sha256=p0zItsdxFzd889LujV8py6GwK4CUfqAt8QL915mrT4k,1680
-avtomatika/history/noop.py,sha256=Hk5yJsS4S5G5A7NRRMEafIV_IFI9hddSwEvRg2Reh0M,982
-avtomatika/history/postgres.py,sha256=zanh_WktXM_C8ZPsYGiI1x4ScyHDNE1LVOeYiN72NdY,7685
-avtomatika/history/sqlite.py,sha256=hivl--uJ47MosrD6qhBwW3KYAQvVgbPNM6UYYatqFKM,8862
-avtomatika/storage/__init__.py,sha256=ygqv240XuYuHjU_2eci0J3FWoJLNSRpUFA2GzBrHMKg,259
-avtomatika/storage/base.py,sha256=BCC7uAQrko1UCwZo5kGF-0blwJiFcLCcT-pMnhYAxqY,10494
-avtomatika/storage/memory.py,sha256=7VhQO02SbYc65uDTOY9g43CVOgsodxzg-WYo0JGpUec,11387
-avtomatika/storage/redis.py,sha256=kgNUJuwcxQvCzul0m5COKhDnfJGKReMNeWxtG_BGfLc,18171
-avtomatika-1.0b3.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
-avtomatika-1.0b3.dist-info/METADATA,sha256=OXavLWy_3WKrw5oW2Q8WdWvAdFFDA1-XvXGN59LQLKM,19455
-avtomatika-1.0b3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-avtomatika-1.0b3.dist-info/top_level.txt,sha256=gLDWhA_wxHj0I6fG5X8vw9fE0HSN4hTE2dEJzeVS2x8,11
-avtomatika-1.0b3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|