avtomatika 1.0b9__py3-none-any.whl → 1.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika/api/handlers.py +2 -2
- avtomatika/api.html +1 -1
- avtomatika/blueprint.py +3 -2
- avtomatika/constants.py +76 -2
- avtomatika/dispatcher.py +3 -0
- avtomatika/engine.py +13 -4
- avtomatika/logging_config.py +16 -7
- avtomatika/scheduler_config_loader.py +5 -2
- avtomatika/services/worker_service.py +4 -3
- avtomatika/storage/memory.py +3 -3
- avtomatika/storage/redis.py +9 -8
- avtomatika/telemetry.py +8 -7
- avtomatika/utils/webhook_sender.py +3 -3
- avtomatika/ws_manager.py +13 -5
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b10.dist-info}/METADATA +10 -4
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b10.dist-info}/RECORD +19 -19
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b10.dist-info}/WHEEL +0 -0
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b10.dist-info}/licenses/LICENSE +0 -0
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b10.dist-info}/top_level.txt +0 -0
avtomatika/api/handlers.py
CHANGED
|
@@ -25,11 +25,11 @@ from ..worker_config_loader import load_worker_configs_to_redis
|
|
|
25
25
|
logger = getLogger(__name__)
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def json_dumps(obj) -> str:
|
|
28
|
+
def json_dumps(obj: Any) -> str:
|
|
29
29
|
return dumps(obj).decode("utf-8")
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def json_response(data, **kwargs) -> web.Response:
|
|
32
|
+
def json_response(data: Any, **kwargs: Any) -> web.Response:
|
|
33
33
|
return web.json_response(data, dumps=json_dumps, **kwargs)
|
|
34
34
|
|
|
35
35
|
|
avtomatika/api.html
CHANGED
|
@@ -211,7 +211,7 @@
|
|
|
211
211
|
],
|
|
212
212
|
request: { body: null },
|
|
213
213
|
responses: [
|
|
214
|
-
{ code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "..." } }
|
|
214
|
+
{ code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "running", "progress": 0.75, "progress_message": "Processing..." } }
|
|
215
215
|
]
|
|
216
216
|
},
|
|
217
217
|
{
|
avtomatika/blueprint.py
CHANGED
|
@@ -62,7 +62,8 @@ class ConditionalHandler:
|
|
|
62
62
|
try:
|
|
63
63
|
context_area = getattr(context, self.condition.area)
|
|
64
64
|
actual_value = context_area[self.condition.field]
|
|
65
|
-
|
|
65
|
+
result = self.condition.op(actual_value, self.condition.value)
|
|
66
|
+
return bool(result)
|
|
66
67
|
except (AttributeError, KeyError):
|
|
67
68
|
return False
|
|
68
69
|
|
|
@@ -279,7 +280,7 @@ class StateMachineBlueprint:
|
|
|
279
280
|
f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
|
|
280
281
|
)
|
|
281
282
|
|
|
282
|
-
def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
|
|
283
|
+
def render_graph(self, output_filename: str | None = None, output_format: str = "png") -> str | None:
|
|
283
284
|
from graphviz import Digraph # type: ignore[import]
|
|
284
285
|
|
|
285
286
|
dot = Digraph(comment=f"State Machine for {self.name}")
|
avtomatika/constants.py
CHANGED
|
@@ -1,6 +1,80 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Centralized constants for the Avtomatika protocol.
|
|
3
|
-
(Legacy wrapper, pointing to
|
|
3
|
+
(Legacy wrapper, pointing to rxon.constants)
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
from rxon.constants import
|
|
6
|
+
from rxon.constants import (
|
|
7
|
+
AUTH_HEADER_CLIENT,
|
|
8
|
+
AUTH_HEADER_WORKER,
|
|
9
|
+
COMMAND_CANCEL_TASK,
|
|
10
|
+
ENDPOINT_TASK_NEXT,
|
|
11
|
+
ENDPOINT_TASK_RESULT,
|
|
12
|
+
ENDPOINT_WORKER_HEARTBEAT,
|
|
13
|
+
ENDPOINT_WORKER_REGISTER,
|
|
14
|
+
ERROR_CODE_DEPENDENCY,
|
|
15
|
+
ERROR_CODE_INTEGRITY_MISMATCH,
|
|
16
|
+
ERROR_CODE_INTERNAL,
|
|
17
|
+
ERROR_CODE_INVALID_INPUT,
|
|
18
|
+
ERROR_CODE_PERMANENT,
|
|
19
|
+
ERROR_CODE_RESOURCE_EXHAUSTED,
|
|
20
|
+
ERROR_CODE_SECURITY,
|
|
21
|
+
ERROR_CODE_TIMEOUT,
|
|
22
|
+
ERROR_CODE_TRANSIENT,
|
|
23
|
+
JOB_STATUS_CANCELLED,
|
|
24
|
+
JOB_STATUS_ERROR,
|
|
25
|
+
JOB_STATUS_FAILED,
|
|
26
|
+
JOB_STATUS_FINISHED,
|
|
27
|
+
JOB_STATUS_PENDING,
|
|
28
|
+
JOB_STATUS_QUARANTINED,
|
|
29
|
+
JOB_STATUS_RUNNING,
|
|
30
|
+
JOB_STATUS_WAITING_FOR_HUMAN,
|
|
31
|
+
JOB_STATUS_WAITING_FOR_PARALLEL,
|
|
32
|
+
JOB_STATUS_WAITING_FOR_WORKER,
|
|
33
|
+
MSG_TYPE_PROGRESS,
|
|
34
|
+
PROTOCOL_VERSION,
|
|
35
|
+
PROTOCOL_VERSION_HEADER,
|
|
36
|
+
STS_TOKEN_ENDPOINT,
|
|
37
|
+
TASK_STATUS_CANCELLED,
|
|
38
|
+
TASK_STATUS_FAILURE,
|
|
39
|
+
TASK_STATUS_SUCCESS,
|
|
40
|
+
WORKER_API_PREFIX,
|
|
41
|
+
WS_ENDPOINT,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"AUTH_HEADER_CLIENT",
|
|
46
|
+
"AUTH_HEADER_WORKER",
|
|
47
|
+
"COMMAND_CANCEL_TASK",
|
|
48
|
+
"ENDPOINT_TASK_NEXT",
|
|
49
|
+
"ENDPOINT_TASK_RESULT",
|
|
50
|
+
"ENDPOINT_WORKER_HEARTBEAT",
|
|
51
|
+
"ENDPOINT_WORKER_REGISTER",
|
|
52
|
+
"ERROR_CODE_DEPENDENCY",
|
|
53
|
+
"ERROR_CODE_INTEGRITY_MISMATCH",
|
|
54
|
+
"ERROR_CODE_INTERNAL",
|
|
55
|
+
"ERROR_CODE_INVALID_INPUT",
|
|
56
|
+
"ERROR_CODE_PERMANENT",
|
|
57
|
+
"ERROR_CODE_RESOURCE_EXHAUSTED",
|
|
58
|
+
"ERROR_CODE_SECURITY",
|
|
59
|
+
"ERROR_CODE_TIMEOUT",
|
|
60
|
+
"ERROR_CODE_TRANSIENT",
|
|
61
|
+
"JOB_STATUS_CANCELLED",
|
|
62
|
+
"JOB_STATUS_ERROR",
|
|
63
|
+
"JOB_STATUS_FAILED",
|
|
64
|
+
"JOB_STATUS_FINISHED",
|
|
65
|
+
"JOB_STATUS_PENDING",
|
|
66
|
+
"JOB_STATUS_QUARANTINED",
|
|
67
|
+
"JOB_STATUS_RUNNING",
|
|
68
|
+
"JOB_STATUS_WAITING_FOR_HUMAN",
|
|
69
|
+
"JOB_STATUS_WAITING_FOR_PARALLEL",
|
|
70
|
+
"JOB_STATUS_WAITING_FOR_WORKER",
|
|
71
|
+
"MSG_TYPE_PROGRESS",
|
|
72
|
+
"PROTOCOL_VERSION",
|
|
73
|
+
"PROTOCOL_VERSION_HEADER",
|
|
74
|
+
"STS_TOKEN_ENDPOINT",
|
|
75
|
+
"TASK_STATUS_CANCELLED",
|
|
76
|
+
"TASK_STATUS_FAILURE",
|
|
77
|
+
"TASK_STATUS_SUCCESS",
|
|
78
|
+
"WORKER_API_PREFIX",
|
|
79
|
+
"WS_ENDPOINT",
|
|
80
|
+
]
|
avtomatika/dispatcher.py
CHANGED
|
@@ -184,6 +184,9 @@ class Dispatcher:
|
|
|
184
184
|
selected_worker = self._select_default(capable_workers, task_type)
|
|
185
185
|
|
|
186
186
|
worker_id = selected_worker.get("worker_id")
|
|
187
|
+
if not worker_id:
|
|
188
|
+
raise RuntimeError(f"Selected worker for task '{task_type}' has no worker_id")
|
|
189
|
+
|
|
187
190
|
logger.info(
|
|
188
191
|
f"Dispatching task '{task_type}' to worker {worker_id} (strategy: {dispatch_strategy})",
|
|
189
192
|
)
|
avtomatika/engine.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from asyncio import TimeoutError as AsyncTimeoutError
|
|
2
2
|
from asyncio import create_task, gather, get_running_loop, wait_for
|
|
3
3
|
from logging import getLogger
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
7
|
from aiohttp import ClientSession, web
|
|
@@ -58,7 +58,7 @@ def json_dumps(obj: Any) -> str:
|
|
|
58
58
|
return dumps(obj).decode("utf-8")
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
def json_response(data, **kwargs: Any) -> web.Response:
|
|
61
|
+
def json_response(data: Any, **kwargs: Any) -> web.Response:
|
|
62
62
|
return web.json_response(data, dumps=json_dumps, **kwargs)
|
|
63
63
|
|
|
64
64
|
|
|
@@ -70,11 +70,15 @@ class OrchestratorEngine:
|
|
|
70
70
|
self.config = config
|
|
71
71
|
self.blueprints: dict[str, StateMachineBlueprint] = {}
|
|
72
72
|
self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
|
|
73
|
-
self.ws_manager = WebSocketManager()
|
|
73
|
+
self.ws_manager = WebSocketManager(self.storage)
|
|
74
74
|
self.app = web.Application(middlewares=[compression_middleware])
|
|
75
75
|
self.app[ENGINE_KEY] = self
|
|
76
|
-
self.worker_service = None
|
|
76
|
+
self.worker_service: Optional[WorkerService] = None
|
|
77
77
|
self._setup_done = False
|
|
78
|
+
self.webhook_sender: WebhookSender
|
|
79
|
+
self.dispatcher: Dispatcher
|
|
80
|
+
self.runner: web.AppRunner
|
|
81
|
+
self.site: web.TCPSite
|
|
78
82
|
|
|
79
83
|
from rxon import HttpListener
|
|
80
84
|
|
|
@@ -176,6 +180,9 @@ class OrchestratorEngine:
|
|
|
176
180
|
except ValueError as e:
|
|
177
181
|
raise web.HTTPBadRequest(text=str(e)) from e
|
|
178
182
|
|
|
183
|
+
if self.worker_service is None:
|
|
184
|
+
raise web.HTTPInternalServerError(text="WorkerService is not initialized.")
|
|
185
|
+
|
|
179
186
|
if message_type == "register":
|
|
180
187
|
return await self.worker_service.register_worker(payload)
|
|
181
188
|
|
|
@@ -352,6 +359,7 @@ class OrchestratorEngine:
|
|
|
352
359
|
initial_data: dict[str, Any],
|
|
353
360
|
source: str = "internal",
|
|
354
361
|
tracing_context: dict[str, str] | None = None,
|
|
362
|
+
data_metadata: dict[str, Any] | None = None,
|
|
355
363
|
) -> str:
|
|
356
364
|
"""Creates a job directly, bypassing the HTTP API layer.
|
|
357
365
|
Useful for internal schedulers and triggers.
|
|
@@ -377,6 +385,7 @@ class OrchestratorEngine:
|
|
|
377
385
|
"status": JOB_STATUS_PENDING,
|
|
378
386
|
"tracing_context": tracing_context or {},
|
|
379
387
|
"client_config": client_config,
|
|
388
|
+
"data_metadata": data_metadata or {},
|
|
380
389
|
}
|
|
381
390
|
await self.storage.save_job_state(job_id, job_state)
|
|
382
391
|
await self.storage.enqueue_job(job_id)
|
avtomatika/logging_config.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from logging import DEBUG, Formatter, StreamHandler, getLogger
|
|
3
3
|
from sys import stdout
|
|
4
|
+
from typing import Any, Literal, Optional
|
|
4
5
|
from zoneinfo import ZoneInfo
|
|
5
6
|
|
|
6
7
|
from pythonjsonlogger import json
|
|
@@ -9,14 +10,22 @@ from pythonjsonlogger import json
|
|
|
9
10
|
class TimezoneFormatter(Formatter):
|
|
10
11
|
"""Formatter that respects a custom timezone."""
|
|
11
12
|
|
|
12
|
-
def __init__(
|
|
13
|
+
def __init__(
|
|
14
|
+
self,
|
|
15
|
+
fmt: Optional[str] = None,
|
|
16
|
+
datefmt: Optional[str] = None,
|
|
17
|
+
style: Literal["%", "{", "$"] = "%",
|
|
18
|
+
validate: bool = True,
|
|
19
|
+
*,
|
|
20
|
+
tz_name: str = "UTC",
|
|
21
|
+
) -> None:
|
|
13
22
|
super().__init__(fmt, datefmt, style, validate)
|
|
14
23
|
self.tz = ZoneInfo(tz_name)
|
|
15
24
|
|
|
16
|
-
def converter(self, timestamp):
|
|
25
|
+
def converter(self, timestamp: float) -> datetime: # type: ignore[override]
|
|
17
26
|
return datetime.fromtimestamp(timestamp, self.tz)
|
|
18
27
|
|
|
19
|
-
def formatTime(self, record, datefmt=None):
|
|
28
|
+
def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
|
|
20
29
|
dt = self.converter(record.created)
|
|
21
30
|
if datefmt:
|
|
22
31
|
s = dt.strftime(datefmt)
|
|
@@ -28,14 +37,14 @@ class TimezoneFormatter(Formatter):
|
|
|
28
37
|
return s
|
|
29
38
|
|
|
30
39
|
|
|
31
|
-
class TimezoneJsonFormatter(json.JsonFormatter):
|
|
40
|
+
class TimezoneJsonFormatter(json.JsonFormatter): # type: ignore[name-defined]
|
|
32
41
|
"""JSON Formatter that respects a custom timezone."""
|
|
33
42
|
|
|
34
|
-
def __init__(self, *args, tz_name="UTC", **kwargs):
|
|
43
|
+
def __init__(self, *args: Any, tz_name: str = "UTC", **kwargs: Any) -> None:
|
|
35
44
|
super().__init__(*args, **kwargs)
|
|
36
45
|
self.tz = ZoneInfo(tz_name)
|
|
37
46
|
|
|
38
|
-
def formatTime(self, record, datefmt=None):
|
|
47
|
+
def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
|
|
39
48
|
# Override formatTime to use timezone-aware datetime
|
|
40
49
|
dt = datetime.fromtimestamp(record.created, self.tz)
|
|
41
50
|
if datefmt:
|
|
@@ -44,7 +53,7 @@ class TimezoneJsonFormatter(json.JsonFormatter):
|
|
|
44
53
|
return dt.isoformat()
|
|
45
54
|
|
|
46
55
|
|
|
47
|
-
def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC"):
|
|
56
|
+
def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC") -> None:
|
|
48
57
|
"""Configures structured logging for the entire application."""
|
|
49
58
|
logger = getLogger("avtomatika")
|
|
50
59
|
logger.setLevel(log_level)
|
|
@@ -22,14 +22,17 @@ def load_schedules_from_file(file_path: str) -> list[ScheduledJobConfig]:
|
|
|
22
22
|
|
|
23
23
|
schedules = []
|
|
24
24
|
for name, config in data.items():
|
|
25
|
-
# Skip sections that might be metadata (though TOML structure usually implies all top-level keys are jobs)
|
|
26
25
|
if not isinstance(config, dict):
|
|
27
26
|
continue
|
|
28
27
|
|
|
28
|
+
blueprint = config.get("blueprint")
|
|
29
|
+
if not isinstance(blueprint, str):
|
|
30
|
+
raise ValueError(f"Schedule '{name}' is missing a 'blueprint' name.")
|
|
31
|
+
|
|
29
32
|
schedules.append(
|
|
30
33
|
ScheduledJobConfig(
|
|
31
34
|
name=name,
|
|
32
|
-
blueprint=
|
|
35
|
+
blueprint=blueprint,
|
|
33
36
|
input_data=config.get("input_data", {}),
|
|
34
37
|
interval_seconds=config.get("interval_seconds"),
|
|
35
38
|
daily_at=config.get("daily_at"),
|
|
@@ -10,9 +10,11 @@ from rxon.validators import validate_identifier
|
|
|
10
10
|
from ..app_keys import S3_SERVICE_KEY
|
|
11
11
|
from ..config import Config
|
|
12
12
|
from ..constants import (
|
|
13
|
+
ERROR_CODE_DEPENDENCY,
|
|
13
14
|
ERROR_CODE_INTEGRITY_MISMATCH,
|
|
14
15
|
ERROR_CODE_INVALID_INPUT,
|
|
15
16
|
ERROR_CODE_PERMANENT,
|
|
17
|
+
ERROR_CODE_SECURITY,
|
|
16
18
|
ERROR_CODE_TRANSIENT,
|
|
17
19
|
JOB_STATUS_CANCELLED,
|
|
18
20
|
JOB_STATUS_FAILED,
|
|
@@ -214,9 +216,9 @@ class WorkerService:
|
|
|
214
216
|
job_id = job_state["id"]
|
|
215
217
|
logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
|
|
216
218
|
|
|
217
|
-
if error_type
|
|
219
|
+
if error_type in (ERROR_CODE_PERMANENT, ERROR_CODE_SECURITY, ERROR_CODE_DEPENDENCY):
|
|
218
220
|
job_state["status"] = JOB_STATUS_QUARANTINED
|
|
219
|
-
job_state["error_message"] = f"Task failed with permanent error: {error_message}"
|
|
221
|
+
job_state["error_message"] = f"Task failed with permanent error ({error_type}): {error_message}"
|
|
220
222
|
await self.storage.save_job_state(job_id, job_state)
|
|
221
223
|
await self.storage.quarantine_job(job_id)
|
|
222
224
|
elif error_type == ERROR_CODE_INVALID_INPUT:
|
|
@@ -230,7 +232,6 @@ class WorkerService:
|
|
|
230
232
|
logger.critical(f"Data integrity mismatch detected for job {job_id}: {error_message}")
|
|
231
233
|
else:
|
|
232
234
|
await self.engine.handle_task_failure(job_state, task_id, error_message)
|
|
233
|
-
|
|
234
235
|
return "result_accepted_failure"
|
|
235
236
|
|
|
236
237
|
async def issue_access_token(self, worker_id: str) -> TokenResponse:
|
avtomatika/storage/memory.py
CHANGED
|
@@ -12,12 +12,12 @@ class MemoryStorage(StorageBackend):
|
|
|
12
12
|
Not persistent.
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
def __init__(self):
|
|
15
|
+
def __init__(self) -> None:
|
|
16
16
|
self._jobs: dict[str, dict[str, Any]] = {}
|
|
17
17
|
self._workers: dict[str, dict[str, Any]] = {}
|
|
18
18
|
self._worker_ttls: dict[str, float] = {}
|
|
19
|
-
self._worker_task_queues: dict[str, PriorityQueue] = {}
|
|
20
|
-
self._job_queue = Queue()
|
|
19
|
+
self._worker_task_queues: dict[str, PriorityQueue[Any]] = {}
|
|
20
|
+
self._job_queue: Queue[str] = Queue()
|
|
21
21
|
self._quarantine_queue: list[str] = []
|
|
22
22
|
self._watched_jobs: dict[str, float] = {}
|
|
23
23
|
self._client_configs: dict[str, dict[str, Any]] = {}
|
avtomatika/storage/redis.py
CHANGED
|
@@ -95,7 +95,7 @@ class RedisStorage(StorageBackend):
|
|
|
95
95
|
self,
|
|
96
96
|
job_id: str,
|
|
97
97
|
update_data: dict[str, Any],
|
|
98
|
-
) -> dict[
|
|
98
|
+
) -> dict[str, Any]:
|
|
99
99
|
"""Atomically update the job state in Redis using a transaction."""
|
|
100
100
|
key = self._get_key(job_id)
|
|
101
101
|
|
|
@@ -104,7 +104,7 @@ class RedisStorage(StorageBackend):
|
|
|
104
104
|
try:
|
|
105
105
|
await pipe.watch(key)
|
|
106
106
|
current_state_raw = await pipe.get(key)
|
|
107
|
-
current_state = self._unpack(current_state_raw) if current_state_raw else {}
|
|
107
|
+
current_state: dict[str, Any] = self._unpack(current_state_raw) if current_state_raw else {}
|
|
108
108
|
current_state.update(update_data)
|
|
109
109
|
|
|
110
110
|
pipe.multi()
|
|
@@ -147,7 +147,7 @@ class RedisStorage(StorageBackend):
|
|
|
147
147
|
key = f"orchestrator:worker:info:{worker_id}"
|
|
148
148
|
tasks_key = f"orchestrator:worker:tasks:{worker_id}"
|
|
149
149
|
|
|
150
|
-
tasks = await self._redis.smembers(tasks_key) # type: ignore
|
|
150
|
+
tasks = await self._redis.smembers(tasks_key) # type: ignore[var-annotated]
|
|
151
151
|
|
|
152
152
|
async with self._redis.pipeline(transaction=True) as pipe:
|
|
153
153
|
pipe.delete(key)
|
|
@@ -156,7 +156,7 @@ class RedisStorage(StorageBackend):
|
|
|
156
156
|
pipe.srem("orchestrator:index:workers:idle", worker_id)
|
|
157
157
|
|
|
158
158
|
for task in tasks:
|
|
159
|
-
task_str = task.decode("utf-8") if isinstance(task, bytes) else task
|
|
159
|
+
task_str = task.decode("utf-8") if isinstance(task, bytes) else str(task)
|
|
160
160
|
pipe.srem(f"orchestrator:index:workers:task:{task_str}", worker_id)
|
|
161
161
|
|
|
162
162
|
await pipe.execute()
|
|
@@ -204,8 +204,8 @@ class RedisStorage(StorageBackend):
|
|
|
204
204
|
"""Finds idle workers that support the given task using set intersection."""
|
|
205
205
|
task_index = f"orchestrator:index:workers:task:{task_type}"
|
|
206
206
|
idle_index = "orchestrator:index:workers:idle"
|
|
207
|
-
worker_ids = await self._redis.sinter(task_index, idle_index) # type: ignore
|
|
208
|
-
return [wid.decode("utf-8") if isinstance(wid, bytes) else wid for wid in worker_ids]
|
|
207
|
+
worker_ids = await self._redis.sinter(task_index, idle_index) # type: ignore[var-annotated]
|
|
208
|
+
return [wid.decode("utf-8") if isinstance(wid, bytes) else str(wid) for wid in worker_ids]
|
|
209
209
|
|
|
210
210
|
async def enqueue_task_for_worker(self, worker_id: str, task_payload: dict[str, Any], priority: float) -> None:
|
|
211
211
|
key = f"orchestrator:task_queue:{worker_id}"
|
|
@@ -274,13 +274,14 @@ class RedisStorage(StorageBackend):
|
|
|
274
274
|
existence = await pipe.execute()
|
|
275
275
|
dead_ids = [worker_ids[i] for i, exists in enumerate(existence) if not exists]
|
|
276
276
|
for wid in dead_ids:
|
|
277
|
-
tasks = await self._redis.smembers(f"orchestrator:worker:tasks:{wid}") # type: ignore
|
|
277
|
+
tasks = await self._redis.smembers(f"orchestrator:worker:tasks:{wid}") # type: ignore[var-annotated]
|
|
278
278
|
async with self._redis.pipeline(transaction=True) as p:
|
|
279
279
|
p.delete(f"orchestrator:worker:tasks:{wid}")
|
|
280
280
|
p.srem("orchestrator:index:workers:all", wid)
|
|
281
281
|
p.srem("orchestrator:index:workers:idle", wid)
|
|
282
282
|
for t in tasks:
|
|
283
|
-
|
|
283
|
+
t_str = t.decode() if isinstance(t, bytes) else str(t)
|
|
284
|
+
p.srem(f"orchestrator:index:workers:task:{t_str}", wid)
|
|
284
285
|
await p.execute()
|
|
285
286
|
|
|
286
287
|
async def add_job_to_watch(self, job_id: str, timeout_at: float) -> None:
|
avtomatika/telemetry.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from logging import getLogger
|
|
2
2
|
from os import getenv
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
logger = getLogger(__name__)
|
|
5
6
|
|
|
@@ -17,28 +18,28 @@ except ImportError:
|
|
|
17
18
|
TELEMETRY_ENABLED = False
|
|
18
19
|
|
|
19
20
|
class DummySpan:
|
|
20
|
-
def __enter__(self):
|
|
21
|
+
def __enter__(self) -> "DummySpan":
|
|
21
22
|
return self
|
|
22
23
|
|
|
23
|
-
def __exit__(self, *args):
|
|
24
|
+
def __exit__(self, *args: Any) -> None:
|
|
24
25
|
pass
|
|
25
26
|
|
|
26
|
-
def set_attribute(self, key, value):
|
|
27
|
+
def set_attribute(self, key: str, value: Any) -> None:
|
|
27
28
|
pass
|
|
28
29
|
|
|
29
30
|
class DummyTracer:
|
|
30
31
|
@staticmethod
|
|
31
|
-
def start_as_current_span(name, context=None):
|
|
32
|
+
def start_as_current_span(name: str, context: Any = None) -> DummySpan:
|
|
32
33
|
return DummySpan()
|
|
33
34
|
|
|
34
35
|
class NoOpTrace:
|
|
35
|
-
def get_tracer(self, name):
|
|
36
|
+
def get_tracer(self, name: str) -> DummyTracer:
|
|
36
37
|
return DummyTracer()
|
|
37
38
|
|
|
38
|
-
trace = NoOpTrace()
|
|
39
|
+
trace: Any = NoOpTrace() # type: ignore[no-redef]
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
def setup_telemetry(service_name: str = "avtomatika"):
|
|
42
|
+
def setup_telemetry(service_name: str = "avtomatika") -> Any:
|
|
42
43
|
"""Configures OpenTelemetry for the application if installed."""
|
|
43
44
|
if not TELEMETRY_ENABLED:
|
|
44
45
|
logger.info("opentelemetry-sdk not found. Telemetry is disabled.")
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from asyncio import CancelledError, Queue, QueueFull, create_task, sleep
|
|
1
|
+
from asyncio import CancelledError, Queue, QueueFull, Task, create_task, sleep
|
|
2
2
|
from contextlib import suppress
|
|
3
3
|
from dataclasses import asdict, dataclass
|
|
4
4
|
from logging import getLogger
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
from aiohttp import ClientSession, ClientTimeout
|
|
8
8
|
|
|
@@ -24,7 +24,7 @@ class WebhookSender:
|
|
|
24
24
|
self.timeout = ClientTimeout(total=10)
|
|
25
25
|
self.max_retries = 3
|
|
26
26
|
self._queue: Queue[tuple[str, WebhookPayload]] = Queue(maxsize=1000)
|
|
27
|
-
self._worker_task = None
|
|
27
|
+
self._worker_task: Optional[Task[None]] = None
|
|
28
28
|
|
|
29
29
|
def start(self) -> None:
|
|
30
30
|
if not self._worker_task:
|
avtomatika/ws_manager.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Any
|
|
|
5
5
|
from aiohttp import web
|
|
6
6
|
|
|
7
7
|
from .constants import MSG_TYPE_PROGRESS
|
|
8
|
+
from .storage.base import StorageBackend
|
|
8
9
|
|
|
9
10
|
logger = getLogger(__name__)
|
|
10
11
|
|
|
@@ -12,9 +13,10 @@ logger = getLogger(__name__)
|
|
|
12
13
|
class WebSocketManager:
|
|
13
14
|
"""Manages active WebSocket connections from workers."""
|
|
14
15
|
|
|
15
|
-
def __init__(self) -> None:
|
|
16
|
+
def __init__(self, storage: StorageBackend) -> None:
|
|
16
17
|
self._connections: dict[str, web.WebSocketResponse] = {}
|
|
17
18
|
self._lock = Lock()
|
|
19
|
+
self.storage = storage
|
|
18
20
|
|
|
19
21
|
async def register(self, worker_id: str, ws: web.WebSocketResponse) -> None:
|
|
20
22
|
"""Registers a new WebSocket connection for a worker."""
|
|
@@ -48,15 +50,21 @@ class WebSocketManager:
|
|
|
48
50
|
logger.warning(f"Cannot send command: No active WebSocket connection for worker {worker_id}.")
|
|
49
51
|
return False
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
async def handle_message(worker_id: str, message: dict[str, Any]) -> None:
|
|
53
|
+
async def handle_message(self, worker_id: str, message: dict[str, Any]) -> None:
|
|
53
54
|
"""Handles an incoming message from a worker."""
|
|
54
55
|
event_type = message.get("event")
|
|
55
56
|
if event_type == MSG_TYPE_PROGRESS:
|
|
57
|
+
job_id = message.get("job_id")
|
|
58
|
+
progress = message.get("progress", 0)
|
|
59
|
+
msg_text = message.get("message", "")
|
|
56
60
|
logger.info(
|
|
57
|
-
f"Received progress update from worker {worker_id} for job {
|
|
58
|
-
f"{message.get('progress', 0) * 100:.0f}% - {message.get('message', '')}"
|
|
61
|
+
f"Received progress update from worker {worker_id} for job {job_id}: {progress * 100:.0f}% - {msg_text}"
|
|
59
62
|
)
|
|
63
|
+
if job_id:
|
|
64
|
+
try:
|
|
65
|
+
await self.storage.update_job_state(job_id, {"progress": progress, "progress_message": msg_text})
|
|
66
|
+
except Exception as e:
|
|
67
|
+
logger.error(f"Failed to update progress for job {job_id}: {e}")
|
|
60
68
|
else:
|
|
61
69
|
logger.debug(f"Received unhandled event from worker {worker_id}: {event_type}")
|
|
62
70
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avtomatika
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.0b10
|
|
4
4
|
Summary: A state-machine based orchestrator for long-running AI and other jobs.
|
|
5
5
|
Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
|
|
@@ -15,7 +15,7 @@ Classifier: Typing :: Typed
|
|
|
15
15
|
Requires-Python: >=3.11
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: rxon
|
|
18
|
+
Requires-Dist: rxon==1.0b2
|
|
19
19
|
Requires-Dist: aiohttp~=3.12
|
|
20
20
|
Requires-Dist: python-json-logger~=4.0
|
|
21
21
|
Requires-Dist: graphviz~=0.21
|
|
@@ -494,10 +494,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
|
|
|
494
494
|
|
|
495
495
|
The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
|
|
496
496
|
|
|
497
|
-
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
|
|
498
|
-
* **
|
|
497
|
+
* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
|
|
498
|
+
* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
|
|
499
|
+
* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
|
|
500
|
+
* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
|
|
499
501
|
* **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
|
|
500
502
|
|
|
503
|
+
### Progress Tracking
|
|
504
|
+
|
|
505
|
+
Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
|
|
506
|
+
|
|
501
507
|
### Concurrency & Performance
|
|
502
508
|
|
|
503
509
|
To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
avtomatika/__init__.py,sha256=D5r3L-H06uxsY_wgfh7u9YR29QvZMer1BlvzjW9Umfo,701
|
|
2
|
-
avtomatika/api.html,sha256=
|
|
2
|
+
avtomatika/api.html,sha256=6Sj0vwAUZsbLKwlB58ONAttCB52e8h3fidspLOwMMGE,32894
|
|
3
3
|
avtomatika/app_keys.py,sha256=Zd2TaGPduzyEFJgdPvgSH1skdBx2mX-Prj1ma9fAXRo,1275
|
|
4
|
-
avtomatika/blueprint.py,sha256=
|
|
4
|
+
avtomatika/blueprint.py,sha256=ZRMis9LOtBwZ9MMHqMF7WAgfMxE0M6-xP5s1IwLgpow,11875
|
|
5
5
|
avtomatika/client_config_loader.py,sha256=zVVHZlxSqZUaNpZ4zoU0T1CFYXdxy-3vKSmPcaFuHSY,2772
|
|
6
6
|
avtomatika/compression.py,sha256=bhA1kw4YrCR3I3kdquZSY0fAzCrRrjtz55uepzLUDKI,2498
|
|
7
7
|
avtomatika/config.py,sha256=27ov8BNbiUpkZ1sjtx3pifRavwcxJ_zUgIdkL_pgqv8,3595
|
|
8
|
-
avtomatika/constants.py,sha256=
|
|
8
|
+
avtomatika/constants.py,sha256=j9fkZ1NWLTwl5IcmO9VoMf1N2Okqk0wYfyDybfRi-Fc,2081
|
|
9
9
|
avtomatika/context.py,sha256=T6Ux4Fb1DwWRGTpMNeukM51MQDQbGk2HS6Cwpc0dc1s,4248
|
|
10
10
|
avtomatika/data_types.py,sha256=D_IUzMW8zMz-_MaqVp9MG53rG37Cb3McyRZuIXxvdlE,1108
|
|
11
11
|
avtomatika/datastore.py,sha256=gJjhZ5kxjF8pmbbPQb_qu3HPUpfy2c6T75KZ-smb_zg,545
|
|
12
|
-
avtomatika/dispatcher.py,sha256=
|
|
13
|
-
avtomatika/engine.py,sha256=
|
|
12
|
+
avtomatika/dispatcher.py,sha256=5J5GBWFfaGCGXUkM-2fhMeg2n2nTO0BH3ffkzsnSsaE,8784
|
|
13
|
+
avtomatika/engine.py,sha256=Hb6MLanMjx1GDAfkbNJU-K4RXMuPZQP7_HA_0VR8WMw,20916
|
|
14
14
|
avtomatika/executor.py,sha256=X5AU7hWflH8rSYKxl_wh2RhdYhpyktynmK8mcfJgT-8,24218
|
|
15
15
|
avtomatika/health_checker.py,sha256=jXYSH4BPeZ4LCxSZV4uXM4BZhGJYgpoAOWQXE8yojLo,2078
|
|
16
|
-
avtomatika/logging_config.py,sha256=
|
|
16
|
+
avtomatika/logging_config.py,sha256=cVY8aOeaWncsvkN015WgC74NTF6r55-OA3E1ux8P824,3347
|
|
17
17
|
avtomatika/metrics.py,sha256=tiksK1fFSOMlz8zFu6GT19JTduvxMTNlLu0QFrTHoQI,1866
|
|
18
18
|
avtomatika/py.typed,sha256=CT_L7gw2MLcQY-X0vs-xB5Vr0wzvGo7GuQYPI_qwJE8,65
|
|
19
19
|
avtomatika/quota.py,sha256=DNcaL6k0J1REeP8sVqbY9FprY_3BSr2SxM2Vf4mEqdw,1612
|
|
@@ -21,28 +21,28 @@ avtomatika/ratelimit.py,sha256=hFGW5oN9G6_W_jnHmopXW8bRjjzlvanY19MLghsNLE8,1306
|
|
|
21
21
|
avtomatika/reputation.py,sha256=pK-x9FrPN2Oc2gtPa1AZJHlhvkd7xlRe4orxM2auJJc,3979
|
|
22
22
|
avtomatika/s3.py,sha256=I0fDw5I44RJAqSv4tREvwHp2cxB0mGY_l2cVZWpe3As,14110
|
|
23
23
|
avtomatika/scheduler.py,sha256=F5Kv5Rx34nDd0mE5jxjwpjRg8duDZBEr91N5Y6CNR24,4231
|
|
24
|
-
avtomatika/scheduler_config_loader.py,sha256=
|
|
24
|
+
avtomatika/scheduler_config_loader.py,sha256=38x-4G4yRrhSrLdmZ4aTb7WggE-BcGblKZO7x97nW6Y,1352
|
|
25
25
|
avtomatika/security.py,sha256=eENEUc0OsHm6wN2H-ckGmiaV9qrZSbYsHFCWyYb3aLs,3271
|
|
26
|
-
avtomatika/telemetry.py,sha256=
|
|
26
|
+
avtomatika/telemetry.py,sha256=17QVxb2vqx3vCkhvzL0JFYc6zvTFndUyZ5balj5wXuA,2504
|
|
27
27
|
avtomatika/watcher.py,sha256=IKBqJ_r52ya0wiH8Gb0qFRMC8DFsusdRzPHjruWvFh4,3558
|
|
28
28
|
avtomatika/worker_config_loader.py,sha256=n0j8gfuJDacWONr8744RsHTCWpc_1ZTRMC-rJZh6P6A,2249
|
|
29
|
-
avtomatika/ws_manager.py,sha256=
|
|
30
|
-
avtomatika/api/handlers.py,sha256=
|
|
29
|
+
avtomatika/ws_manager.py,sha256=0-vo7_IaDJbD58omTSrHm4SZHjePIlVLxEpiVTgNvbQ,3491
|
|
30
|
+
avtomatika/api/handlers.py,sha256=D8oEWsRG7YqJCwXVQaSbLHA35AKm-HahFKM_FDqfqCE,11701
|
|
31
31
|
avtomatika/api/routes.py,sha256=MrtcRNjybxODmKhab0FCzgZGPRcfznwpFtDCdgh8RT4,3937
|
|
32
32
|
avtomatika/history/base.py,sha256=RsCvCkHK1teHjXSk9ZHVEtpQlIjz8kWsfKYHVnapf6c,3848
|
|
33
33
|
avtomatika/history/noop.py,sha256=hLzt0RblsrKUtoyQNauOni6jCi-IYCWEPsiR0vh7tho,1226
|
|
34
34
|
avtomatika/history/postgres.py,sha256=T0XpDurnh48pPI-2JhB285GdNIexNkCSu8ExhLJzcxc,9538
|
|
35
35
|
avtomatika/history/sqlite.py,sha256=txWax9RVzBQzIZuU-SjHnEXEzBmGzIjqzoVsK2oyiAQ,9252
|
|
36
36
|
avtomatika/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
-
avtomatika/services/worker_service.py,sha256=
|
|
37
|
+
avtomatika/services/worker_service.py,sha256=cPik-DUYPka0lOT38fvmVpEnmt1_n2p44z9emyikdLg,11518
|
|
38
38
|
avtomatika/storage/__init__.py,sha256=mGRj_40dWZ7R7uYbqC6gCsUWCKHAbZz4ZVIhYg5dT_E,262
|
|
39
39
|
avtomatika/storage/base.py,sha256=Tb_4fF0Vr10cgoXepA-1YUSgi27qYKQ7Qz1Y87XiRII,13375
|
|
40
|
-
avtomatika/storage/memory.py,sha256=
|
|
41
|
-
avtomatika/storage/redis.py,sha256=
|
|
40
|
+
avtomatika/storage/memory.py,sha256=23eNAcEleM6Yqi_kSn-dLEBJRMzrhlgRHVgxrVhZPrk,14560
|
|
41
|
+
avtomatika/storage/redis.py,sha256=MPSQRuAzWNtKQZco_5ExvCpaWbs5_80or5QVrU1GcIM,20235
|
|
42
42
|
avtomatika/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
avtomatika/utils/webhook_sender.py,sha256=
|
|
44
|
-
avtomatika-1.
|
|
45
|
-
avtomatika-1.
|
|
46
|
-
avtomatika-1.
|
|
47
|
-
avtomatika-1.
|
|
48
|
-
avtomatika-1.
|
|
43
|
+
avtomatika/utils/webhook_sender.py,sha256=LoJ6z_1p-OngjPYl9Pk1N1t9xrP6-v-7xOg_AmWPuVc,3644
|
|
44
|
+
avtomatika-1.0b10.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
|
|
45
|
+
avtomatika-1.0b10.dist-info/METADATA,sha256=iFXea4IsLOM9dqBBcMDPqbEZDGTMbLcQoV-7SDpk_xQ,28593
|
|
46
|
+
avtomatika-1.0b10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
47
|
+
avtomatika-1.0b10.dist-info/top_level.txt,sha256=gLDWhA_wxHj0I6fG5X8vw9fE0HSN4hTE2dEJzeVS2x8,11
|
|
48
|
+
avtomatika-1.0b10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|