avtomatika-1.0b9-py3-none-any.whl → avtomatika-1.0b11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika/api/handlers.py +2 -2
- avtomatika/api.html +1 -1
- avtomatika/blueprint.py +11 -3
- avtomatika/constants.py +76 -2
- avtomatika/dispatcher.py +3 -0
- avtomatika/engine.py +13 -4
- avtomatika/executor.py +38 -18
- avtomatika/logging_config.py +16 -7
- avtomatika/s3.py +2 -3
- avtomatika/scheduler_config_loader.py +5 -2
- avtomatika/services/worker_service.py +26 -22
- avtomatika/storage/base.py +14 -0
- avtomatika/storage/memory.py +14 -3
- avtomatika/storage/redis.py +25 -12
- avtomatika/telemetry.py +8 -7
- avtomatika/utils/webhook_sender.py +3 -3
- avtomatika/ws_manager.py +13 -5
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/METADATA +10 -5
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/RECORD +22 -22
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/WHEEL +0 -0
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/licenses/LICENSE +0 -0
- {avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/top_level.txt +0 -0
avtomatika/api/handlers.py
CHANGED
@@ -25,11 +25,11 @@ from ..worker_config_loader import load_worker_configs_to_redis
 logger = getLogger(__name__)


-def json_dumps(obj) -> str:
+def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")


-def json_response(data, **kwargs) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)

avtomatika/api.html
CHANGED
@@ -211,7 +211,7 @@
     ],
     request: { body: null },
     responses: [
-        { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "..." } }
+        { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "running", "progress": 0.75, "progress_message": "Processing..." } }
     ]
 },
 {
avtomatika/blueprint.py
CHANGED
@@ -62,7 +62,8 @@ class ConditionalHandler:
         try:
             context_area = getattr(context, self.condition.area)
             actual_value = context_area[self.condition.field]
-            return self.condition.op(actual_value, self.condition.value)
+            result = self.condition.op(actual_value, self.condition.value)
+            return bool(result)
         except (AttributeError, KeyError):
             return False

@@ -130,7 +131,14 @@ class StateMachineBlueprint:
         self.name = name
         self.api_endpoint = api_endpoint
         self.api_version = api_version
-        self.data_stores: dict[str, AsyncDictStore] =
+        self.data_stores: dict[str, AsyncDictStore] = {}
+        if data_stores:
+            for ds_name, ds_data in data_stores.items():
+                if isinstance(ds_data, AsyncDictStore):
+                    self.data_stores[ds_name] = ds_data
+                else:
+                    self.data_stores[ds_name] = AsyncDictStore(ds_data)
+
         self.handlers: dict[str, Callable] = {}
         self.aggregator_handlers: dict[str, Callable] = {}
         self.conditional_handlers: list[ConditionalHandler] = []
@@ -279,7 +287,7 @@ class StateMachineBlueprint:
                 f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
             )

-    def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
+    def render_graph(self, output_filename: str | None = None, output_format: str = "png") -> str | None:
         from graphviz import Digraph  # type: ignore[import]

         dot = Digraph(comment=f"State Machine for {self.name}")
avtomatika/constants.py
CHANGED
@@ -1,6 +1,80 @@
 """
 Centralized constants for the Avtomatika protocol.
-(Legacy wrapper, pointing to
+(Legacy wrapper, pointing to rxon.constants)
 """

-from rxon.constants import
+from rxon.constants import (
+    AUTH_HEADER_CLIENT,
+    AUTH_HEADER_WORKER,
+    COMMAND_CANCEL_TASK,
+    ENDPOINT_TASK_NEXT,
+    ENDPOINT_TASK_RESULT,
+    ENDPOINT_WORKER_HEARTBEAT,
+    ENDPOINT_WORKER_REGISTER,
+    ERROR_CODE_DEPENDENCY,
+    ERROR_CODE_INTEGRITY_MISMATCH,
+    ERROR_CODE_INTERNAL,
+    ERROR_CODE_INVALID_INPUT,
+    ERROR_CODE_PERMANENT,
+    ERROR_CODE_RESOURCE_EXHAUSTED,
+    ERROR_CODE_SECURITY,
+    ERROR_CODE_TIMEOUT,
+    ERROR_CODE_TRANSIENT,
+    JOB_STATUS_CANCELLED,
+    JOB_STATUS_ERROR,
+    JOB_STATUS_FAILED,
+    JOB_STATUS_FINISHED,
+    JOB_STATUS_PENDING,
+    JOB_STATUS_QUARANTINED,
+    JOB_STATUS_RUNNING,
+    JOB_STATUS_WAITING_FOR_HUMAN,
+    JOB_STATUS_WAITING_FOR_PARALLEL,
+    JOB_STATUS_WAITING_FOR_WORKER,
+    MSG_TYPE_PROGRESS,
+    PROTOCOL_VERSION,
+    PROTOCOL_VERSION_HEADER,
+    STS_TOKEN_ENDPOINT,
+    TASK_STATUS_CANCELLED,
+    TASK_STATUS_FAILURE,
+    TASK_STATUS_SUCCESS,
+    WORKER_API_PREFIX,
+    WS_ENDPOINT,
+)
+
+__all__ = [
+    "AUTH_HEADER_CLIENT",
+    "AUTH_HEADER_WORKER",
+    "COMMAND_CANCEL_TASK",
+    "ENDPOINT_TASK_NEXT",
+    "ENDPOINT_TASK_RESULT",
+    "ENDPOINT_WORKER_HEARTBEAT",
+    "ENDPOINT_WORKER_REGISTER",
+    "ERROR_CODE_DEPENDENCY",
+    "ERROR_CODE_INTEGRITY_MISMATCH",
+    "ERROR_CODE_INTERNAL",
+    "ERROR_CODE_INVALID_INPUT",
+    "ERROR_CODE_PERMANENT",
+    "ERROR_CODE_RESOURCE_EXHAUSTED",
+    "ERROR_CODE_SECURITY",
+    "ERROR_CODE_TIMEOUT",
+    "ERROR_CODE_TRANSIENT",
+    "JOB_STATUS_CANCELLED",
+    "JOB_STATUS_ERROR",
+    "JOB_STATUS_FAILED",
+    "JOB_STATUS_FINISHED",
+    "JOB_STATUS_PENDING",
+    "JOB_STATUS_QUARANTINED",
+    "JOB_STATUS_RUNNING",
+    "JOB_STATUS_WAITING_FOR_HUMAN",
+    "JOB_STATUS_WAITING_FOR_PARALLEL",
+    "JOB_STATUS_WAITING_FOR_WORKER",
+    "MSG_TYPE_PROGRESS",
+    "PROTOCOL_VERSION",
+    "PROTOCOL_VERSION_HEADER",
+    "STS_TOKEN_ENDPOINT",
+    "TASK_STATUS_CANCELLED",
+    "TASK_STATUS_FAILURE",
+    "TASK_STATUS_SUCCESS",
+    "WORKER_API_PREFIX",
+    "WS_ENDPOINT",
+]
avtomatika/dispatcher.py
CHANGED
@@ -184,6 +184,9 @@ class Dispatcher:
         selected_worker = self._select_default(capable_workers, task_type)

         worker_id = selected_worker.get("worker_id")
+        if not worker_id:
+            raise RuntimeError(f"Selected worker for task '{task_type}' has no worker_id")
+
         logger.info(
             f"Dispatching task '{task_type}' to worker {worker_id} (strategy: {dispatch_strategy})",
         )
avtomatika/engine.py
CHANGED
@@ -1,7 +1,7 @@
 from asyncio import TimeoutError as AsyncTimeoutError
 from asyncio import create_task, gather, get_running_loop, wait_for
 from logging import getLogger
-from typing import Any
+from typing import Any, Optional
 from uuid import uuid4

 from aiohttp import ClientSession, web
@@ -58,7 +58,7 @@ def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")


-def json_response(data, **kwargs: Any) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)


@@ -70,11 +70,15 @@ class OrchestratorEngine:
         self.config = config
         self.blueprints: dict[str, StateMachineBlueprint] = {}
         self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
-        self.ws_manager = WebSocketManager()
+        self.ws_manager = WebSocketManager(self.storage)
         self.app = web.Application(middlewares=[compression_middleware])
         self.app[ENGINE_KEY] = self
-        self.worker_service = None
+        self.worker_service: Optional[WorkerService] = None
         self._setup_done = False
+        self.webhook_sender: WebhookSender
+        self.dispatcher: Dispatcher
+        self.runner: web.AppRunner
+        self.site: web.TCPSite

         from rxon import HttpListener

@@ -176,6 +180,9 @@ class OrchestratorEngine:
         except ValueError as e:
             raise web.HTTPBadRequest(text=str(e)) from e

+        if self.worker_service is None:
+            raise web.HTTPInternalServerError(text="WorkerService is not initialized.")
+
         if message_type == "register":
             return await self.worker_service.register_worker(payload)

@@ -352,6 +359,7 @@
         initial_data: dict[str, Any],
         source: str = "internal",
         tracing_context: dict[str, str] | None = None,
+        data_metadata: dict[str, Any] | None = None,
     ) -> str:
         """Creates a job directly, bypassing the HTTP API layer.
         Useful for internal schedulers and triggers.
@@ -377,6 +385,7 @@
             "status": JOB_STATUS_PENDING,
             "tracing_context": tracing_context or {},
             "client_config": client_config,
+            "data_metadata": data_metadata or {},
         }
         await self.storage.save_job_state(job_id, job_state)
         await self.storage.enqueue_job(job_id)
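The new `data_metadata` keyword above is stored verbatim on the job state (`data_metadata or {}`). A hedged usage sketch of `create_job_directly` (the leading positional argument is assumed from context; only `initial_data`, `source`, `tracing_context`, and `data_metadata` appear in this diff):

    # Hypothetical internal trigger; the blueprint name is invented.
    job_id = await engine.create_job_directly(
        "nightly_report",                  # assumed first argument
        initial_data={"date": "2024-01-01"},
        source="scheduler",
        data_metadata={"origin": "cron"},  # persisted as job_state["data_metadata"]
    )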
avtomatika/executor.py
CHANGED
@@ -238,6 +238,9 @@ class JobExecutor:
                     action_factory.sub_blueprint_to_run,
                     duration_ms,
                 )
+            elif job_state["current_state"] in blueprint.end_states:
+                status = JOB_STATUS_FINISHED if job_state["current_state"] == "finished" else JOB_STATUS_FAILED
+                await self._handle_terminal_reached(job_state, status, duration_ms)

         except Exception as e:
             # This catches errors within the handler's execution.
@@ -248,6 +251,40 @@ class JobExecutor:
             if message_id in self._processing_messages:
                 self._processing_messages.remove(message_id)

+    async def _handle_terminal_reached(
+        self,
+        job_state: dict[str, Any],
+        status: str,
+        duration_ms: int,
+    ) -> None:
+        job_id = job_state["id"]
+        current_state = job_state["current_state"]
+        logger.info(f"Job {job_id} reached terminal state '{current_state}' with status '{status}'")
+
+        await self.history_storage.log_job_event(
+            {
+                "job_id": job_id,
+                "state": current_state,
+                "event_type": "job_completed",
+                "duration_ms": duration_ms,
+                "context_snapshot": job_state,
+            },
+        )
+
+        job_state["status"] = status
+        await self.storage.save_job_state(job_id, job_state)
+
+        # Clean up S3 files if service is available
+        s3_service = self.engine.app.get(S3_SERVICE_KEY)
+        if s3_service:
+            task_files = s3_service.get_task_files(job_id)
+            if task_files:
+                create_task(task_files.cleanup())
+
+        await self._check_and_resume_parent(job_state)
+        event_type = "job_finished" if status == JOB_STATUS_FINISHED else "job_failed"
+        await self.engine.send_job_webhook(job_state, event_type)
+
     async def _handle_transition(
         self,
         job_state: dict[str, Any],
@@ -270,28 +307,11 @@ class JobExecutor:
             },
         )

-        # When transitioning to a new state, reset the retry counter.
         job_state["retry_count"] = 0
        job_state["current_state"] = next_state
         job_state["status"] = JOB_STATUS_RUNNING
         await self.storage.save_job_state(job_id, job_state)
-
-        if next_state not in TERMINAL_STATES:
-            await self.storage.enqueue_job(job_id)
-        else:
-            logger.info(f"Job {job_id} reached terminal state {next_state}")
-
-            # Clean up S3 files if service is available
-            s3_service = self.engine.app.get(S3_SERVICE_KEY)
-            if s3_service:
-                task_files = s3_service.get_task_files(job_id)
-                if task_files:
-                    # Run cleanup in background to not block response
-                    create_task(task_files.cleanup())
-
-            await self._check_and_resume_parent(job_state)
-            event_type = "job_finished" if next_state == JOB_STATUS_FINISHED else "job_failed"
-            await self.engine.send_job_webhook(job_state, event_type)
+        await self.storage.enqueue_job(job_id)

     async def _handle_dispatch(
         self,
avtomatika/logging_config.py
CHANGED
@@ -1,6 +1,7 @@
 from datetime import datetime
 from logging import DEBUG, Formatter, StreamHandler, getLogger
 from sys import stdout
+from typing import Any, Literal, Optional
 from zoneinfo import ZoneInfo

 from pythonjsonlogger import json
@@ -9,14 +10,22 @@ from pythonjsonlogger import json
 class TimezoneFormatter(Formatter):
     """Formatter that respects a custom timezone."""

-    def __init__(
+    def __init__(
+        self,
+        fmt: Optional[str] = None,
+        datefmt: Optional[str] = None,
+        style: Literal["%", "{", "$"] = "%",
+        validate: bool = True,
+        *,
+        tz_name: str = "UTC",
+    ) -> None:
         super().__init__(fmt, datefmt, style, validate)
         self.tz = ZoneInfo(tz_name)

-    def converter(self, timestamp):
+    def converter(self, timestamp: float) -> datetime:  # type: ignore[override]
         return datetime.fromtimestamp(timestamp, self.tz)

-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         dt = self.converter(record.created)
         if datefmt:
             s = dt.strftime(datefmt)
@@ -28,14 +37,14 @@ class TimezoneJsonFormatter(json.JsonFormatter):
         return s


-class TimezoneJsonFormatter(json.JsonFormatter):
+class TimezoneJsonFormatter(json.JsonFormatter):  # type: ignore[name-defined]
     """JSON Formatter that respects a custom timezone."""

-    def __init__(self, *args, tz_name="UTC", **kwargs):
+    def __init__(self, *args: Any, tz_name: str = "UTC", **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
         self.tz = ZoneInfo(tz_name)

-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         # Override formatTime to use timezone-aware datetime
         dt = datetime.fromtimestamp(record.created, self.tz)
         if datefmt:
@@ -44,7 +53,7 @@ class TimezoneJsonFormatter(json.JsonFormatter):
         return dt.isoformat()


-def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC"):
+def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC") -> None:
     """Configures structured logging for the entire application."""
     logger = getLogger("avtomatika")
     logger.setLevel(log_level)
avtomatika/s3.py
CHANGED
@@ -335,12 +335,11 @@ class S3Service:
         try:
             self._store = S3Store(
                 bucket=self.config.S3_DEFAULT_BUCKET,
-
-
+                aws_access_key_id=self.config.S3_ACCESS_KEY,
+                aws_secret_access_key=self.config.S3_SECRET_KEY,
                 region=self.config.S3_REGION,
                 endpoint=self.config.S3_ENDPOINT_URL,
                 allow_http="http://" in self.config.S3_ENDPOINT_URL,
-                force_path_style=True,
             )
             self._semaphore = Semaphore(self.config.S3_MAX_CONCURRENCY)
             logger.info(
avtomatika/scheduler_config_loader.py
CHANGED
@@ -22,14 +22,17 @@ def load_schedules_from_file(file_path: str) -> list[ScheduledJobConfig]:

     schedules = []
     for name, config in data.items():
-        # Skip sections that might be metadata (though TOML structure usually implies all top-level keys are jobs)
         if not isinstance(config, dict):
             continue

+        blueprint = config.get("blueprint")
+        if not isinstance(blueprint, str):
+            raise ValueError(f"Schedule '{name}' is missing a 'blueprint' name.")
+
         schedules.append(
             ScheduledJobConfig(
                 name=name,
-                blueprint=
+                blueprint=blueprint,
                 input_data=config.get("input_data", {}),
                 interval_seconds=config.get("interval_seconds"),
                 daily_at=config.get("daily_at"),
avtomatika/services/worker_service.py
CHANGED
@@ -10,9 +10,11 @@ from rxon.validators import validate_identifier
 from ..app_keys import S3_SERVICE_KEY
 from ..config import Config
 from ..constants import (
+    ERROR_CODE_DEPENDENCY,
     ERROR_CODE_INTEGRITY_MISMATCH,
     ERROR_CODE_INVALID_INPUT,
     ERROR_CODE_PERMANENT,
+    ERROR_CODE_SECURITY,
     ERROR_CODE_TRANSIENT,
     JOB_STATUS_CANCELLED,
     JOB_STATUS_FAILED,
@@ -102,7 +104,6 @@ class WorkerService:

         job_id = result_payload.get("job_id")
         task_id = result_payload.get("task_id")
-        result_data = result_payload.get("result", {})

         if not job_id or not task_id:
             raise ValueError("job_id and task_id are required")
@@ -111,25 +112,33 @@
         if not job_state:
             raise LookupError("Job not found")

+        result_status = result_payload.get("status", TASK_STATUS_SUCCESS)
+        worker_data_content = result_payload.get("data")
+
         if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
             await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
-            job_state.setdefault("aggregation_results", {})[task_id] = result_data

-
-
-            branches.
+            def _update_parallel_results(state: dict[str, Any]) -> dict[str, Any]:
+                state.setdefault("aggregation_results", {})[task_id] = result_payload
+                branches = state.setdefault("active_branches", [])
+                if task_id in branches:
+                    branches.remove(task_id)
+
+                if not branches:
+                    state["status"] = JOB_STATUS_RUNNING
+                    state["current_state"] = state["aggregation_target"]
+                return state

-
+            updated_job_state = await self.storage.update_job_state_atomic(job_id, _update_parallel_results)
+
+            if not updated_job_state.get("active_branches"):
                 logger.info(f"All parallel branches for job {job_id} have completed.")
-                job_state["status"] = JOB_STATUS_RUNNING
-                job_state["current_state"] = job_state["aggregation_target"]
-                await self.storage.save_job_state(job_id, job_state)
                 await self.storage.enqueue_job(job_id)
             else:
+                remaining = len(updated_job_state["active_branches"])
                 logger.info(
-                    f"Branch {task_id} for job {job_id} completed. Waiting for {
+                    f"Branch {task_id} for job {job_id} completed. Waiting for {remaining} more.",
                 )
-                await self.storage.save_job_state(job_id, job_state)

             return "parallel_branch_result_accepted"

@@ -146,14 +155,12 @@
                 "event_type": "task_finished",
                 "duration_ms": duration_ms,
                 "worker_id": authenticated_worker_id,
-                "context_snapshot": {**job_state, "result":
+                "context_snapshot": {**job_state, "result": result_payload},
             },
         )

-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)  # Default to success? Constant?
-
         if result_status == TASK_STATUS_FAILURE:
-            return await self._handle_task_failure(job_state, task_id,
+            return await self._handle_task_failure(job_state, task_id, result_payload)

         if result_status == TASK_STATUS_CANCELLED:
             logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
@@ -169,13 +176,11 @@
             return "result_accepted_cancelled"

         transitions = job_state.get("current_task_transitions", {})
-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)
         next_state = transitions.get(result_status)

         if next_state:
             logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")

-            worker_data_content = result_data.get("data")
             if worker_data_content and isinstance(worker_data_content, dict):
                 if "state_history" not in job_state:
                     job_state["state_history"] = {}
@@ -200,8 +205,8 @@
             await self.storage.save_job_state(job_id, job_state)
             return "result_accepted_failure"

-    async def _handle_task_failure(self, job_state: dict, task_id: str,
-        error_details =
+    async def _handle_task_failure(self, job_state: dict, task_id: str, result_payload: dict) -> str:
+        error_details = result_payload.get("error", {})
         error_type = ERROR_CODE_TRANSIENT
         error_message = "No error details provided."

@@ -214,9 +219,9 @@
         job_id = job_state["id"]
         logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")

-        if error_type
+        if error_type in (ERROR_CODE_PERMANENT, ERROR_CODE_SECURITY, ERROR_CODE_DEPENDENCY):
             job_state["status"] = JOB_STATUS_QUARANTINED
-            job_state["error_message"] = f"Task failed with permanent error: {error_message}"
+            job_state["error_message"] = f"Task failed with permanent error ({error_type}): {error_message}"
             await self.storage.save_job_state(job_id, job_state)
             await self.storage.quarantine_job(job_id)
         elif error_type == ERROR_CODE_INVALID_INPUT:
@@ -230,7 +235,6 @@
             logger.critical(f"Data integrity mismatch detected for job {job_id}: {error_message}")
         else:
             await self.engine.handle_task_failure(job_state, task_id, error_message)
-
         return "result_accepted_failure"

     async def issue_access_token(self, worker_id: str) -> TokenResponse:
avtomatika/storage/base.py
CHANGED
@@ -90,6 +90,20 @@ class StorageBackend(ABC):
         """
         raise NotImplementedError

+    @abstractmethod
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        """Atomically update the state of a job using a callback function.
+
+        :param job_id: Unique identifier for the job.
+        :param update_callback: A callable that takes the current state and returns the updated state.
+        :return: The updated full state of the job.
+        """
+        raise NotImplementedError
+
     @abstractmethod
     async def register_worker(
         self,
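The `update_job_state_atomic` contract above takes a synchronous callback that receives the current state and returns the full updated state; each backend decides how to make the read-modify-write atomic. A minimal caller sketch (the field being bumped is illustrative):

    def bump_retry(state: dict) -> dict:
        # Runs against the freshest state the backend can read.
        state["retry_count"] = state.get("retry_count", 0) + 1
        return state

    updated = await storage.update_job_state_atomic(job_id, bump_retry)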
avtomatika/storage/memory.py
CHANGED
@@ -12,12 +12,12 @@ class MemoryStorage(StorageBackend):
     Not persistent.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self._jobs: dict[str, dict[str, Any]] = {}
         self._workers: dict[str, dict[str, Any]] = {}
         self._worker_ttls: dict[str, float] = {}
-        self._worker_task_queues: dict[str, PriorityQueue] = {}
-        self._job_queue = Queue()
+        self._worker_task_queues: dict[str, PriorityQueue[Any]] = {}
+        self._job_queue: Queue[str] = Queue()
         self._quarantine_queue: list[str] = []
         self._watched_jobs: dict[str, float] = {}
         self._client_configs: dict[str, dict[str, Any]] = {}
@@ -62,6 +62,17 @@ class MemoryStorage(StorageBackend):
         self._jobs[job_id].update(update_data)
         return self._jobs[job_id]

+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        async with self._lock:
+            current_state = self._jobs.get(job_id, {})
+            updated_state = update_callback(current_state)
+            self._jobs[job_id] = updated_state
+            return updated_state
+
     async def register_worker(
         self,
         worker_id: str,
avtomatika/storage/redis.py
CHANGED
@@ -95,8 +95,21 @@ class RedisStorage(StorageBackend):
         self,
         job_id: str,
         update_data: dict[str, Any],
-    ) -> dict[
+    ) -> dict[str, Any]:
         """Atomically update the job state in Redis using a transaction."""
+
+        def _merge(state: dict[str, Any]) -> dict[str, Any]:
+            state.update(update_data)
+            return state
+
+        return await self.update_job_state_atomic(job_id, _merge)
+
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        """Atomically update the job state in Redis using a transaction and callback."""
         key = self._get_key(job_id)

         async with self._redis.pipeline(transaction=True) as pipe:
@@ -104,13 +117,12 @@ class RedisStorage(StorageBackend):
             try:
                 await pipe.watch(key)
                 current_state_raw = await pipe.get(key)
-                current_state = self._unpack(current_state_raw) if current_state_raw else {}
-                current_state
-
+                current_state: dict[str, Any] = self._unpack(current_state_raw) if current_state_raw else {}
+                updated_state = update_callback(current_state)
                 pipe.multi()
-                pipe.set(key, self._pack(
+                pipe.set(key, self._pack(updated_state))
                 await pipe.execute()
-                return
+                return updated_state
             except WatchError:
                 continue

@@ -147,7 +159,7 @@ class RedisStorage(StorageBackend):
         key = f"orchestrator:worker:info:{worker_id}"
         tasks_key = f"orchestrator:worker:tasks:{worker_id}"

-        tasks = await self._redis.smembers(tasks_key)  # type: ignore
+        tasks = await self._redis.smembers(tasks_key)  # type: ignore[var-annotated]

         async with self._redis.pipeline(transaction=True) as pipe:
             pipe.delete(key)
@@ -156,7 +168,7 @@ class RedisStorage(StorageBackend):
             pipe.srem("orchestrator:index:workers:idle", worker_id)

             for task in tasks:
-                task_str = task.decode("utf-8") if isinstance(task, bytes) else task
+                task_str = task.decode("utf-8") if isinstance(task, bytes) else str(task)
                 pipe.srem(f"orchestrator:index:workers:task:{task_str}", worker_id)

             await pipe.execute()
@@ -204,8 +216,8 @@ class RedisStorage(StorageBackend):
         """Finds idle workers that support the given task using set intersection."""
         task_index = f"orchestrator:index:workers:task:{task_type}"
         idle_index = "orchestrator:index:workers:idle"
-        worker_ids = await self._redis.sinter(task_index, idle_index)  # type: ignore
-        return [wid.decode("utf-8") if isinstance(wid, bytes) else wid for wid in worker_ids]
+        worker_ids = await self._redis.sinter(task_index, idle_index)  # type: ignore[var-annotated]
+        return [wid.decode("utf-8") if isinstance(wid, bytes) else str(wid) for wid in worker_ids]

     async def enqueue_task_for_worker(self, worker_id: str, task_payload: dict[str, Any], priority: float) -> None:
         key = f"orchestrator:task_queue:{worker_id}"
@@ -274,13 +286,14 @@ class RedisStorage(StorageBackend):
             existence = await pipe.execute()
             dead_ids = [worker_ids[i] for i, exists in enumerate(existence) if not exists]
             for wid in dead_ids:
-                tasks = await self._redis.smembers(f"orchestrator:worker:tasks:{wid}")  # type: ignore
+                tasks = await self._redis.smembers(f"orchestrator:worker:tasks:{wid}")  # type: ignore[var-annotated]
                 async with self._redis.pipeline(transaction=True) as p:
                     p.delete(f"orchestrator:worker:tasks:{wid}")
                     p.srem("orchestrator:index:workers:all", wid)
                     p.srem("orchestrator:index:workers:idle", wid)
                     for t in tasks:
-
+                        t_str = t.decode() if isinstance(t, bytes) else str(t)
+                        p.srem(f"orchestrator:index:workers:task:{t_str}", wid)
                     await p.execute()

     async def add_job_to_watch(self, job_id: str, timeout_at: float) -> None:
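The Redis implementation above is the standard WATCH/MULTI/EXEC optimistic-locking loop from redis-py: if another client writes the key between the read and the EXEC, `WatchError` is raised and the loop retries. A standalone sketch of the same pattern, with `pack`/`unpack` standing in for the class's private `_pack`/`_unpack` serializers:

    from redis.exceptions import WatchError

    async def atomic_update(redis_client, key, update_callback):
        while True:
            async with redis_client.pipeline(transaction=True) as pipe:
                try:
                    await pipe.watch(key)               # begin optimistic lock
                    raw = await pipe.get(key)           # immediate-mode read while watching
                    state = unpack(raw) if raw else {}  # unpack: assumed deserializer
                    new_state = update_callback(state)
                    pipe.multi()                        # queue the write
                    pipe.set(key, pack(new_state))      # pack: assumed serializer
                    await pipe.execute()                # raises WatchError if key changed
                    return new_state
                except WatchError:
                    continue                            # lost the race; retry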
avtomatika/telemetry.py
CHANGED
@@ -1,5 +1,6 @@
 from logging import getLogger
 from os import getenv
+from typing import Any

 logger = getLogger(__name__)

@@ -17,28 +18,28 @@ except ImportError:
     TELEMETRY_ENABLED = False

     class DummySpan:
-        def __enter__(self):
+        def __enter__(self) -> "DummySpan":
             return self

-        def __exit__(self, *args):
+        def __exit__(self, *args: Any) -> None:
             pass

-        def set_attribute(self, key, value):
+        def set_attribute(self, key: str, value: Any) -> None:
             pass

     class DummyTracer:
         @staticmethod
-        def start_as_current_span(name, context=None):
+        def start_as_current_span(name: str, context: Any = None) -> DummySpan:
             return DummySpan()

     class NoOpTrace:
-        def get_tracer(self, name):
+        def get_tracer(self, name: str) -> DummyTracer:
             return DummyTracer()

-    trace = NoOpTrace()
+    trace: Any = NoOpTrace()  # type: ignore[no-redef]


-def setup_telemetry(service_name: str = "avtomatika"):
+def setup_telemetry(service_name: str = "avtomatika") -> Any:
     """Configures OpenTelemetry for the application if installed."""
     if not TELEMETRY_ENABLED:
         logger.info("opentelemetry-sdk not found. Telemetry is disabled.")
avtomatika/utils/webhook_sender.py
CHANGED
@@ -1,8 +1,8 @@
-from asyncio import CancelledError, Queue, QueueFull, create_task, sleep
+from asyncio import CancelledError, Queue, QueueFull, Task, create_task, sleep
 from contextlib import suppress
 from dataclasses import asdict, dataclass
 from logging import getLogger
-from typing import Any
+from typing import Any, Optional

 from aiohttp import ClientSession, ClientTimeout

@@ -24,7 +24,7 @@ class WebhookSender:
         self.timeout = ClientTimeout(total=10)
         self.max_retries = 3
         self._queue: Queue[tuple[str, WebhookPayload]] = Queue(maxsize=1000)
-        self._worker_task = None
+        self._worker_task: Optional[Task[None]] = None

     def start(self) -> None:
         if not self._worker_task:
avtomatika/ws_manager.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Any
 from aiohttp import web

 from .constants import MSG_TYPE_PROGRESS
+from .storage.base import StorageBackend

 logger = getLogger(__name__)

@@ -12,9 +13,10 @@ logger = getLogger(__name__)
 class WebSocketManager:
     """Manages active WebSocket connections from workers."""

-    def __init__(self) -> None:
+    def __init__(self, storage: StorageBackend) -> None:
         self._connections: dict[str, web.WebSocketResponse] = {}
         self._lock = Lock()
+        self.storage = storage

     async def register(self, worker_id: str, ws: web.WebSocketResponse) -> None:
         """Registers a new WebSocket connection for a worker."""
@@ -48,15 +50,21 @@
         logger.warning(f"Cannot send command: No active WebSocket connection for worker {worker_id}.")
         return False

-
-    async def handle_message(worker_id: str, message: dict[str, Any]) -> None:
+    async def handle_message(self, worker_id: str, message: dict[str, Any]) -> None:
         """Handles an incoming message from a worker."""
         event_type = message.get("event")
         if event_type == MSG_TYPE_PROGRESS:
+            job_id = message.get("job_id")
+            progress = message.get("progress", 0)
+            msg_text = message.get("message", "")
             logger.info(
-                f"Received progress update from worker {worker_id} for job {
-                f"{message.get('progress', 0) * 100:.0f}% - {message.get('message', '')}"
+                f"Received progress update from worker {worker_id} for job {job_id}: {progress * 100:.0f}% - {msg_text}"
             )
+            if job_id:
+                try:
+                    await self.storage.update_job_state(job_id, {"progress": progress, "progress_message": msg_text})
+                except Exception as e:
+                    logger.error(f"Failed to update progress for job {job_id}: {e}")
         else:
             logger.debug(f"Received unhandled event from worker {worker_id}: {event_type}")
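From the handler above, a worker progress frame carries `event`, `job_id`, a fractional `progress`, and an optional `message`; the manager now persists the last two onto the job state via `update_job_state`. An illustrative payload (all values invented):

    progress_frame = {
        "event": MSG_TYPE_PROGRESS,  # constant re-exported by avtomatika.constants
        "job_id": "b2f9-…",          # illustrative id
        "progress": 0.75,            # fraction; logged as 75%
        "message": "Processing...",
    }
    # handle_message() writes {"progress": 0.75, "progress_message": "Processing..."}
    # onto the job state.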
{avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.
+Version: 1.0b11
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
@@ -15,7 +15,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rxon
+Requires-Dist: rxon==1.0b2
 Requires-Dist: aiohttp~=3.12
 Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
@@ -58,7 +58,6 @@ Dynamic: license-file

 [](https://opensource.org/licenses/MIT)
 [](https://www.python.org/downloads/release/python-3110/)
-[](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
 [](https://github.com/astral-sh/ruff)

 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -494,10 +493,16 @@ For detailed specifications and examples, please refer to the [**Configuration G

 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.

-* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
-* **
+* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
+* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
+* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
+* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.

+### Progress Tracking
+
+Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
+
 ### Concurrency & Performance

 To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
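The Progress Tracking note above ties back to the api.html hunk earlier in this diff: `GET /api/v1/jobs/{job_id}` now returns `progress` and `progress_message`. A hedged client-side polling sketch (base URL and authentication are omitted; the response fields are the ones shown in the api.html example):

    from aiohttp import ClientSession

    async def fetch_job_status(base_url: str, job_id: str) -> dict:
        async with ClientSession() as session:
            async with session.get(f"{base_url}/api/v1/jobs/{job_id}") as resp:
                # e.g. {"id": "...", "status": "running",
                #       "progress": 0.75, "progress_message": "Processing..."}
                return await resp.json()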
{avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/RECORD
CHANGED
@@ -1,48 +1,48 @@
 avtomatika/__init__.py,sha256=D5r3L-H06uxsY_wgfh7u9YR29QvZMer1BlvzjW9Umfo,701
-avtomatika/api.html,sha256=
+avtomatika/api.html,sha256=6Sj0vwAUZsbLKwlB58ONAttCB52e8h3fidspLOwMMGE,32894
 avtomatika/app_keys.py,sha256=Zd2TaGPduzyEFJgdPvgSH1skdBx2mX-Prj1ma9fAXRo,1275
-avtomatika/blueprint.py,sha256=
+avtomatika/blueprint.py,sha256=OPJShSdh8asl9G2kWzbFu1CKMzsq15fo37I0eYlISkg,12119
 avtomatika/client_config_loader.py,sha256=zVVHZlxSqZUaNpZ4zoU0T1CFYXdxy-3vKSmPcaFuHSY,2772
 avtomatika/compression.py,sha256=bhA1kw4YrCR3I3kdquZSY0fAzCrRrjtz55uepzLUDKI,2498
 avtomatika/config.py,sha256=27ov8BNbiUpkZ1sjtx3pifRavwcxJ_zUgIdkL_pgqv8,3595
-avtomatika/constants.py,sha256=
+avtomatika/constants.py,sha256=j9fkZ1NWLTwl5IcmO9VoMf1N2Okqk0wYfyDybfRi-Fc,2081
 avtomatika/context.py,sha256=T6Ux4Fb1DwWRGTpMNeukM51MQDQbGk2HS6Cwpc0dc1s,4248
 avtomatika/data_types.py,sha256=D_IUzMW8zMz-_MaqVp9MG53rG37Cb3McyRZuIXxvdlE,1108
 avtomatika/datastore.py,sha256=gJjhZ5kxjF8pmbbPQb_qu3HPUpfy2c6T75KZ-smb_zg,545
-avtomatika/dispatcher.py,sha256=
-avtomatika/engine.py,sha256=
-avtomatika/executor.py,sha256=
+avtomatika/dispatcher.py,sha256=5J5GBWFfaGCGXUkM-2fhMeg2n2nTO0BH3ffkzsnSsaE,8784
+avtomatika/engine.py,sha256=Hb6MLanMjx1GDAfkbNJU-K4RXMuPZQP7_HA_0VR8WMw,20916
+avtomatika/executor.py,sha256=bu8_Xmr_hRNsatAKUzypIFWOZQT2yE_gMU4XfGBt4u4,24923
 avtomatika/health_checker.py,sha256=jXYSH4BPeZ4LCxSZV4uXM4BZhGJYgpoAOWQXE8yojLo,2078
-avtomatika/logging_config.py,sha256=
+avtomatika/logging_config.py,sha256=cVY8aOeaWncsvkN015WgC74NTF6r55-OA3E1ux8P824,3347
 avtomatika/metrics.py,sha256=tiksK1fFSOMlz8zFu6GT19JTduvxMTNlLu0QFrTHoQI,1866
 avtomatika/py.typed,sha256=CT_L7gw2MLcQY-X0vs-xB5Vr0wzvGo7GuQYPI_qwJE8,65
 avtomatika/quota.py,sha256=DNcaL6k0J1REeP8sVqbY9FprY_3BSr2SxM2Vf4mEqdw,1612
 avtomatika/ratelimit.py,sha256=hFGW5oN9G6_W_jnHmopXW8bRjjzlvanY19MLghsNLE8,1306
 avtomatika/reputation.py,sha256=pK-x9FrPN2Oc2gtPa1AZJHlhvkd7xlRe4orxM2auJJc,3979
-avtomatika/s3.py,sha256=
+avtomatika/s3.py,sha256=Byc5C_KTo0mOErQRlhDJNPZplyqrWxKe4GSeU99Zaqk,14079
 avtomatika/scheduler.py,sha256=F5Kv5Rx34nDd0mE5jxjwpjRg8duDZBEr91N5Y6CNR24,4231
-avtomatika/scheduler_config_loader.py,sha256=
+avtomatika/scheduler_config_loader.py,sha256=38x-4G4yRrhSrLdmZ4aTb7WggE-BcGblKZO7x97nW6Y,1352
 avtomatika/security.py,sha256=eENEUc0OsHm6wN2H-ckGmiaV9qrZSbYsHFCWyYb3aLs,3271
-avtomatika/telemetry.py,sha256=
+avtomatika/telemetry.py,sha256=17QVxb2vqx3vCkhvzL0JFYc6zvTFndUyZ5balj5wXuA,2504
 avtomatika/watcher.py,sha256=IKBqJ_r52ya0wiH8Gb0qFRMC8DFsusdRzPHjruWvFh4,3558
 avtomatika/worker_config_loader.py,sha256=n0j8gfuJDacWONr8744RsHTCWpc_1ZTRMC-rJZh6P6A,2249
-avtomatika/ws_manager.py,sha256=
-avtomatika/api/handlers.py,sha256=
+avtomatika/ws_manager.py,sha256=0-vo7_IaDJbD58omTSrHm4SZHjePIlVLxEpiVTgNvbQ,3491
+avtomatika/api/handlers.py,sha256=D8oEWsRG7YqJCwXVQaSbLHA35AKm-HahFKM_FDqfqCE,11701
 avtomatika/api/routes.py,sha256=MrtcRNjybxODmKhab0FCzgZGPRcfznwpFtDCdgh8RT4,3937
 avtomatika/history/base.py,sha256=RsCvCkHK1teHjXSk9ZHVEtpQlIjz8kWsfKYHVnapf6c,3848
 avtomatika/history/noop.py,sha256=hLzt0RblsrKUtoyQNauOni6jCi-IYCWEPsiR0vh7tho,1226
 avtomatika/history/postgres.py,sha256=T0XpDurnh48pPI-2JhB285GdNIexNkCSu8ExhLJzcxc,9538
 avtomatika/history/sqlite.py,sha256=txWax9RVzBQzIZuU-SjHnEXEzBmGzIjqzoVsK2oyiAQ,9252
 avtomatika/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avtomatika/services/worker_service.py,sha256=
+avtomatika/services/worker_service.py,sha256=lFHVqbXG-4v5Ec17FZCHaB6Uu8U2nWOGpPZOKjzwM00,11596
 avtomatika/storage/__init__.py,sha256=mGRj_40dWZ7R7uYbqC6gCsUWCKHAbZz4ZVIhYg5dT_E,262
-avtomatika/storage/base.py,sha256=
-avtomatika/storage/memory.py,sha256=
-avtomatika/storage/redis.py,sha256=
+avtomatika/storage/base.py,sha256=54II8RfrEQzCT9NH_ECorM9SdvM-e5f-_MRtJInBczw,13856
+avtomatika/storage/memory.py,sha256=DqUd7SQmneJCNd-YaWLQL-Gpz3FwRHFAH6xx2CIIqY4,14915
+avtomatika/storage/redis.py,sha256=fN0e3_2CP-8H1WKTBljXeHMVkrzLXs8znE2YkKbjuy0,20658
 avtomatika/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avtomatika/utils/webhook_sender.py,sha256=
-avtomatika-1.
-avtomatika-1.
-avtomatika-1.
-avtomatika-1.
-avtomatika-1.
+avtomatika/utils/webhook_sender.py,sha256=LoJ6z_1p-OngjPYl9Pk1N1t9xrP6-v-7xOg_AmWPuVc,3644
+avtomatika-1.0b11.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
+avtomatika-1.0b11.dist-info/METADATA,sha256=3Ok1N9NHujsXKORqv28ZSZmfHpIJ3_GjWGDkXbVyGSU,28432
+avtomatika-1.0b11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+avtomatika-1.0b11.dist-info/top_level.txt,sha256=gLDWhA_wxHj0I6fG5X8vw9fE0HSN4hTE2dEJzeVS2x8,11
+avtomatika-1.0b11.dist-info/RECORD,,
{avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/WHEEL
File without changes
{avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/licenses/LICENSE
File without changes
{avtomatika-1.0b9.dist-info → avtomatika-1.0b11.dist-info}/top_level.txt
File without changes