avtomatika 1.0b9.tar.gz → 1.0b11.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b9/src/avtomatika.egg-info → avtomatika-1.0b11}/PKG-INFO +10 -5
- {avtomatika-1.0b9 → avtomatika-1.0b11}/README.md +8 -3
- {avtomatika-1.0b9 → avtomatika-1.0b11}/pyproject.toml +2 -2
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api/handlers.py +2 -2
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api.html +1 -1
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/blueprint.py +11 -3
- avtomatika-1.0b11/src/avtomatika/constants.py +80 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/dispatcher.py +3 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/engine.py +13 -4
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/executor.py +38 -18
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/logging_config.py +16 -7
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/s3.py +2 -3
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/scheduler_config_loader.py +5 -2
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/services/worker_service.py +26 -22
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/base.py +14 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/memory.py +14 -3
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/redis.py +25 -12
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/telemetry.py +8 -7
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/utils/webhook_sender.py +3 -3
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/ws_manager.py +13 -5
- {avtomatika-1.0b9 → avtomatika-1.0b11/src/avtomatika.egg-info}/PKG-INFO +10 -5
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/requires.txt +1 -1
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_error_handling.py +6 -15
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_executor.py +4 -1
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_integration.py +10 -8
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_ws_manager.py +19 -5
- avtomatika-1.0b9/src/avtomatika/constants.py +0 -6
- {avtomatika-1.0b9 → avtomatika-1.0b11}/LICENSE +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/setup.cfg +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/__init__.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api/routes.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/app_keys.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/config.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/context.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/data_types.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/datastore.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/base.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/noop.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/postgres.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/sqlite.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/reputation.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/scheduler.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/security.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/services/__init__.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/__init__.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/utils/__init__.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/watcher.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/worker_config_loader.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/SOURCES.txt +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprint_integrity.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_compression.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_context.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_dispatcher_extended.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_engine.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_handlers.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_handlers_sts.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_history.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_horizontal_scaling.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_logging_config.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_mtls.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_optimization.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_rxon_handler.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_s3.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_s3_metadata.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_scheduler.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_sts.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_validation_integration.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_webhook_sender.py +0 -0
- {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_worker_config_loader.py +0 -0
**{avtomatika-1.0b9/src/avtomatika.egg-info → avtomatika-1.0b11}/PKG-INFO**

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b9
+Version: 1.0b11
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
@@ -15,7 +15,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rxon
+Requires-Dist: rxon==1.0b2
 Requires-Dist: aiohttp~=3.12
 Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
@@ -58,7 +58,6 @@ Dynamic: license-file
 
 [](https://opensource.org/licenses/MIT)
 [](https://www.python.org/downloads/release/python-3110/)
-[](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
 [](https://github.com/astral-sh/ruff)
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -494,10 +493,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 
-* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
-* **
+* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
+* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
+* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
+* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Progress Tracking
+
+Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
+
 ### Concurrency & Performance
 
 To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/README.md**

```diff
@@ -2,7 +2,6 @@
 
 [](https://opensource.org/licenses/MIT)
 [](https://www.python.org/downloads/release/python-3110/)
-[](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
 [](https://github.com/astral-sh/ruff)
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -438,10 +437,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 
-* **TRANSIENT_ERROR**: A temporary error (e.g., network failure
-* **
+* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
+* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
+* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
+* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Progress Tracking
+
+Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
+
 ### Concurrency & Performance
 
 To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
```
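For readers mapping these error codes onto the wire format: a worker result payload that would take the new quarantine path might look like the sketch below. The payload shape (`job_id`, `task_id`, `status`, `data`, `error`) follows what `worker_service.py` reads in the hunks later in this diff; the literal field values are illustrative assumptions, not taken from the rxon spec.

```python
# Illustrative worker result payload (values are assumptions; the shape mirrors
# the fields worker_service.py reads: job_id, task_id, status, data, error).
result_payload = {
    "job_id": "7f3c9a2e",
    "task_id": "branch-1",
    "status": "failure",  # compared against TASK_STATUS_FAILURE by the orchestrator
    "error": {
        # DEPENDENCY_ERROR is now grouped with PERMANENT_ERROR: immediate quarantine.
        "code": "DEPENDENCY_ERROR",
        "message": "Required model weights are not installed on this worker.",
    },
}
```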
**{avtomatika-1.0b9 → avtomatika-1.0b11}/pyproject.toml**

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b9"
+version = "1.0b11"
 description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -21,7 +21,7 @@ classifiers = [
     "Typing :: Typed",
 ]
 dependencies = [
-    "rxon",
+    "rxon==1.0b2",
    "aiohttp~=3.12",
    "python-json-logger~=4.0",
    "graphviz~=0.21",
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api/handlers.py**

```diff
@@ -25,11 +25,11 @@ from ..worker_config_loader import load_worker_configs_to_redis
 logger = getLogger(__name__)
 
 
-def json_dumps(obj) -> str:
+def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")
 
 
-def json_response(data, **kwargs) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)
 
 
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api.html**

```diff
@@ -211,7 +211,7 @@
     ],
     request: { body: null },
     responses: [
-        { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "..." } }
+        { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "running", "progress": 0.75, "progress_message": "Processing..." } }
     ]
 },
 {
```
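The sample response now advertises the progress fields described in the README. A minimal polling sketch against the job status endpoint (base URL is a placeholder; real deployments also need a client auth header):

```python
import asyncio

from aiohttp import ClientSession


async def poll_job(job_id: str) -> None:
    # Placeholder base URL; adjust to your deployment.
    async with ClientSession(base_url="http://localhost:8080") as session:
        async with session.get(f"/api/v1/jobs/{job_id}") as resp:
            body = await resp.json()
            # New in 1.0b11: progress (shown as a 0.0-1.0 fraction in the sample
            # above) and progress_message accompany the job status.
            print(body["status"], body.get("progress"), body.get("progress_message"))


asyncio.run(poll_job("7f3c9a2e"))
```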
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/blueprint.py**

```diff
@@ -62,7 +62,8 @@ class ConditionalHandler:
         try:
             context_area = getattr(context, self.condition.area)
             actual_value = context_area[self.condition.field]
-
+            result = self.condition.op(actual_value, self.condition.value)
+            return bool(result)
         except (AttributeError, KeyError):
             return False
 
```
```diff
@@ -130,7 +131,14 @@ class StateMachineBlueprint:
         self.name = name
         self.api_endpoint = api_endpoint
         self.api_version = api_version
-        self.data_stores: dict[str, AsyncDictStore] =
+        self.data_stores: dict[str, AsyncDictStore] = {}
+        if data_stores:
+            for ds_name, ds_data in data_stores.items():
+                if isinstance(ds_data, AsyncDictStore):
+                    self.data_stores[ds_name] = ds_data
+                else:
+                    self.data_stores[ds_name] = AsyncDictStore(ds_data)
+
         self.handlers: dict[str, Callable] = {}
         self.aggregator_handlers: dict[str, Callable] = {}
         self.conditional_handlers: list[ConditionalHandler] = []
```
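The normalization above means callers can hand the constructor either prepared stores or plain dicts. A hedged construction sketch (the constructor's full signature is not visible in this diff, so arguments besides `name` and `data_stores` are omitted; `AsyncDictStore`'s import path is inferred from `datastore.py` in the file list):

```python
from avtomatika.blueprint import StateMachineBlueprint
from avtomatika.datastore import AsyncDictStore  # import path assumed

bp = StateMachineBlueprint(
    name="video_pipeline",
    data_stores={
        # Already-wrapped store: kept as-is.
        "models": AsyncDictStore({"default": "sd-xl"}),
        # Plain dict: wrapped in an AsyncDictStore by the constructor.
        "presets": {"fast": {"steps": 20}},
    },
)
```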
```diff
@@ -279,7 +287,7 @@ class StateMachineBlueprint:
                 f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
             )
 
-    def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
+    def render_graph(self, output_filename: str | None = None, output_format: str = "png") -> str | None:
         from graphviz import Digraph  # type: ignore[import]
 
         dot = Digraph(comment=f"State Machine for {self.name}")
```
**avtomatika-1.0b11/src/avtomatika/constants.py** (new file)

```diff
@@ -0,0 +1,80 @@
+"""
+Centralized constants for the Avtomatika protocol.
+(Legacy wrapper, pointing to rxon.constants)
+"""
+
+from rxon.constants import (
+    AUTH_HEADER_CLIENT,
+    AUTH_HEADER_WORKER,
+    COMMAND_CANCEL_TASK,
+    ENDPOINT_TASK_NEXT,
+    ENDPOINT_TASK_RESULT,
+    ENDPOINT_WORKER_HEARTBEAT,
+    ENDPOINT_WORKER_REGISTER,
+    ERROR_CODE_DEPENDENCY,
+    ERROR_CODE_INTEGRITY_MISMATCH,
+    ERROR_CODE_INTERNAL,
+    ERROR_CODE_INVALID_INPUT,
+    ERROR_CODE_PERMANENT,
+    ERROR_CODE_RESOURCE_EXHAUSTED,
+    ERROR_CODE_SECURITY,
+    ERROR_CODE_TIMEOUT,
+    ERROR_CODE_TRANSIENT,
+    JOB_STATUS_CANCELLED,
+    JOB_STATUS_ERROR,
+    JOB_STATUS_FAILED,
+    JOB_STATUS_FINISHED,
+    JOB_STATUS_PENDING,
+    JOB_STATUS_QUARANTINED,
+    JOB_STATUS_RUNNING,
+    JOB_STATUS_WAITING_FOR_HUMAN,
+    JOB_STATUS_WAITING_FOR_PARALLEL,
+    JOB_STATUS_WAITING_FOR_WORKER,
+    MSG_TYPE_PROGRESS,
+    PROTOCOL_VERSION,
+    PROTOCOL_VERSION_HEADER,
+    STS_TOKEN_ENDPOINT,
+    TASK_STATUS_CANCELLED,
+    TASK_STATUS_FAILURE,
+    TASK_STATUS_SUCCESS,
+    WORKER_API_PREFIX,
+    WS_ENDPOINT,
+)
+
+__all__ = [
+    "AUTH_HEADER_CLIENT",
+    "AUTH_HEADER_WORKER",
+    "COMMAND_CANCEL_TASK",
+    "ENDPOINT_TASK_NEXT",
+    "ENDPOINT_TASK_RESULT",
+    "ENDPOINT_WORKER_HEARTBEAT",
+    "ENDPOINT_WORKER_REGISTER",
+    "ERROR_CODE_DEPENDENCY",
+    "ERROR_CODE_INTEGRITY_MISMATCH",
+    "ERROR_CODE_INTERNAL",
+    "ERROR_CODE_INVALID_INPUT",
+    "ERROR_CODE_PERMANENT",
+    "ERROR_CODE_RESOURCE_EXHAUSTED",
+    "ERROR_CODE_SECURITY",
+    "ERROR_CODE_TIMEOUT",
+    "ERROR_CODE_TRANSIENT",
+    "JOB_STATUS_CANCELLED",
+    "JOB_STATUS_ERROR",
+    "JOB_STATUS_FAILED",
+    "JOB_STATUS_FINISHED",
+    "JOB_STATUS_PENDING",
+    "JOB_STATUS_QUARANTINED",
+    "JOB_STATUS_RUNNING",
+    "JOB_STATUS_WAITING_FOR_HUMAN",
+    "JOB_STATUS_WAITING_FOR_PARALLEL",
+    "JOB_STATUS_WAITING_FOR_WORKER",
+    "MSG_TYPE_PROGRESS",
+    "PROTOCOL_VERSION",
+    "PROTOCOL_VERSION_HEADER",
+    "STS_TOKEN_ENDPOINT",
+    "TASK_STATUS_CANCELLED",
+    "TASK_STATUS_FAILURE",
+    "TASK_STATUS_SUCCESS",
+    "WORKER_API_PREFIX",
+    "WS_ENDPOINT",
+]
```
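Since `avtomatika.constants` is now just a re-export shim, both import paths resolve to the same objects:

```python
# The legacy import path keeps working; new code can go straight to rxon.
from avtomatika.constants import ERROR_CODE_TRANSIENT as legacy_code
from rxon.constants import ERROR_CODE_TRANSIENT as canonical_code

assert legacy_code is canonical_code
```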
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/dispatcher.py**

```diff
@@ -184,6 +184,9 @@ class Dispatcher:
         selected_worker = self._select_default(capable_workers, task_type)
 
         worker_id = selected_worker.get("worker_id")
+        if not worker_id:
+            raise RuntimeError(f"Selected worker for task '{task_type}' has no worker_id")
+
         logger.info(
             f"Dispatching task '{task_type}' to worker {worker_id} (strategy: {dispatch_strategy})",
         )
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/engine.py**

```diff
@@ -1,7 +1,7 @@
 from asyncio import TimeoutError as AsyncTimeoutError
 from asyncio import create_task, gather, get_running_loop, wait_for
 from logging import getLogger
-from typing import Any
+from typing import Any, Optional
 from uuid import uuid4
 
 from aiohttp import ClientSession, web
@@ -58,7 +58,7 @@ def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")
 
 
-def json_response(data, **kwargs: Any) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)
 
 
@@ -70,11 +70,15 @@ class OrchestratorEngine:
         self.config = config
         self.blueprints: dict[str, StateMachineBlueprint] = {}
         self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
-        self.ws_manager = WebSocketManager()
+        self.ws_manager = WebSocketManager(self.storage)
         self.app = web.Application(middlewares=[compression_middleware])
         self.app[ENGINE_KEY] = self
-        self.worker_service = None
+        self.worker_service: Optional[WorkerService] = None
         self._setup_done = False
+        self.webhook_sender: WebhookSender
+        self.dispatcher: Dispatcher
+        self.runner: web.AppRunner
+        self.site: web.TCPSite
 
         from rxon import HttpListener
 
@@ -176,6 +180,9 @@ class OrchestratorEngine:
         except ValueError as e:
             raise web.HTTPBadRequest(text=str(e)) from e
 
+        if self.worker_service is None:
+            raise web.HTTPInternalServerError(text="WorkerService is not initialized.")
+
         if message_type == "register":
             return await self.worker_service.register_worker(payload)
 
@@ -352,6 +359,7 @@ class OrchestratorEngine:
         initial_data: dict[str, Any],
         source: str = "internal",
         tracing_context: dict[str, str] | None = None,
+        data_metadata: dict[str, Any] | None = None,
     ) -> str:
         """Creates a job directly, bypassing the HTTP API layer.
         Useful for internal schedulers and triggers.
@@ -377,6 +385,7 @@ class OrchestratorEngine:
             "status": JOB_STATUS_PENDING,
             "tracing_context": tracing_context or {},
             "client_config": client_config,
+            "data_metadata": data_metadata or {},
         }
         await self.storage.save_job_state(job_id, job_state)
         await self.storage.enqueue_job(job_id)
```
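The new `data_metadata` parameter flows straight into the persisted job state. A hedged calling sketch (wrapped in a coroutine so it runs; the first positional argument's name is not visible in this hunk and is assumed, as are the metadata keys):

```python
from typing import Any


async def schedule_ingest(engine: Any) -> str:
    # data_metadata is new in 1.0b11; the keys used here are illustrative.
    return await engine.create_job(
        "video_pipeline",  # blueprint identifier; parameter name assumed
        initial_data={"url": "s3://bucket/in.mp4"},
        source="scheduler",
        data_metadata={"content_type": "video/mp4", "size_bytes": 10_485_760},
    )
```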
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/executor.py**

```diff
@@ -238,6 +238,9 @@ class JobExecutor:
                     action_factory.sub_blueprint_to_run,
                     duration_ms,
                 )
+            elif job_state["current_state"] in blueprint.end_states:
+                status = JOB_STATUS_FINISHED if job_state["current_state"] == "finished" else JOB_STATUS_FAILED
+                await self._handle_terminal_reached(job_state, status, duration_ms)
 
         except Exception as e:
             # This catches errors within the handler's execution.
@@ -248,6 +251,40 @@ class JobExecutor:
             if message_id in self._processing_messages:
                 self._processing_messages.remove(message_id)
 
+    async def _handle_terminal_reached(
+        self,
+        job_state: dict[str, Any],
+        status: str,
+        duration_ms: int,
+    ) -> None:
+        job_id = job_state["id"]
+        current_state = job_state["current_state"]
+        logger.info(f"Job {job_id} reached terminal state '{current_state}' with status '{status}'")
+
+        await self.history_storage.log_job_event(
+            {
+                "job_id": job_id,
+                "state": current_state,
+                "event_type": "job_completed",
+                "duration_ms": duration_ms,
+                "context_snapshot": job_state,
+            },
+        )
+
+        job_state["status"] = status
+        await self.storage.save_job_state(job_id, job_state)
+
+        # Clean up S3 files if service is available
+        s3_service = self.engine.app.get(S3_SERVICE_KEY)
+        if s3_service:
+            task_files = s3_service.get_task_files(job_id)
+            if task_files:
+                create_task(task_files.cleanup())
+
+        await self._check_and_resume_parent(job_state)
+        event_type = "job_finished" if status == JOB_STATUS_FINISHED else "job_failed"
+        await self.engine.send_job_webhook(job_state, event_type)
+
     async def _handle_transition(
         self,
         job_state: dict[str, Any],
@@ -270,28 +307,11 @@ class JobExecutor:
             },
         )
 
-        # When transitioning to a new state, reset the retry counter.
         job_state["retry_count"] = 0
         job_state["current_state"] = next_state
         job_state["status"] = JOB_STATUS_RUNNING
         await self.storage.save_job_state(job_id, job_state)
-
-        if next_state not in TERMINAL_STATES:
-            await self.storage.enqueue_job(job_id)
-        else:
-            logger.info(f"Job {job_id} reached terminal state {next_state}")
-
-            # Clean up S3 files if service is available
-            s3_service = self.engine.app.get(S3_SERVICE_KEY)
-            if s3_service:
-                task_files = s3_service.get_task_files(job_id)
-                if task_files:
-                    # Run cleanup in background to not block response
-                    create_task(task_files.cleanup())
-
-            await self._check_and_resume_parent(job_state)
-            event_type = "job_finished" if next_state == JOB_STATUS_FINISHED else "job_failed"
-            await self.engine.send_job_webhook(job_state, event_type)
+        await self.storage.enqueue_job(job_id)
 
     async def _handle_dispatch(
         self,
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/logging_config.py**

```diff
@@ -1,6 +1,7 @@
 from datetime import datetime
 from logging import DEBUG, Formatter, StreamHandler, getLogger
 from sys import stdout
+from typing import Any, Literal, Optional
 from zoneinfo import ZoneInfo
 
 from pythonjsonlogger import json
@@ -9,14 +10,22 @@ from pythonjsonlogger import json
 class TimezoneFormatter(Formatter):
     """Formatter that respects a custom timezone."""
 
-    def __init__(
+    def __init__(
+        self,
+        fmt: Optional[str] = None,
+        datefmt: Optional[str] = None,
+        style: Literal["%", "{", "$"] = "%",
+        validate: bool = True,
+        *,
+        tz_name: str = "UTC",
+    ) -> None:
         super().__init__(fmt, datefmt, style, validate)
         self.tz = ZoneInfo(tz_name)
 
-    def converter(self, timestamp):
+    def converter(self, timestamp: float) -> datetime:  # type: ignore[override]
         return datetime.fromtimestamp(timestamp, self.tz)
 
-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         dt = self.converter(record.created)
         if datefmt:
             s = dt.strftime(datefmt)
@@ -28,14 +37,14 @@ class TimezoneFormatter(Formatter):
         return s
 
 
-class TimezoneJsonFormatter(json.JsonFormatter):
+class TimezoneJsonFormatter(json.JsonFormatter):  # type: ignore[name-defined]
     """JSON Formatter that respects a custom timezone."""
 
-    def __init__(self, *args, tz_name="UTC", **kwargs):
+    def __init__(self, *args: Any, tz_name: str = "UTC", **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
         self.tz = ZoneInfo(tz_name)
 
-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         # Override formatTime to use timezone-aware datetime
         dt = datetime.fromtimestamp(record.created, self.tz)
         if datefmt:
@@ -44,7 +53,7 @@ class TimezoneJsonFormatter(json.JsonFormatter):
         return dt.isoformat()
 
 
-def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC"):
+def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC") -> None:
     """Configures structured logging for the entire application."""
     logger = getLogger("avtomatika")
     logger.setLevel(log_level)
```
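The widened `__init__` keeps `TimezoneFormatter` signature-compatible with `logging.Formatter` while making `tz_name` keyword-only. Usage stays the same; a small sketch:

```python
from logging import StreamHandler, getLogger

from avtomatika.logging_config import TimezoneFormatter, setup_logging

# One-call application setup (defaults shown; returns None as of this release).
setup_logging(log_level="INFO", log_format="json", tz_name="UTC")

# Or attach the formatter to a handler manually; tz_name must now be a keyword.
handler = StreamHandler()
handler.setFormatter(
    TimezoneFormatter("%(asctime)s %(levelname)s %(message)s", tz_name="Europe/Berlin")
)
getLogger("avtomatika").addHandler(handler)
```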
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/s3.py**

```diff
@@ -335,12 +335,11 @@ class S3Service:
         try:
             self._store = S3Store(
                 bucket=self.config.S3_DEFAULT_BUCKET,
-
-
+                aws_access_key_id=self.config.S3_ACCESS_KEY,
+                aws_secret_access_key=self.config.S3_SECRET_KEY,
                 region=self.config.S3_REGION,
                 endpoint=self.config.S3_ENDPOINT_URL,
                 allow_http="http://" in self.config.S3_ENDPOINT_URL,
-                force_path_style=True,
             )
             self._semaphore = Semaphore(self.config.S3_MAX_CONCURRENCY)
             logger.info(
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/scheduler_config_loader.py**

```diff
@@ -22,14 +22,17 @@ def load_schedules_from_file(file_path: str) -> list[ScheduledJobConfig]:
 
     schedules = []
     for name, config in data.items():
-        # Skip sections that might be metadata (though TOML structure usually implies all top-level keys are jobs)
         if not isinstance(config, dict):
             continue
 
+        blueprint = config.get("blueprint")
+        if not isinstance(blueprint, str):
+            raise ValueError(f"Schedule '{name}' is missing a 'blueprint' name.")
+
         schedules.append(
             ScheduledJobConfig(
                 name=name,
-                blueprint=
+                blueprint=blueprint,
                 input_data=config.get("input_data", {}),
                 interval_seconds=config.get("interval_seconds"),
                 daily_at=config.get("daily_at"),
```
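A schedule entry without a string `blueprint` key now fails fast instead of producing a half-built config. A minimal reproduction (the TOML content is hypothetical):

```python
from pathlib import Path

from avtomatika.scheduler_config_loader import load_schedules_from_file

Path("schedules.toml").write_text(
    'nightly_cleanup = { blueprint = "cleanup", interval_seconds = 86400 }\n'
    "broken_entry = { interval_seconds = 60 }\n"  # no blueprint key
)

try:
    load_schedules_from_file("schedules.toml")
except ValueError as exc:
    print(exc)  # Schedule 'broken_entry' is missing a 'blueprint' name.
```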
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/services/worker_service.py**

```diff
@@ -10,9 +10,11 @@ from rxon.validators import validate_identifier
 from ..app_keys import S3_SERVICE_KEY
 from ..config import Config
 from ..constants import (
+    ERROR_CODE_DEPENDENCY,
     ERROR_CODE_INTEGRITY_MISMATCH,
     ERROR_CODE_INVALID_INPUT,
     ERROR_CODE_PERMANENT,
+    ERROR_CODE_SECURITY,
     ERROR_CODE_TRANSIENT,
     JOB_STATUS_CANCELLED,
     JOB_STATUS_FAILED,
@@ -102,7 +104,6 @@ class WorkerService:
 
         job_id = result_payload.get("job_id")
         task_id = result_payload.get("task_id")
-        result_data = result_payload.get("result", {})
 
         if not job_id or not task_id:
             raise ValueError("job_id and task_id are required")
@@ -111,25 +112,33 @@ class WorkerService:
         if not job_state:
             raise LookupError("Job not found")
 
+        result_status = result_payload.get("status", TASK_STATUS_SUCCESS)
+        worker_data_content = result_payload.get("data")
+
         if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
             await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
-            job_state.setdefault("aggregation_results", {})[task_id] = result_data
 
-
-
-            branches.
+            def _update_parallel_results(state: dict[str, Any]) -> dict[str, Any]:
+                state.setdefault("aggregation_results", {})[task_id] = result_payload
+                branches = state.setdefault("active_branches", [])
+                if task_id in branches:
+                    branches.remove(task_id)
+
+                if not branches:
+                    state["status"] = JOB_STATUS_RUNNING
+                    state["current_state"] = state["aggregation_target"]
+                return state
 
-
+            updated_job_state = await self.storage.update_job_state_atomic(job_id, _update_parallel_results)
+
+            if not updated_job_state.get("active_branches"):
                 logger.info(f"All parallel branches for job {job_id} have completed.")
-                job_state["status"] = JOB_STATUS_RUNNING
-                job_state["current_state"] = job_state["aggregation_target"]
-                await self.storage.save_job_state(job_id, job_state)
                 await self.storage.enqueue_job(job_id)
             else:
+                remaining = len(updated_job_state["active_branches"])
                 logger.info(
-                    f"Branch {task_id} for job {job_id} completed. Waiting for {
+                    f"Branch {task_id} for job {job_id} completed. Waiting for {remaining} more.",
                 )
-                await self.storage.save_job_state(job_id, job_state)
 
             return "parallel_branch_result_accepted"
 
@@ -146,14 +155,12 @@ class WorkerService:
                 "event_type": "task_finished",
                 "duration_ms": duration_ms,
                 "worker_id": authenticated_worker_id,
-                "context_snapshot": {**job_state, "result":
+                "context_snapshot": {**job_state, "result": result_payload},
             },
         )
 
-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)  # Default to success? Constant?
-
         if result_status == TASK_STATUS_FAILURE:
-            return await self._handle_task_failure(job_state, task_id,
+            return await self._handle_task_failure(job_state, task_id, result_payload)
 
         if result_status == TASK_STATUS_CANCELLED:
             logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
@@ -169,13 +176,11 @@ class WorkerService:
             return "result_accepted_cancelled"
 
         transitions = job_state.get("current_task_transitions", {})
-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)
         next_state = transitions.get(result_status)
 
         if next_state:
             logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
 
-            worker_data_content = result_data.get("data")
             if worker_data_content and isinstance(worker_data_content, dict):
                 if "state_history" not in job_state:
                     job_state["state_history"] = {}
@@ -200,8 +205,8 @@ class WorkerService:
             await self.storage.save_job_state(job_id, job_state)
             return "result_accepted_failure"
 
-    async def _handle_task_failure(self, job_state: dict, task_id: str,
-        error_details =
+    async def _handle_task_failure(self, job_state: dict, task_id: str, result_payload: dict) -> str:
+        error_details = result_payload.get("error", {})
         error_type = ERROR_CODE_TRANSIENT
         error_message = "No error details provided."
 
@@ -214,9 +219,9 @@ class WorkerService:
         job_id = job_state["id"]
         logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
 
-        if error_type
+        if error_type in (ERROR_CODE_PERMANENT, ERROR_CODE_SECURITY, ERROR_CODE_DEPENDENCY):
             job_state["status"] = JOB_STATUS_QUARANTINED
-            job_state["error_message"] = f"Task failed with permanent error: {error_message}"
+            job_state["error_message"] = f"Task failed with permanent error ({error_type}): {error_message}"
             await self.storage.save_job_state(job_id, job_state)
             await self.storage.quarantine_job(job_id)
         elif error_type == ERROR_CODE_INVALID_INPUT:
@@ -230,7 +235,6 @@ class WorkerService:
             logger.critical(f"Data integrity mismatch detected for job {job_id}: {error_message}")
         else:
             await self.engine.handle_task_failure(job_state, task_id, error_message)
-
        return "result_accepted_failure"
 
    async def issue_access_token(self, worker_id: str) -> TokenResponse:
```
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/base.py**

```diff
@@ -90,6 +90,20 @@ class StorageBackend(ABC):
         """
         raise NotImplementedError
 
+    @abstractmethod
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        """Atomically update the state of a job using a callback function.
+
+        :param job_id: Unique identifier for the job.
+        :param update_callback: A callable that takes the current state and returns the updated state.
+        :return: The updated full state of the job.
+        """
+        raise NotImplementedError
+
     @abstractmethod
     async def register_worker(
         self,
```
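The Redis backend also implements this hook in 1.0b11 (`storage/redis.py`, +25 −12), but its body is not shown above. One conventional way to satisfy the contract is optimistic locking with redis-py's `WATCH`/`MULTI`; the sketch below is an assumption about the approach, not the package's actual code, and the `job:{id}` key layout is hypothetical:

```python
import json
from typing import Any, Callable

from redis.asyncio import Redis
from redis.exceptions import WatchError


async def update_job_state_atomic_sketch(
    redis: Redis,
    job_id: str,
    update_callback: Callable[[dict[str, Any]], dict[str, Any]],
) -> dict[str, Any]:
    key = f"job:{job_id}"  # hypothetical key layout
    async with redis.pipeline(transaction=True) as pipe:
        while True:
            try:
                await pipe.watch(key)          # abort the MULTI if the key changes
                raw = await pipe.get(key)      # immediate-mode read while watching
                current = json.loads(raw) if raw else {}
                updated = update_callback(current)
                pipe.multi()                   # switch to buffered transaction mode
                pipe.set(key, json.dumps(updated))
                await pipe.execute()           # raises WatchError on a lost race
                return updated
            except WatchError:
                continue                       # retry with a fresh snapshot
```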
**{avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/memory.py**

```diff
@@ -12,12 +12,12 @@ class MemoryStorage(StorageBackend):
     Not persistent.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._jobs: dict[str, dict[str, Any]] = {}
         self._workers: dict[str, dict[str, Any]] = {}
         self._worker_ttls: dict[str, float] = {}
-        self._worker_task_queues: dict[str, PriorityQueue] = {}
-        self._job_queue = Queue()
+        self._worker_task_queues: dict[str, PriorityQueue[Any]] = {}
+        self._job_queue: Queue[str] = Queue()
         self._quarantine_queue: list[str] = []
         self._watched_jobs: dict[str, float] = {}
         self._client_configs: dict[str, dict[str, Any]] = {}
@@ -62,6 +62,17 @@ class MemoryStorage(StorageBackend):
         self._jobs[job_id].update(update_data)
         return self._jobs[job_id]
 
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        async with self._lock:
+            current_state = self._jobs.get(job_id, {})
+            updated_state = update_callback(current_state)
+            self._jobs[job_id] = updated_state
+            return updated_state
+
     async def register_worker(
         self,
         worker_id: str,
```