avtomatika 1.0b9.tar.gz → 1.0b11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {avtomatika-1.0b9/src/avtomatika.egg-info → avtomatika-1.0b11}/PKG-INFO +10 -5
  2. {avtomatika-1.0b9 → avtomatika-1.0b11}/README.md +8 -3
  3. {avtomatika-1.0b9 → avtomatika-1.0b11}/pyproject.toml +2 -2
  4. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api/handlers.py +2 -2
  5. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api.html +1 -1
  6. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/blueprint.py +11 -3
  7. avtomatika-1.0b11/src/avtomatika/constants.py +80 -0
  8. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/dispatcher.py +3 -0
  9. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/engine.py +13 -4
  10. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/executor.py +38 -18
  11. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/logging_config.py +16 -7
  12. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/s3.py +2 -3
  13. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/scheduler_config_loader.py +5 -2
  14. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/services/worker_service.py +26 -22
  15. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/base.py +14 -0
  16. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/memory.py +14 -3
  17. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/redis.py +25 -12
  18. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/telemetry.py +8 -7
  19. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/utils/webhook_sender.py +3 -3
  20. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/ws_manager.py +13 -5
  21. {avtomatika-1.0b9 → avtomatika-1.0b11/src/avtomatika.egg-info}/PKG-INFO +10 -5
  22. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/requires.txt +1 -1
  23. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_error_handling.py +6 -15
  24. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_executor.py +4 -1
  25. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_integration.py +10 -8
  26. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_ws_manager.py +19 -5
  27. avtomatika-1.0b9/src/avtomatika/constants.py +0 -6
  28. {avtomatika-1.0b9 → avtomatika-1.0b11}/LICENSE +0 -0
  29. {avtomatika-1.0b9 → avtomatika-1.0b11}/setup.cfg +0 -0
  30. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/__init__.py +0 -0
  31. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/api/routes.py +0 -0
  32. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/app_keys.py +0 -0
  33. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/client_config_loader.py +0 -0
  34. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/compression.py +0 -0
  35. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/config.py +0 -0
  36. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/context.py +0 -0
  37. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/data_types.py +0 -0
  38. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/datastore.py +0 -0
  39. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/health_checker.py +0 -0
  40. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/base.py +0 -0
  41. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/noop.py +0 -0
  42. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/postgres.py +0 -0
  43. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/history/sqlite.py +0 -0
  44. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/metrics.py +0 -0
  45. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/py.typed +0 -0
  46. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/quota.py +0 -0
  47. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/ratelimit.py +0 -0
  48. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/reputation.py +0 -0
  49. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/scheduler.py +0 -0
  50. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/security.py +0 -0
  51. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/services/__init__.py +0 -0
  52. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/storage/__init__.py +0 -0
  53. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/utils/__init__.py +0 -0
  54. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/watcher.py +0 -0
  55. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika/worker_config_loader.py +0 -0
  56. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/SOURCES.txt +0 -0
  57. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/dependency_links.txt +0 -0
  58. {avtomatika-1.0b9 → avtomatika-1.0b11}/src/avtomatika.egg-info/top_level.txt +0 -0
  59. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprint_conditions.py +0 -0
  60. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprint_integrity.py +0 -0
  61. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_blueprints.py +0 -0
  62. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_client_config_loader.py +0 -0
  63. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_compression.py +0 -0
  64. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_config_validation.py +0 -0
  65. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_context.py +0 -0
  66. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_dispatcher.py +0 -0
  67. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_dispatcher_extended.py +0 -0
  68. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_engine.py +0 -0
  69. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_handlers.py +0 -0
  70. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_handlers_sts.py +0 -0
  71. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_health_checker.py +0 -0
  72. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_history.py +0 -0
  73. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_horizontal_scaling.py +0 -0
  74. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_logging_config.py +0 -0
  75. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_memory_locking.py +0 -0
  76. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_memory_storage.py +0 -0
  77. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_metrics.py +0 -0
  78. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_mtls.py +0 -0
  79. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_noop_history.py +0 -0
  80. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_optimization.py +0 -0
  81. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_postgres_history.py +0 -0
  82. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_ratelimit.py +0 -0
  83. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_redis_locking.py +0 -0
  84. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_redis_storage.py +0 -0
  85. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_reputation.py +0 -0
  86. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_rxon_handler.py +0 -0
  87. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_s3.py +0 -0
  88. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_s3_metadata.py +0 -0
  89. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_scheduler.py +0 -0
  90. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_sts.py +0 -0
  91. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_telemetry.py +0 -0
  92. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_validation_integration.py +0 -0
  93. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_watcher.py +0 -0
  94. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_webhook_sender.py +0 -0
  95. {avtomatika-1.0b9 → avtomatika-1.0b11}/tests/test_worker_config_loader.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avtomatika
-Version: 1.0b9
+Version: 1.0b11
 Summary: A state-machine based orchestrator for long-running AI and other jobs.
 Author-email: Dmitrii Gagarin <madgagarin@gmail.com>
 Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
@@ -15,7 +15,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rxon
+Requires-Dist: rxon==1.0b2
 Requires-Dist: aiohttp~=3.12
 Requires-Dist: python-json-logger~=4.0
 Requires-Dist: graphviz~=0.21
@@ -58,7 +58,6 @@ Dynamic: license-file
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
-[![Tests](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml/badge.svg)](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
 [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -494,10 +493,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 
-* **TRANSIENT_ERROR**: A temporary error (e.g., network failure, rate limit). The orchestrator will automatically retry the task several times.
-* **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
+* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
+* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
+* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
+* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Progress Tracking
+
+Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
+
 ### Concurrency & Performance
 
 To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
@@ -2,7 +2,6 @@
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
-[![Tests](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml/badge.svg)](https://github.com/avtomatika-ai/avtomatika/actions/workflows/ci.yml)
 [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
 
 Avtomatika is a powerful, state-driven engine for managing complex asynchronous workflows in Python. It provides a robust framework for building scalable and resilient applications by separating process logic from execution logic.
@@ -438,10 +437,16 @@ For detailed specifications and examples, please refer to the [**Configuration G
 
 The orchestrator has built-in mechanisms for handling failures based on the `error.code` field in a worker's response.
 
-* **TRANSIENT_ERROR**: A temporary error (e.g., network failure, rate limit). The orchestrator will automatically retry the task several times.
-* **PERMANENT_ERROR**: A permanent error (e.g., a corrupted file). The task will be immediately sent to quarantine for manual investigation.
+* **TRANSIENT_ERROR**: A temporary error (e.g., network failure). The orchestrator will automatically retry the task several times.
+* **RESOURCE_EXHAUSTED_ERROR / TIMEOUT_ERROR / INTERNAL_ERROR**: Treated as transient errors and retried.
+* **PERMANENT_ERROR**: A permanent error. The task will be immediately sent to quarantine.
+* **SECURITY_ERROR / DEPENDENCY_ERROR**: Treated as permanent errors (e.g., security violation or missing model). Immediate quarantine.
 * **INVALID_INPUT_ERROR**: An error in the input data. The entire pipeline (Job) will be immediately moved to the failed state.
 
+### Progress Tracking
+
+Workers can report real-time execution progress (0-100%) and status messages. This information is automatically persisted by the Orchestrator and exposed via the Job Status API (`GET /api/v1/jobs/{job_id}`).
+
 ### Concurrency & Performance
 
 To prevent system overload during high traffic, the Orchestrator implements a backpressure mechanism for its internal job processing logic.
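As a concrete illustration of the error taxonomy documented above, the sketch below shows what a failing worker result might look like. Only the `error.code` field is documented in this README; the surrounding envelope (`status`, `error.message`) and the literal status string are assumptions for illustration.

    # Hypothetical worker result illustrating the error taxonomy above.
    # Only `error.code` is documented; the rest of the envelope is assumed.
    failure_result = {
        "job_id": "…",   # elided
        "task_id": "…",  # elided
        "status": "failure",  # assumed value of TASK_STATUS_FAILURE
        "error": {
            "code": "DEPENDENCY_ERROR",  # classified as permanent: immediate quarantine
            "message": "required model weights not found on worker",  # assumed field
        },
    }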
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "avtomatika"
-version = "1.0b9"
+version = "1.0b11"
 description = "A state-machine based orchestrator for long-running AI and other jobs."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -21,7 +21,7 @@ classifiers = [
     "Typing :: Typed",
 ]
 dependencies = [
-    "rxon",
+    "rxon==1.0b2",
     "aiohttp~=3.12",
     "python-json-logger~=4.0",
     "graphviz~=0.21",
@@ -25,11 +25,11 @@ from ..worker_config_loader import load_worker_configs_to_redis
 logger = getLogger(__name__)
 
 
-def json_dumps(obj) -> str:
+def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")
 
 
-def json_response(data, **kwargs) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)
 
 
@@ -211,7 +211,7 @@
         ],
         request: { body: null },
         responses: [
-            { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "..." } }
+            { code: '200 OK', description: 'Successful response.', body: { "id": "...", "status": "running", "progress": 0.75, "progress_message": "Processing..." } }
         ]
     },
     {
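Since the job status payload now carries `progress` and `progress_message`, a client can poll the Job Status API directly. A minimal polling sketch, assuming a local orchestrator at `http://localhost:8080`, omitting authentication headers, and guessing the terminal status strings:

    # Minimal client-side polling sketch for GET /api/v1/jobs/{job_id}.
    # Base URL, omitted auth, and terminal status strings are assumptions.
    import asyncio
    from aiohttp import ClientSession

    async def poll_job(job_id: str) -> dict:
        async with ClientSession("http://localhost:8080") as session:
            while True:
                async with session.get(f"/api/v1/jobs/{job_id}") as resp:
                    job = await resp.json()
                print(job["status"], job.get("progress"), job.get("progress_message"))
                if job["status"] in ("finished", "failed", "quarantined", "cancelled"):
                    return job
                await asyncio.sleep(2.0)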
@@ -62,7 +62,8 @@ class ConditionalHandler:
         try:
             context_area = getattr(context, self.condition.area)
             actual_value = context_area[self.condition.field]
-            return self.condition.op(actual_value, self.condition.value)
+            result = self.condition.op(actual_value, self.condition.value)
+            return bool(result)
         except (AttributeError, KeyError):
             return False
 
@@ -130,7 +131,14 @@ class StateMachineBlueprint:
         self.name = name
         self.api_endpoint = api_endpoint
         self.api_version = api_version
-        self.data_stores: dict[str, AsyncDictStore] = data_stores if data_stores is not None else {}
+        self.data_stores: dict[str, AsyncDictStore] = {}
+        if data_stores:
+            for ds_name, ds_data in data_stores.items():
+                if isinstance(ds_data, AsyncDictStore):
+                    self.data_stores[ds_name] = ds_data
+                else:
+                    self.data_stores[ds_name] = AsyncDictStore(ds_data)
+
         self.handlers: dict[str, Callable] = {}
         self.aggregator_handlers: dict[str, Callable] = {}
         self.conditional_handlers: list[ConditionalHandler] = []
@@ -279,7 +287,7 @@ class StateMachineBlueprint:
             f"No suitable handler found for state '{state}' in blueprint '{self.name}' for the given context.",
         )
 
-    def render_graph(self, output_filename: str | None = None, output_format: str = "png"):
+    def render_graph(self, output_filename: str | None = None, output_format: str = "png") -> str | None:
         from graphviz import Digraph  # type: ignore[import]
 
         dot = Digraph(comment=f"State Machine for {self.name}")
@@ -0,0 +1,80 @@
+"""
+Centralized constants for the Avtomatika protocol.
+(Legacy wrapper, pointing to rxon.constants)
+"""
+
+from rxon.constants import (
+    AUTH_HEADER_CLIENT,
+    AUTH_HEADER_WORKER,
+    COMMAND_CANCEL_TASK,
+    ENDPOINT_TASK_NEXT,
+    ENDPOINT_TASK_RESULT,
+    ENDPOINT_WORKER_HEARTBEAT,
+    ENDPOINT_WORKER_REGISTER,
+    ERROR_CODE_DEPENDENCY,
+    ERROR_CODE_INTEGRITY_MISMATCH,
+    ERROR_CODE_INTERNAL,
+    ERROR_CODE_INVALID_INPUT,
+    ERROR_CODE_PERMANENT,
+    ERROR_CODE_RESOURCE_EXHAUSTED,
+    ERROR_CODE_SECURITY,
+    ERROR_CODE_TIMEOUT,
+    ERROR_CODE_TRANSIENT,
+    JOB_STATUS_CANCELLED,
+    JOB_STATUS_ERROR,
+    JOB_STATUS_FAILED,
+    JOB_STATUS_FINISHED,
+    JOB_STATUS_PENDING,
+    JOB_STATUS_QUARANTINED,
+    JOB_STATUS_RUNNING,
+    JOB_STATUS_WAITING_FOR_HUMAN,
+    JOB_STATUS_WAITING_FOR_PARALLEL,
+    JOB_STATUS_WAITING_FOR_WORKER,
+    MSG_TYPE_PROGRESS,
+    PROTOCOL_VERSION,
+    PROTOCOL_VERSION_HEADER,
+    STS_TOKEN_ENDPOINT,
+    TASK_STATUS_CANCELLED,
+    TASK_STATUS_FAILURE,
+    TASK_STATUS_SUCCESS,
+    WORKER_API_PREFIX,
+    WS_ENDPOINT,
+)
+
+__all__ = [
+    "AUTH_HEADER_CLIENT",
+    "AUTH_HEADER_WORKER",
+    "COMMAND_CANCEL_TASK",
+    "ENDPOINT_TASK_NEXT",
+    "ENDPOINT_TASK_RESULT",
+    "ENDPOINT_WORKER_HEARTBEAT",
+    "ENDPOINT_WORKER_REGISTER",
+    "ERROR_CODE_DEPENDENCY",
+    "ERROR_CODE_INTEGRITY_MISMATCH",
+    "ERROR_CODE_INTERNAL",
+    "ERROR_CODE_INVALID_INPUT",
+    "ERROR_CODE_PERMANENT",
+    "ERROR_CODE_RESOURCE_EXHAUSTED",
+    "ERROR_CODE_SECURITY",
+    "ERROR_CODE_TIMEOUT",
+    "ERROR_CODE_TRANSIENT",
+    "JOB_STATUS_CANCELLED",
+    "JOB_STATUS_ERROR",
+    "JOB_STATUS_FAILED",
+    "JOB_STATUS_FINISHED",
+    "JOB_STATUS_PENDING",
+    "JOB_STATUS_QUARANTINED",
+    "JOB_STATUS_RUNNING",
+    "JOB_STATUS_WAITING_FOR_HUMAN",
+    "JOB_STATUS_WAITING_FOR_PARALLEL",
+    "JOB_STATUS_WAITING_FOR_WORKER",
+    "MSG_TYPE_PROGRESS",
+    "PROTOCOL_VERSION",
+    "PROTOCOL_VERSION_HEADER",
+    "STS_TOKEN_ENDPOINT",
+    "TASK_STATUS_CANCELLED",
+    "TASK_STATUS_FAILURE",
+    "TASK_STATUS_SUCCESS",
+    "WORKER_API_PREFIX",
+    "WS_ENDPOINT",
+]
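Because every name is re-exported, both import paths resolve to the same objects, so downstream code written against `avtomatika.constants` keeps working unchanged after the move to `rxon.constants`:

    # Both import paths resolve to the same re-exported object.
    from avtomatika.constants import ERROR_CODE_TRANSIENT as legacy
    from rxon.constants import ERROR_CODE_TRANSIENT as canonical

    assert legacy is canonical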
@@ -184,6 +184,9 @@ class Dispatcher:
             selected_worker = self._select_default(capable_workers, task_type)
 
         worker_id = selected_worker.get("worker_id")
+        if not worker_id:
+            raise RuntimeError(f"Selected worker for task '{task_type}' has no worker_id")
+
         logger.info(
             f"Dispatching task '{task_type}' to worker {worker_id} (strategy: {dispatch_strategy})",
         )
@@ -1,7 +1,7 @@
 from asyncio import TimeoutError as AsyncTimeoutError
 from asyncio import create_task, gather, get_running_loop, wait_for
 from logging import getLogger
-from typing import Any
+from typing import Any, Optional
 from uuid import uuid4
 
 from aiohttp import ClientSession, web
@@ -58,7 +58,7 @@ def json_dumps(obj: Any) -> str:
     return dumps(obj).decode("utf-8")
 
 
-def json_response(data, **kwargs: Any) -> web.Response:
+def json_response(data: Any, **kwargs: Any) -> web.Response:
     return web.json_response(data, dumps=json_dumps, **kwargs)
 
 
@@ -70,11 +70,15 @@ class OrchestratorEngine:
         self.config = config
         self.blueprints: dict[str, StateMachineBlueprint] = {}
         self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
-        self.ws_manager = WebSocketManager()
+        self.ws_manager = WebSocketManager(self.storage)
         self.app = web.Application(middlewares=[compression_middleware])
         self.app[ENGINE_KEY] = self
-        self.worker_service = None
+        self.worker_service: Optional[WorkerService] = None
         self._setup_done = False
+        self.webhook_sender: WebhookSender
+        self.dispatcher: Dispatcher
+        self.runner: web.AppRunner
+        self.site: web.TCPSite
 
         from rxon import HttpListener
 
@@ -176,6 +180,9 @@
         except ValueError as e:
             raise web.HTTPBadRequest(text=str(e)) from e
 
+        if self.worker_service is None:
+            raise web.HTTPInternalServerError(text="WorkerService is not initialized.")
+
         if message_type == "register":
             return await self.worker_service.register_worker(payload)
 
@@ -352,6 +359,7 @@
         initial_data: dict[str, Any],
         source: str = "internal",
         tracing_context: dict[str, str] | None = None,
+        data_metadata: dict[str, Any] | None = None,
     ) -> str:
         """Creates a job directly, bypassing the HTTP API layer.
         Useful for internal schedulers and triggers.
@@ -377,6 +385,7 @@
             "status": JOB_STATUS_PENDING,
             "tracing_context": tracing_context or {},
             "client_config": client_config,
+            "data_metadata": data_metadata or {},
         }
         await self.storage.save_job_state(job_id, job_state)
         await self.storage.enqueue_job(job_id)
@@ -238,6 +238,9 @@ class JobExecutor:
                     action_factory.sub_blueprint_to_run,
                     duration_ms,
                 )
+            elif job_state["current_state"] in blueprint.end_states:
+                status = JOB_STATUS_FINISHED if job_state["current_state"] == "finished" else JOB_STATUS_FAILED
+                await self._handle_terminal_reached(job_state, status, duration_ms)
 
         except Exception as e:
             # This catches errors within the handler's execution.
@@ -248,6 +251,40 @@ class JobExecutor:
         if message_id in self._processing_messages:
             self._processing_messages.remove(message_id)
 
+    async def _handle_terminal_reached(
+        self,
+        job_state: dict[str, Any],
+        status: str,
+        duration_ms: int,
+    ) -> None:
+        job_id = job_state["id"]
+        current_state = job_state["current_state"]
+        logger.info(f"Job {job_id} reached terminal state '{current_state}' with status '{status}'")
+
+        await self.history_storage.log_job_event(
+            {
+                "job_id": job_id,
+                "state": current_state,
+                "event_type": "job_completed",
+                "duration_ms": duration_ms,
+                "context_snapshot": job_state,
+            },
+        )
+
+        job_state["status"] = status
+        await self.storage.save_job_state(job_id, job_state)
+
+        # Clean up S3 files if service is available
+        s3_service = self.engine.app.get(S3_SERVICE_KEY)
+        if s3_service:
+            task_files = s3_service.get_task_files(job_id)
+            if task_files:
+                create_task(task_files.cleanup())
+
+        await self._check_and_resume_parent(job_state)
+        event_type = "job_finished" if status == JOB_STATUS_FINISHED else "job_failed"
+        await self.engine.send_job_webhook(job_state, event_type)
+
     async def _handle_transition(
         self,
@@ -270,28 +307,11 @@ class JobExecutor:
             },
         )
 
-        # When transitioning to a new state, reset the retry counter.
         job_state["retry_count"] = 0
         job_state["current_state"] = next_state
         job_state["status"] = JOB_STATUS_RUNNING
         await self.storage.save_job_state(job_id, job_state)
-
-        if next_state not in TERMINAL_STATES:
-            await self.storage.enqueue_job(job_id)
-        else:
-            logger.info(f"Job {job_id} reached terminal state {next_state}")
-
-            # Clean up S3 files if service is available
-            s3_service = self.engine.app.get(S3_SERVICE_KEY)
-            if s3_service:
-                task_files = s3_service.get_task_files(job_id)
-                if task_files:
-                    # Run cleanup in background to not block response
-                    create_task(task_files.cleanup())
-
-            await self._check_and_resume_parent(job_state)
-            event_type = "job_finished" if next_state == JOB_STATUS_FINISHED else "job_failed"
-            await self.engine.send_job_webhook(job_state, event_type)
+        await self.storage.enqueue_job(job_id)
 
     async def _handle_dispatch(
         self,
@@ -1,6 +1,7 @@
 from datetime import datetime
 from logging import DEBUG, Formatter, StreamHandler, getLogger
 from sys import stdout
+from typing import Any, Literal, Optional
 from zoneinfo import ZoneInfo
 
 from pythonjsonlogger import json
@@ -9,14 +10,22 @@ from pythonjsonlogger import json
 class TimezoneFormatter(Formatter):
     """Formatter that respects a custom timezone."""
 
-    def __init__(self, fmt=None, datefmt=None, style="%", validate=True, *, tz_name="UTC"):
+    def __init__(
+        self,
+        fmt: Optional[str] = None,
+        datefmt: Optional[str] = None,
+        style: Literal["%", "{", "$"] = "%",
+        validate: bool = True,
+        *,
+        tz_name: str = "UTC",
+    ) -> None:
         super().__init__(fmt, datefmt, style, validate)
         self.tz = ZoneInfo(tz_name)
 
-    def converter(self, timestamp):
+    def converter(self, timestamp: float) -> datetime:  # type: ignore[override]
         return datetime.fromtimestamp(timestamp, self.tz)
 
-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         dt = self.converter(record.created)
         if datefmt:
             s = dt.strftime(datefmt)
@@ -28,14 +37,14 @@ class TimezoneFormatter(Formatter):
         return s
 
 
-class TimezoneJsonFormatter(json.JsonFormatter):
+class TimezoneJsonFormatter(json.JsonFormatter):  # type: ignore[name-defined]
     """JSON Formatter that respects a custom timezone."""
 
-    def __init__(self, *args, tz_name="UTC", **kwargs):
+    def __init__(self, *args: Any, tz_name: str = "UTC", **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
         self.tz = ZoneInfo(tz_name)
 
-    def formatTime(self, record, datefmt=None):
+    def formatTime(self, record: Any, datefmt: Optional[str] = None) -> str:
         # Override formatTime to use timezone-aware datetime
         dt = datetime.fromtimestamp(record.created, self.tz)
         if datefmt:
@@ -44,7 +53,7 @@ class TimezoneJsonFormatter(json.JsonFormatter):
         return dt.isoformat()
 
 
-def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC"):
+def setup_logging(log_level: str = "INFO", log_format: str = "json", tz_name: str = "UTC") -> None:
     """Configures structured logging for the entire application."""
     logger = getLogger("avtomatika")
     logger.setLevel(log_level)
@@ -335,12 +335,11 @@ class S3Service:
         try:
             self._store = S3Store(
                 bucket=self.config.S3_DEFAULT_BUCKET,
-                access_key_id=self.config.S3_ACCESS_KEY,
-                secret_access_key=self.config.S3_SECRET_KEY,
+                aws_access_key_id=self.config.S3_ACCESS_KEY,
+                aws_secret_access_key=self.config.S3_SECRET_KEY,
                 region=self.config.S3_REGION,
                 endpoint=self.config.S3_ENDPOINT_URL,
                 allow_http="http://" in self.config.S3_ENDPOINT_URL,
-                force_path_style=True,
             )
             self._semaphore = Semaphore(self.config.S3_MAX_CONCURRENCY)
             logger.info(
@@ -22,14 +22,17 @@ def load_schedules_from_file(file_path: str) -> list[ScheduledJobConfig]:
 
     schedules = []
     for name, config in data.items():
-        # Skip sections that might be metadata (though TOML structure usually implies all top-level keys are jobs)
         if not isinstance(config, dict):
            continue
 
+        blueprint = config.get("blueprint")
+        if not isinstance(blueprint, str):
+            raise ValueError(f"Schedule '{name}' is missing a 'blueprint' name.")
+
         schedules.append(
             ScheduledJobConfig(
                 name=name,
-                blueprint=config.get("blueprint"),
+                blueprint=blueprint,
                 input_data=config.get("input_data", {}),
                 interval_seconds=config.get("interval_seconds"),
                 daily_at=config.get("daily_at"),
@@ -10,9 +10,11 @@ from rxon.validators import validate_identifier
 from ..app_keys import S3_SERVICE_KEY
 from ..config import Config
 from ..constants import (
+    ERROR_CODE_DEPENDENCY,
     ERROR_CODE_INTEGRITY_MISMATCH,
     ERROR_CODE_INVALID_INPUT,
     ERROR_CODE_PERMANENT,
+    ERROR_CODE_SECURITY,
     ERROR_CODE_TRANSIENT,
     JOB_STATUS_CANCELLED,
     JOB_STATUS_FAILED,
@@ -102,7 +104,6 @@ class WorkerService:
 
         job_id = result_payload.get("job_id")
         task_id = result_payload.get("task_id")
-        result_data = result_payload.get("result", {})
 
         if not job_id or not task_id:
             raise ValueError("job_id and task_id are required")
@@ -111,25 +112,33 @@ class WorkerService:
         if not job_state:
             raise LookupError("Job not found")
 
+        result_status = result_payload.get("status", TASK_STATUS_SUCCESS)
+        worker_data_content = result_payload.get("data")
+
         if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
             await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
-            job_state.setdefault("aggregation_results", {})[task_id] = result_data
 
-            branches = job_state.setdefault("active_branches", [])
-            if task_id in branches:
-                branches.remove(task_id)
+            def _update_parallel_results(state: dict[str, Any]) -> dict[str, Any]:
+                state.setdefault("aggregation_results", {})[task_id] = result_payload
+                branches = state.setdefault("active_branches", [])
+                if task_id in branches:
+                    branches.remove(task_id)
+
+                if not branches:
+                    state["status"] = JOB_STATUS_RUNNING
+                    state["current_state"] = state["aggregation_target"]
+                return state
 
-            if not branches:
+            updated_job_state = await self.storage.update_job_state_atomic(job_id, _update_parallel_results)
+
+            if not updated_job_state.get("active_branches"):
                 logger.info(f"All parallel branches for job {job_id} have completed.")
-                job_state["status"] = JOB_STATUS_RUNNING
-                job_state["current_state"] = job_state["aggregation_target"]
-                await self.storage.save_job_state(job_id, job_state)
                 await self.storage.enqueue_job(job_id)
             else:
+                remaining = len(updated_job_state["active_branches"])
                 logger.info(
-                    f"Branch {task_id} for job {job_id} completed. Waiting for {len(branches)} more.",
+                    f"Branch {task_id} for job {job_id} completed. Waiting for {remaining} more.",
                 )
-                await self.storage.save_job_state(job_id, job_state)
 
             return "parallel_branch_result_accepted"
 
@@ -146,14 +155,12 @@ class WorkerService:
                 "event_type": "task_finished",
                 "duration_ms": duration_ms,
                 "worker_id": authenticated_worker_id,
-                "context_snapshot": {**job_state, "result": result_data},
+                "context_snapshot": {**job_state, "result": result_payload},
             },
         )
 
-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)  # Default to success? Constant?
-
         if result_status == TASK_STATUS_FAILURE:
-            return await self._handle_task_failure(job_state, task_id, result_data)
+            return await self._handle_task_failure(job_state, task_id, result_payload)
 
         if result_status == TASK_STATUS_CANCELLED:
             logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
@@ -169,13 +176,11 @@ class WorkerService:
             return "result_accepted_cancelled"
 
         transitions = job_state.get("current_task_transitions", {})
-        result_status = result_data.get("status", TASK_STATUS_SUCCESS)
         next_state = transitions.get(result_status)
 
         if next_state:
             logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")
 
-            worker_data_content = result_data.get("data")
             if worker_data_content and isinstance(worker_data_content, dict):
                 if "state_history" not in job_state:
                     job_state["state_history"] = {}
@@ -200,8 +205,8 @@ class WorkerService:
             await self.storage.save_job_state(job_id, job_state)
             return "result_accepted_failure"
 
-    async def _handle_task_failure(self, job_state: dict, task_id: str, result_data: dict) -> str:
-        error_details = result_data.get("error", {})
+    async def _handle_task_failure(self, job_state: dict, task_id: str, result_payload: dict) -> str:
+        error_details = result_payload.get("error", {})
         error_type = ERROR_CODE_TRANSIENT
         error_message = "No error details provided."
 
@@ -214,9 +219,9 @@ class WorkerService:
         job_id = job_state["id"]
         logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
 
-        if error_type == ERROR_CODE_PERMANENT:
+        if error_type in (ERROR_CODE_PERMANENT, ERROR_CODE_SECURITY, ERROR_CODE_DEPENDENCY):
             job_state["status"] = JOB_STATUS_QUARANTINED
-            job_state["error_message"] = f"Task failed with permanent error: {error_message}"
+            job_state["error_message"] = f"Task failed with permanent error ({error_type}): {error_message}"
             await self.storage.save_job_state(job_id, job_state)
             await self.storage.quarantine_job(job_id)
         elif error_type == ERROR_CODE_INVALID_INPUT:
@@ -230,7 +235,6 @@ class WorkerService:
             logger.critical(f"Data integrity mismatch detected for job {job_id}: {error_message}")
         else:
             await self.engine.handle_task_failure(job_state, task_id, error_message)
-
         return "result_accepted_failure"
 
     async def issue_access_token(self, worker_id: str) -> TokenResponse:
@@ -90,6 +90,20 @@ class StorageBackend(ABC):
         """
         raise NotImplementedError
 
+    @abstractmethod
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        """Atomically update the state of a job using a callback function.
+
+        :param job_id: Unique identifier for the job.
+        :param update_callback: A callable that takes the current state and returns the updated state.
+        :return: The updated full state of the job.
+        """
+        raise NotImplementedError
+
     @abstractmethod
     async def register_worker(
         self,
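The callback contract mirrors the `_update_parallel_results` usage in `WorkerService` above: the read-modify-write happens inside the backend, so concurrent branch completions cannot overwrite each other. A minimal usage sketch (the storage wiring and `task_id` are placeholders):

    # Usage sketch: the callback receives the current job state and returns
    # the updated state; the backend applies it atomically.
    from typing import Any

    async def mark_branch_done(storage: Any, job_id: str, task_id: str) -> dict:
        def _update(state: dict) -> dict:
            branches = state.setdefault("active_branches", [])
            if task_id in branches:
                branches.remove(task_id)
            return state

        return await storage.update_job_state_atomic(job_id, _update)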
@@ -12,12 +12,12 @@ class MemoryStorage(StorageBackend):
     Not persistent.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._jobs: dict[str, dict[str, Any]] = {}
         self._workers: dict[str, dict[str, Any]] = {}
         self._worker_ttls: dict[str, float] = {}
-        self._worker_task_queues: dict[str, PriorityQueue] = {}
-        self._job_queue = Queue()
+        self._worker_task_queues: dict[str, PriorityQueue[Any]] = {}
+        self._job_queue: Queue[str] = Queue()
         self._quarantine_queue: list[str] = []
         self._watched_jobs: dict[str, float] = {}
         self._client_configs: dict[str, dict[str, Any]] = {}
@@ -62,6 +62,17 @@ class MemoryStorage(StorageBackend):
         self._jobs[job_id].update(update_data)
         return self._jobs[job_id]
 
+    async def update_job_state_atomic(
+        self,
+        job_id: str,
+        update_callback: Any,
+    ) -> dict[str, Any]:
+        async with self._lock:
+            current_state = self._jobs.get(job_id, {})
+            updated_state = update_callback(current_state)
+            self._jobs[job_id] = updated_state
+            return updated_state
+
     async def register_worker(
         self,
         worker_id: str,
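The in-memory backend above simply serializes updates behind an asyncio lock. The `redis.py` changes (+25 -12) are not shown in this diff, so the following is only a sketch of how a Redis backend might satisfy the same contract with optimistic locking; the key layout, JSON serialization, and WATCH/MULTI retry loop are assumptions, not the shipped implementation.

    # Hypothetical Redis implementation of update_job_state_atomic using
    # optimistic locking (WATCH/MULTI + retry). Key name and serialization
    # are assumptions; the actual avtomatika redis.py is not shown above.
    import json
    from typing import Any, Callable

    from redis.asyncio import Redis
    from redis.exceptions import WatchError

    async def update_job_state_atomic(
        r: Redis, job_id: str, update_callback: Callable[[dict], dict]
    ) -> dict[str, Any]:
        key = f"job_state:{job_id}"  # hypothetical key layout
        async with r.pipeline(transaction=True) as pipe:
            while True:
                try:
                    await pipe.watch(key)  # abort the MULTI if the key changes
                    raw = await pipe.get(key)
                    state = json.loads(raw) if raw else {}
                    updated = update_callback(state)
                    pipe.multi()
                    pipe.set(key, json.dumps(updated))
                    await pipe.execute()  # raises WatchError on conflict
                    return updated
                except WatchError:
                    continue  # a concurrent writer won the race; retry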