avtomatika-1.0b6-py3-none-any.whl → avtomatika-1.0b8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika/api/handlers.py +549 -0
- avtomatika/api/routes.py +118 -0
- avtomatika/app_keys.py +33 -0
- avtomatika/blueprint.py +125 -54
- avtomatika/config.py +10 -0
- avtomatika/context.py +2 -2
- avtomatika/data_types.py +4 -2
- avtomatika/dispatcher.py +9 -27
- avtomatika/engine.py +70 -601
- avtomatika/executor.py +55 -22
- avtomatika/health_checker.py +23 -5
- avtomatika/history/base.py +60 -6
- avtomatika/history/noop.py +18 -7
- avtomatika/history/postgres.py +8 -6
- avtomatika/history/sqlite.py +7 -5
- avtomatika/metrics.py +1 -1
- avtomatika/reputation.py +46 -40
- avtomatika/s3.py +323 -0
- avtomatika/scheduler.py +8 -8
- avtomatika/storage/base.py +45 -4
- avtomatika/storage/memory.py +56 -13
- avtomatika/storage/redis.py +185 -252
- avtomatika/utils/__init__.py +0 -0
- avtomatika/utils/webhook_sender.py +96 -0
- avtomatika/watcher.py +34 -38
- avtomatika/ws_manager.py +7 -6
- {avtomatika-1.0b6.dist-info → avtomatika-1.0b8.dist-info}/METADATA +91 -3
- avtomatika-1.0b8.dist-info/RECORD +46 -0
- avtomatika-1.0b6.dist-info/RECORD +0 -40
- {avtomatika-1.0b6.dist-info → avtomatika-1.0b8.dist-info}/WHEEL +0 -0
- {avtomatika-1.0b6.dist-info → avtomatika-1.0b8.dist-info}/licenses/LICENSE +0 -0
- {avtomatika-1.0b6.dist-info → avtomatika-1.0b8.dist-info}/top_level.txt +0 -0
avtomatika/api/handlers.py
ADDED
@@ -0,0 +1,549 @@

from importlib import resources
from logging import getLogger
from typing import Any, Callable
from uuid import uuid4

from aiohttp import WSMsgType, web
from aioprometheus import render
from orjson import OPT_INDENT_2, dumps, loads

from .. import metrics
from ..app_keys import (
    ENGINE_KEY,
)
from ..blueprint import StateMachineBlueprint
from ..client_config_loader import load_client_configs_to_redis
from ..constants import (
    ERROR_CODE_INVALID_INPUT,
    ERROR_CODE_PERMANENT,
    ERROR_CODE_TRANSIENT,
    JOB_STATUS_CANCELLED,
    JOB_STATUS_FAILED,
    JOB_STATUS_PENDING,
    JOB_STATUS_QUARANTINED,
    JOB_STATUS_RUNNING,
    JOB_STATUS_WAITING_FOR_HUMAN,
    JOB_STATUS_WAITING_FOR_PARALLEL,
    JOB_STATUS_WAITING_FOR_WORKER,
    TASK_STATUS_CANCELLED,
    TASK_STATUS_FAILURE,
    TASK_STATUS_SUCCESS,
)
from ..worker_config_loader import load_worker_configs_to_redis

logger = getLogger(__name__)


def json_dumps(obj) -> str:
    return dumps(obj).decode("utf-8")


def json_response(data, **kwargs) -> web.Response:
    return web.json_response(data, dumps=json_dumps, **kwargs)


async def status_handler(_request: web.Request) -> web.Response:
    return json_response({"status": "ok"})


async def metrics_handler(_request: web.Request) -> web.Response:
    return web.Response(body=render(), content_type="text/plain")


def create_job_handler_factory(blueprint: StateMachineBlueprint) -> Callable[[web.Request], Any]:
    async def handler(request: web.Request) -> web.Response:
        engine = request.app[ENGINE_KEY]
        try:
            request_body = await request.json(loads=loads)
            initial_data = request_body.get("initial_data", {})
            # Backward compatibility: if initial_data key is missing, assume body is initial_data
            if (
                "initial_data" not in request_body
                and request_body
                and not any(k in request_body for k in ("webhook_url",))
            ):
                initial_data = request_body

            webhook_url = request_body.get("webhook_url")
        except Exception:
            return json_response({"error": "Invalid JSON body"}, status=400)

        client_config = request["client_config"]
        carrier = {str(k): v for k, v in request.headers.items()}

        job_id = str(uuid4())
        job_state = {
            "id": job_id,
            "blueprint_name": blueprint.name,
            "current_state": blueprint.start_state,
            "initial_data": initial_data,
            "state_history": {},
            "status": JOB_STATUS_PENDING,
            "tracing_context": carrier,
            "client_config": client_config,
            "webhook_url": webhook_url,
        }
        await engine.storage.save_job_state(job_id, job_state)
        await engine.storage.enqueue_job(job_id)
        metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
        return json_response({"status": "accepted", "job_id": job_id}, status=202)

    return handler


async def get_job_status_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    job_id = request.match_info.get("job_id")
    if not job_id:
        return json_response({"error": "job_id is required in path"}, status=400)
    job_state = await engine.storage.get_job_state(job_id)
    if not job_state:
        return json_response({"error": "Job not found"}, status=404)
    return json_response(job_state, status=200)


async def cancel_job_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    job_id = request.match_info.get("job_id")
    if not job_id:
        return json_response({"error": "job_id is required in path"}, status=400)

    job_state = await engine.storage.get_job_state(job_id)
    if not job_state:
        return json_response({"error": "Job not found"}, status=404)

    if job_state.get("status") != JOB_STATUS_WAITING_FOR_WORKER:
        return json_response(
            {"error": "Job is not in a state that can be cancelled (must be waiting for a worker)."},
            status=409,
        )

    worker_id = job_state.get("task_worker_id")
    if not worker_id:
        return json_response(
            {"error": "Cannot cancel job: worker_id not found in job state."},
            status=500,
        )

    worker_info = await engine.storage.get_worker_info(worker_id)
    task_id = job_state.get("current_task_id")
    if not task_id:
        return json_response(
            {"error": "Cannot cancel job: task_id not found in job state."},
            status=500,
        )

    # Set Redis flag as a reliable fallback/primary mechanism
    await engine.storage.set_task_cancellation_flag(task_id)

    # Attempt WebSocket-based cancellation if supported
    if worker_info and worker_info.get("capabilities", {}).get("websockets"):
        command = {"command": "cancel_task", "task_id": task_id, "job_id": job_id}
        sent = await engine.ws_manager.send_command(worker_id, command)
        if sent:
            return json_response({"status": "cancellation_request_sent"})
        else:
            logger.warning(f"Failed to send WebSocket cancellation for task {task_id}, but Redis flag is set.")
            # Proceed to return success, as the Redis flag will handle it

    return json_response({"status": "cancellation_request_accepted"})


async def get_job_history_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    job_id = request.match_info.get("job_id")
    if not job_id:
        return json_response({"error": "job_id is required in path"}, status=400)
    history = await engine.history_storage.get_job_history(job_id)
    return json_response(history)


async def get_blueprint_graph_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    blueprint_name = request.match_info.get("blueprint_name")
    if not blueprint_name:
        return json_response({"error": "blueprint_name is required in path"}, status=400)

    blueprint = engine.blueprints.get(blueprint_name)
    if not blueprint:
        return json_response({"error": "Blueprint not found"}, status=404)

    try:
        graph_dot = blueprint.render_graph()
        return web.Response(text=graph_dot, content_type="text/vnd.graphviz")
    except FileNotFoundError:
        error_msg = "Graphviz is not installed on the server. Cannot generate graph."
        logger.error(error_msg)
        return json_response({"error": error_msg}, status=501)


async def get_workers_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    workers = await engine.storage.get_available_workers()
    return json_response(workers)


async def get_jobs_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    try:
        limit = int(request.query.get("limit", "100"))
        offset = int(request.query.get("offset", "0"))
    except ValueError:
        return json_response({"error": "Invalid limit/offset parameter"}, status=400)

    jobs = await engine.history_storage.get_jobs(limit=limit, offset=offset)
    return json_response(jobs)


async def get_dashboard_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    worker_count = await engine.storage.get_active_worker_count()
    queue_length = await engine.storage.get_job_queue_length()
    job_summary = await engine.history_storage.get_job_summary()

    dashboard_data = {
        "workers": {"total": worker_count},
        "jobs": {"queued": queue_length, **job_summary},
    }
    return json_response(dashboard_data)


async def task_result_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    try:
        data = await request.json(loads=loads)
        job_id = data.get("job_id")
        task_id = data.get("task_id")
        result = data.get("result", {})
        result_status = result.get("status", TASK_STATUS_SUCCESS)
        error_message = result.get("error")
        payload_worker_id = data.get("worker_id")
    except Exception:
        return json_response({"error": "Invalid JSON body"}, status=400)

    # Security check: Ensure the worker_id from the payload matches the authenticated worker
    authenticated_worker_id = request.get("worker_id")
    if not authenticated_worker_id:
        return json_response({"error": "Could not identify authenticated worker."}, status=500)

    if payload_worker_id and payload_worker_id != authenticated_worker_id:
        return json_response(
            {
                "error": f"Forbidden: Authenticated worker '{authenticated_worker_id}' "
                f"cannot submit results for another worker '{payload_worker_id}'.",
            },
            status=403,
        )

    if not job_id or not task_id:
        return json_response({"error": "job_id and task_id are required"}, status=400)

    job_state = await engine.storage.get_job_state(job_id)
    if not job_state:
        return json_response({"error": "Job not found"}, status=404)

    # Handle parallel task completion
    if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
        await engine.storage.remove_job_from_watch(f"{job_id}:{task_id}")
        job_state.setdefault("aggregation_results", {})[task_id] = result
        job_state.setdefault("active_branches", []).remove(task_id)

        if not job_state["active_branches"]:
            logger.info(f"All parallel branches for job {job_id} have completed.")
            job_state["status"] = JOB_STATUS_RUNNING
            job_state["current_state"] = job_state["aggregation_target"]
            await engine.storage.save_job_state(job_id, job_state)
            await engine.storage.enqueue_job(job_id)
        else:
            logger.info(
                f"Branch {task_id} for job {job_id} completed. Waiting for {len(job_state['active_branches'])} more.",
            )
            await engine.storage.save_job_state(job_id, job_state)

        return json_response({"status": "parallel_branch_result_accepted"}, status=200)

    await engine.storage.remove_job_from_watch(job_id)

    import time

    now = time.monotonic()
    dispatched_at = job_state.get("task_dispatched_at", now)
    duration_ms = int((now - dispatched_at) * 1000)

    await engine.history_storage.log_job_event(
        {
            "job_id": job_id,
            "state": job_state.get("current_state"),
            "event_type": "task_finished",
            "duration_ms": duration_ms,
            "worker_id": authenticated_worker_id,
            "context_snapshot": {**job_state, "result": result},
        },
    )

    job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}

    if result_status == TASK_STATUS_FAILURE:
        error_details = result.get("error", {})
        error_type = ERROR_CODE_TRANSIENT
        error_message = "No error details provided."

        if isinstance(error_details, dict):
            error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
            error_message = error_details.get("message", "No error message provided.")
        elif isinstance(error_details, str):
            error_message = error_details

        logger.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")

        if error_type == ERROR_CODE_PERMANENT:
            job_state["status"] = JOB_STATUS_QUARANTINED
            job_state["error_message"] = f"Task failed with permanent error: {error_message}"
            await engine.storage.save_job_state(job_id, job_state)
            await engine.storage.quarantine_job(job_id)
        elif error_type == ERROR_CODE_INVALID_INPUT:
            job_state["status"] = JOB_STATUS_FAILED
            job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
            await engine.storage.save_job_state(job_id, job_state)
        else:  # TRANSIENT_ERROR
            await engine.handle_task_failure(job_state, task_id, error_message)

        return json_response({"status": "result_accepted_failure"}, status=200)

    if result_status == TASK_STATUS_CANCELLED:
        logger.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
        job_state["status"] = JOB_STATUS_CANCELLED
        await engine.storage.save_job_state(job_id, job_state)
        transitions = job_state.get("current_task_transitions", {})
        if next_state := transitions.get("cancelled"):
            job_state["current_state"] = next_state
            job_state["status"] = JOB_STATUS_RUNNING
            await engine.storage.save_job_state(job_id, job_state)
            await engine.storage.enqueue_job(job_id)
        return json_response({"status": "result_accepted_cancelled"}, status=200)

    transitions = job_state.get("current_task_transitions", {})
    if next_state := transitions.get(result_status):
        logger.info(f"Job {job_id} transitioning based on worker status '{result_status}' to state '{next_state}'")

        worker_data = result.get("data")
        if worker_data and isinstance(worker_data, dict):
            if "state_history" not in job_state:
                job_state["state_history"] = {}
            job_state["state_history"].update(worker_data)

        job_state["current_state"] = next_state
        job_state["status"] = JOB_STATUS_RUNNING
        await engine.storage.save_job_state(job_id, job_state)
        await engine.storage.enqueue_job(job_id)
    else:
        logger.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
        job_state["status"] = JOB_STATUS_FAILED
        job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
        await engine.storage.save_job_state(job_id, job_state)

    return json_response({"status": "result_accepted_success"}, status=200)


async def human_approval_webhook_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    job_id = request.match_info.get("job_id")
    if not job_id:
        return json_response({"error": "job_id is required in path"}, status=400)
    try:
        data = await request.json(loads=loads)
        decision = data.get("decision")
        if not decision:
            return json_response({"error": "decision is required in body"}, status=400)
    except Exception:
        return json_response({"error": "Invalid JSON body"}, status=400)
    job_state = await engine.storage.get_job_state(job_id)
    if not job_state:
        return json_response({"error": "Job not found"}, status=404)
    if job_state.get("status") not in [JOB_STATUS_WAITING_FOR_WORKER, JOB_STATUS_WAITING_FOR_HUMAN]:
        return json_response({"error": "Job is not in a state that can be approved"}, status=409)
    transitions = job_state.get("current_task_transitions", {})
    next_state = transitions.get(decision)
    if not next_state:
        return json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
    job_state["current_state"] = next_state
    job_state["status"] = JOB_STATUS_RUNNING
    await engine.storage.save_job_state(job_id, job_state)
    await engine.storage.enqueue_job(job_id)
    return json_response({"status": "approval_received", "job_id": job_id})


async def get_quarantined_jobs_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    jobs = await engine.storage.get_quarantined_jobs()
    return json_response(jobs)


async def reload_worker_configs_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    logger.info("Received request to reload worker configurations.")
    if not engine.config.WORKERS_CONFIG_PATH:
        return json_response(
            {"error": "WORKERS_CONFIG_PATH is not set, cannot reload configs."},
            status=400,
        )

    await load_worker_configs_to_redis(engine.storage, engine.config.WORKERS_CONFIG_PATH)
    return json_response({"status": "worker_configs_reloaded"})


async def flush_db_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    logger.warning("Received request to flush the database.")
    await engine.storage.flush_all()
    await load_client_configs_to_redis(engine.storage)
    return json_response({"status": "db_flushed"}, status=200)


async def docs_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    try:
        content = resources.read_text("avtomatika", "api.html")
    except FileNotFoundError:
        logger.error("api.html not found within the avtomatika package.")
        return json_response({"error": "Documentation file not found on server."}, status=500)

    blueprint_endpoints = []
    for bp in engine.blueprints.values():
        if not bp.api_endpoint:
            continue

        version_prefix = f"/{bp.api_version}" if bp.api_version else ""
        endpoint_path = bp.api_endpoint if bp.api_endpoint.startswith("/") else f"/{bp.api_endpoint}"
        full_path = f"/api{version_prefix}{endpoint_path}"

        blueprint_endpoints.append(
            {
                "id": f"post-create-{bp.name.replace('_', '-')}",
                "name": f"Create {bp.name.replace('_', ' ').title()} Job",
                "method": "POST",
                "path": full_path,
                "description": f"Creates and starts a new instance (Job) of the `{bp.name}` blueprint.",
                "request": {"body": {"initial_data": {}}},
                "responses": [
                    {
                        "code": "202 Accepted",
                        "description": "Job successfully accepted for processing.",
                        "body": {"status": "accepted", "job_id": "..."},
                    }
                ],
            }
        )

    if blueprint_endpoints:
        endpoints_json = dumps(blueprint_endpoints, option=OPT_INDENT_2).decode("utf-8")
        marker = "group: 'Protected API',\n endpoints: ["
        content = content.replace(marker, f"{marker}\n{endpoints_json.strip('[]')},")

    return web.Response(text=content, content_type="text/html")


async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
    engine = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        raise web.HTTPBadRequest(text="worker_id is required")

    ws = web.WebSocketResponse()
    await ws.prepare(request)

    await engine.ws_manager.register(worker_id, ws)
    try:
        async for msg in ws:
            if msg.type == WSMsgType.TEXT:
                try:
                    data = msg.json()
                    await engine.ws_manager.handle_message(worker_id, data)
                except Exception as e:
                    logger.error(f"Error processing WebSocket message from {worker_id}: {e}")
            elif msg.type == WSMsgType.ERROR:
                logger.error(f"WebSocket connection for {worker_id} closed with exception {ws.exception()}")
                break
    finally:
        await engine.ws_manager.unregister(worker_id)
    return ws


async def handle_get_next_task(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        return json_response({"error": "worker_id is required in path"}, status=400)

    logger.debug(f"Worker {worker_id} is requesting a new task.")
    task = await engine.storage.dequeue_task_for_worker(worker_id, engine.config.WORKER_POLL_TIMEOUT_SECONDS)

    if task:
        logger.info(f"Sending task {task.get('task_id')} to worker {worker_id}")
        return json_response(task, status=200)
    logger.debug(f"No tasks for worker {worker_id}, responding 204.")
    return web.Response(status=204)


async def worker_update_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    worker_id = request.match_info.get("worker_id")
    if not worker_id:
        return json_response({"error": "worker_id is required in path"}, status=400)

    ttl = engine.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
    update_data = None

    if request.can_read_body:
        try:
            update_data = await request.json(loads=loads)
        except Exception:
            logger.warning(
                f"Received PATCH from worker {worker_id} with non-JSON body. Treating as TTL-only heartbeat."
            )

    if update_data:
        updated_worker = await engine.storage.update_worker_status(worker_id, update_data, ttl)
        if not updated_worker:
            return json_response({"error": "Worker not found"}, status=404)

        await engine.history_storage.log_worker_event(
            {
                "worker_id": worker_id,
                "event_type": "status_update",
                "worker_info_snapshot": updated_worker,
            },
        )
        return json_response(updated_worker, status=200)
    else:
        refreshed = await engine.storage.refresh_worker_ttl(worker_id, ttl)
        if not refreshed:
            return json_response({"error": "Worker not found"}, status=404)
        return json_response({"status": "ttl_refreshed"})


async def register_worker_handler(request: web.Request) -> web.Response:
    engine = request.app[ENGINE_KEY]
    worker_data = request.get("worker_registration_data")
    if not worker_data:
        return json_response({"error": "Worker data not found in request"}, status=500)

    worker_id = worker_data.get("worker_id")
    if not worker_id:
        return json_response({"error": "Missing required field: worker_id"}, status=400)

    ttl = engine.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
    await engine.storage.register_worker(worker_id, worker_data, ttl)

    logger.info(
        f"Worker '{worker_id}' registered with info: {worker_data}",
    )

    await engine.history_storage.log_worker_event(
        {
            "worker_id": worker_id,
            "event_type": "registered",
            "worker_info_snapshot": worker_data,
        },
    )
    return json_response({"status": "registered"}, status=200)
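The handlers above define the full client-facing job lifecycle: a POST to a blueprint endpoint returns 202 with a job_id, GET /jobs/{job_id} reads the stored state, and POST /jobs/{job_id}/cancel requests cancellation. A minimal client sketch follows; the base URL, the /api/v1/example path, and the X-API-Key header are illustrative assumptions, since the real path comes from each blueprint's api_version and api_endpoint and authentication is supplied by middleware not shown in this file.

# Hypothetical usage sketch, not part of the package: endpoint path, port, and
# auth header are assumptions. The response contract (202 + job_id, then
# GET /jobs/{job_id}) follows create_job_handler_factory and
# get_job_status_handler above.
import asyncio

from aiohttp import ClientSession


async def main() -> None:
    headers = {"X-API-Key": "my-client-key"}  # assumption: real auth is middleware-defined
    async with ClientSession(base_url="http://localhost:8080", headers=headers) as session:
        # "/api/v1/example" stands in for f"/api/{bp.api_version}{bp.api_endpoint}"
        # of an actual registered blueprint.
        resp = await session.post(
            "/api/v1/example",
            json={"initial_data": {"document": "invoice.pdf"}},
        )
        job_id = (await resp.json())["job_id"]  # expects 202 {"status": "accepted", ...}

        # Read back the persisted job state via the common routes.
        status = await session.get(f"/api/v1/jobs/{job_id}")
        print(await status.json())


asyncio.run(main())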
avtomatika/api/routes.py
ADDED
@@ -0,0 +1,118 @@

from typing import TYPE_CHECKING

from aiohttp import web

from ..app_keys import ENGINE_KEY
from ..history.noop import NoOpHistoryStorage
from ..quota import quota_middleware_factory
from ..ratelimit import rate_limit_middleware_factory
from ..security import client_auth_middleware_factory, worker_auth_middleware_factory
from .handlers import (
    cancel_job_handler,
    create_job_handler_factory,
    docs_handler,
    flush_db_handler,
    get_blueprint_graph_handler,
    get_dashboard_handler,
    get_job_history_handler,
    get_job_status_handler,
    get_jobs_handler,
    get_quarantined_jobs_handler,
    get_workers_handler,
    handle_get_next_task,
    human_approval_webhook_handler,
    metrics_handler,
    register_worker_handler,
    reload_worker_configs_handler,
    status_handler,
    task_result_handler,
    websocket_handler,
    worker_update_handler,
)

if TYPE_CHECKING:
    from ..engine import OrchestratorEngine


def setup_routes(app: web.Application, engine: "OrchestratorEngine") -> None:
    """Sets up all application routes and sub-applications."""

    # --- Public API (Unprotected) ---
    public_app = web.Application()
    public_app[ENGINE_KEY] = engine
    public_app.router.add_get("/status", status_handler)
    public_app.router.add_get("/metrics", metrics_handler)
    public_app.router.add_post("/webhooks/approval/{job_id}", human_approval_webhook_handler)
    public_app.router.add_post("/debug/flush_db", flush_db_handler)
    public_app.router.add_get("/docs", docs_handler)
    public_app.router.add_get("/jobs/quarantined", get_quarantined_jobs_handler)
    app.add_subapp("/_public/", public_app)

    # --- Protected API (Client Access) ---
    auth_middleware = client_auth_middleware_factory(engine.storage)
    quota_middleware = quota_middleware_factory(engine.storage)
    api_middlewares = [auth_middleware, quota_middleware]

    protected_app = web.Application(middlewares=api_middlewares)
    protected_app[ENGINE_KEY] = engine
    versioned_apps: dict[str, web.Application] = {}
    has_unversioned_routes = False

    # Register Blueprint routes
    for bp in engine.blueprints.values():
        if not bp.api_endpoint:
            continue
        endpoint = bp.api_endpoint if bp.api_endpoint.startswith("/") else f"/{bp.api_endpoint}"

        handler = create_job_handler_factory(bp)

        if bp.api_version:
            if bp.api_version not in versioned_apps:
                versioned_apps[bp.api_version] = web.Application(middlewares=api_middlewares)
                versioned_apps[bp.api_version][ENGINE_KEY] = engine
            versioned_apps[bp.api_version].router.add_post(endpoint, handler)
        else:
            protected_app.router.add_post(endpoint, handler)
            has_unversioned_routes = True

    # Common routes for all protected apps
    all_protected_apps = list(versioned_apps.values())
    if has_unversioned_routes:
        all_protected_apps.append(protected_app)

    for sub_app in all_protected_apps:
        _register_common_routes(sub_app, engine)

    # Mount protected apps
    if has_unversioned_routes:
        app.add_subapp("/api/", protected_app)
    for version, sub_app in versioned_apps.items():
        app.add_subapp(f"/api/{version}", sub_app)

    # --- Worker API (Worker Access) ---
    worker_auth_middleware = worker_auth_middleware_factory(engine.storage, engine.config)
    worker_middlewares = [worker_auth_middleware]
    if engine.config.RATE_LIMITING_ENABLED:
        worker_rate_limiter = rate_limit_middleware_factory(storage=engine.storage, limit=5, period=60)
        worker_middlewares.append(worker_rate_limiter)

    worker_app = web.Application(middlewares=worker_middlewares)
    worker_app[ENGINE_KEY] = engine
    worker_app.router.add_post("/workers/register", register_worker_handler)
    worker_app.router.add_get("/workers/{worker_id}/tasks/next", handle_get_next_task)
    worker_app.router.add_patch("/workers/{worker_id}", worker_update_handler)
    worker_app.router.add_post("/tasks/result", task_result_handler)
    worker_app.router.add_get("/ws/{worker_id}", websocket_handler)
    app.add_subapp("/_worker/", worker_app)


def _register_common_routes(app: web.Application, engine: "OrchestratorEngine") -> None:
    app.router.add_get("/jobs/{job_id}", get_job_status_handler)
    app.router.add_post("/jobs/{job_id}/cancel", cancel_job_handler)
    if not isinstance(engine.history_storage, NoOpHistoryStorage):
        app.router.add_get("/jobs/{job_id}/history", get_job_history_handler)
    app.router.add_get("/blueprints/{blueprint_name}/graph", get_blueprint_graph_handler)
    app.router.add_get("/workers", get_workers_handler)
    app.router.add_get("/jobs", get_jobs_handler)
    app.router.add_get("/dashboard", get_dashboard_handler)
    app.router.add_post("/admin/reload-workers", reload_worker_configs_handler)