avtomatika-1.0b3-py3-none-any.whl → avtomatika-1.0b5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika/__init__.py +2 -2
- avtomatika/api.html +0 -11
- avtomatika/blueprint.py +9 -11
- avtomatika/config.py +7 -0
- avtomatika/context.py +18 -18
- avtomatika/data_types.py +6 -7
- avtomatika/datastore.py +2 -2
- avtomatika/dispatcher.py +20 -21
- avtomatika/engine.py +107 -68
- avtomatika/executor.py +168 -148
- avtomatika/history/base.py +7 -7
- avtomatika/history/noop.py +7 -7
- avtomatika/history/postgres.py +7 -9
- avtomatika/history/sqlite.py +7 -10
- avtomatika/logging_config.py +1 -1
- avtomatika/storage/__init__.py +2 -2
- avtomatika/storage/base.py +31 -20
- avtomatika/storage/memory.py +36 -43
- avtomatika/storage/redis.py +124 -60
- avtomatika/worker_config_loader.py +2 -2
- avtomatika/ws_manager.py +1 -2
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/METADATA +44 -9
- avtomatika-1.0b5.dist-info/RECORD +37 -0
- avtomatika-1.0b3.dist-info/RECORD +0 -37
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/WHEEL +0 -0
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/licenses/LICENSE +0 -0
- {avtomatika-1.0b3.dist-info → avtomatika-1.0b5.dist-info}/top_level.txt +0 -0
avtomatika/engine.py
CHANGED
@@ -1,12 +1,13 @@
 from asyncio import Task, create_task, gather, get_running_loop, wait_for
 from asyncio import TimeoutError as AsyncTimeoutError
 from logging import getLogger
-from typing import
+from typing import Any, Callable
 from uuid import uuid4
 
 from aiohttp import ClientSession, WSMsgType, web
 from aiohttp.web import AppKey
 from aioprometheus import render
+from orjson import OPT_INDENT_2, dumps, loads
 
 from . import metrics
 from .blueprint import StateMachineBlueprint
@@ -42,15 +43,21 @@ WATCHER_TASK_KEY = AppKey("watcher_task", Task)
 REPUTATION_CALCULATOR_TASK_KEY = AppKey("reputation_calculator_task", Task)
 HEALTH_CHECKER_TASK_KEY = AppKey("health_checker_task", Task)
 
-
 metrics.init_metrics()
 
-
 logger = getLogger(__name__)
 
 
+def json_dumps(obj: Any) -> str:
+    return dumps(obj).decode("utf-8")
+
+
+def json_response(data: Any, **kwargs: Any) -> web.Response:
+    return web.json_response(data, dumps=json_dumps, **kwargs)
+
+
 async def status_handler(_request: web.Request) -> web.Response:
-    return
+    return json_response({"status": "ok"})
 
 
 async def metrics_handler(_request: web.Request) -> web.Response:
@@ -63,7 +70,7 @@ class OrchestratorEngine:
         setup_telemetry()
         self.storage = storage
         self.config = config
-        self.blueprints:
+        self.blueprints: dict[str, StateMachineBlueprint] = {}
         self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
         self.ws_manager = WebSocketManager()
         self.app = web.Application(middlewares=[compression_middleware])
@@ -245,9 +252,9 @@ class OrchestratorEngine:
     def _create_job_handler(self, blueprint: StateMachineBlueprint) -> Callable:
         async def handler(request: web.Request) -> web.Response:
             try:
-                initial_data = await request.json()
+                initial_data = await request.json(loads=loads)
             except Exception:
-                return
+                return json_response({"error": "Invalid JSON body"}, status=400)
 
             client_config = request["client_config"]
             carrier = {str(k): v for k, v in request.headers.items()}
@@ -266,37 +273,37 @@ class OrchestratorEngine:
             await self.storage.save_job_state(job_id, job_state)
             await self.storage.enqueue_job(job_id)
             metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
-            return
+            return json_response({"status": "accepted", "job_id": job_id}, status=202)
 
         return handler
 
     async def _get_job_status_handler(self, request: web.Request) -> web.Response:
         job_id = request.match_info.get("job_id")
         if not job_id:
-            return
+            return json_response({"error": "job_id is required in path"}, status=400)
         job_state = await self.storage.get_job_state(job_id)
         if not job_state:
-            return
-        return
+            return json_response({"error": "Job not found"}, status=404)
+        return json_response(job_state, status=200)
 
     async def _cancel_job_handler(self, request: web.Request) -> web.Response:
         job_id = request.match_info.get("job_id")
         if not job_id:
-            return
+            return json_response({"error": "job_id is required in path"}, status=400)
 
         job_state = await self.storage.get_job_state(job_id)
         if not job_state:
-            return
+            return json_response({"error": "Job not found"}, status=404)
 
         if job_state.get("status") != "waiting_for_worker":
-            return
+            return json_response(
                 {"error": "Job is not in a state that can be cancelled (must be waiting for a worker)."},
                 status=409,
             )
 
         worker_id = job_state.get("task_worker_id")
         if not worker_id:
-            return
+            return json_response(
                 {"error": "Cannot cancel job: worker_id not found in job state."},
                 status=500,
             )
@@ -304,7 +311,7 @@ class OrchestratorEngine:
         worker_info = await self.storage.get_worker_info(worker_id)
         task_id = job_state.get("current_task_id")
         if not task_id:
-            return
+            return json_response(
                 {"error": "Cannot cancel job: task_id not found in job state."},
                 status=500,
             )
@@ -317,28 +324,28 @@ class OrchestratorEngine:
         command = {"command": "cancel_task", "task_id": task_id, "job_id": job_id}
         sent = await self.ws_manager.send_command(worker_id, command)
         if sent:
-            return
+            return json_response({"status": "cancellation_request_sent"})
         else:
             logger.warning(f"Failed to send WebSocket cancellation for task {task_id}, but Redis flag is set.")
             # Proceed to return success, as the Redis flag will handle it
 
-        return
+        return json_response({"status": "cancellation_request_accepted"})
 
     async def _get_job_history_handler(self, request: web.Request) -> web.Response:
         job_id = request.match_info.get("job_id")
         if not job_id:
-            return
+            return json_response({"error": "job_id is required in path"}, status=400)
         history = await self.history_storage.get_job_history(job_id)
-        return
+        return json_response(history)
 
     async def _get_blueprint_graph_handler(self, request: web.Request) -> web.Response:
         blueprint_name = request.match_info.get("blueprint_name")
         if not blueprint_name:
-            return
+            return json_response({"error": "blueprint_name is required in path"}, status=400)
 
         blueprint = self.blueprints.get(blueprint_name)
         if not blueprint:
-            return
+            return json_response({"error": "Blueprint not found"}, status=404)
 
         try:
             graph_dot = blueprint.render_graph()
@@ -346,21 +353,21 @@ class OrchestratorEngine:
         except FileNotFoundError:
             error_msg = "Graphviz is not installed on the server. Cannot generate graph."
             logger.error(error_msg)
-            return
+            return json_response({"error": error_msg}, status=501)
 
     async def _get_workers_handler(self, request: web.Request) -> web.Response:
         workers = await self.storage.get_available_workers()
-        return
+        return json_response(workers)
 
     async def _get_jobs_handler(self, request: web.Request) -> web.Response:
         try:
             limit = int(request.query.get("limit", "100"))
             offset = int(request.query.get("offset", "0"))
         except ValueError:
-            return
+            return json_response({"error": "Invalid limit/offset parameter"}, status=400)
 
         jobs = await self.history_storage.get_jobs(limit=limit, offset=offset)
-        return
+        return json_response(jobs)
 
     async def _get_dashboard_handler(self, request: web.Request) -> web.Response:
         worker_count = await self.storage.get_active_worker_count()
@@ -371,13 +378,13 @@ class OrchestratorEngine:
             "workers": {"total": worker_count},
             "jobs": {"queued": queue_length, **job_summary},
         }
-        return
+        return json_response(dashboard_data)
 
     async def _task_result_handler(self, request: web.Request) -> web.Response:
         import logging
 
         try:
-            data = await request.json()
+            data = await request.json(loads=loads)
             job_id = data.get("job_id")
             task_id = data.get("task_id")
             result = data.get("result", {})
@@ -385,16 +392,16 @@ class OrchestratorEngine:
             error_message = result.get("error")
             payload_worker_id = data.get("worker_id")
         except Exception:
-            return
+            return json_response({"error": "Invalid JSON body"}, status=400)
 
         # Security check: Ensure the worker_id from the payload matches the authenticated worker
         authenticated_worker_id = request.get("worker_id")
         if not authenticated_worker_id:
             # This should not happen if the auth middleware is working correctly
-            return
+            return json_response({"error": "Could not identify authenticated worker."}, status=500)
 
         if payload_worker_id and payload_worker_id != authenticated_worker_id:
-            return
+            return json_response(
                 {
                     "error": f"Forbidden: Authenticated worker '{authenticated_worker_id}' "
                     f"cannot submit results for another worker '{payload_worker_id}'.",
@@ -403,11 +410,11 @@ class OrchestratorEngine:
             )
 
         if not job_id or not task_id:
-            return
+            return json_response({"error": "job_id and task_id are required"}, status=400)
 
         job_state = await self.storage.get_job_state(job_id)
         if not job_state:
-            return
+            return json_response({"error": "Job not found"}, status=404)
 
         # Handle parallel task completion
         if job_state.get("status") == "waiting_for_parallel_tasks":
@@ -428,7 +435,7 @@ class OrchestratorEngine:
             )
             await self.storage.save_job_state(job_id, job_state)
 
-            return
+            return json_response({"status": "parallel_branch_result_accepted"}, status=200)
 
         await self.storage.remove_job_from_watch(job_id)
 
@@ -477,7 +484,7 @@ class OrchestratorEngine:
             else:  # TRANSIENT_ERROR or any other/unspecified error
                 await self._handle_task_failure(job_state, task_id, error_message)
 
-            return
+            return json_response({"status": "result_accepted_failure"}, status=200)
 
         if result_status == "cancelled":
             logging.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
@@ -490,7 +497,7 @@ class OrchestratorEngine:
             job_state["status"] = "running"  # It's running the cancellation handler now
             await self.storage.save_job_state(job_id, job_state)
             await self.storage.enqueue_job(job_id)
-            return
+            return json_response({"status": "result_accepted_cancelled"}, status=200)
 
         transitions = job_state.get("current_task_transitions", {})
         if next_state := transitions.get(result_status):
@@ -512,7 +519,7 @@ class OrchestratorEngine:
             job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
             await self.storage.save_job_state(job_id, job_state)
 
-        return
+        return json_response({"status": "result_accepted_success"}, status=200)
 
     async def _handle_task_failure(self, job_state: dict, task_id: str, error_message: str | None):
         import logging
@@ -553,62 +560,97 @@ class OrchestratorEngine:
     async def _human_approval_webhook_handler(self, request: web.Request) -> web.Response:
         job_id = request.match_info.get("job_id")
        if not job_id:
-            return
+            return json_response({"error": "job_id is required in path"}, status=400)
         try:
-            data = await request.json()
+            data = await request.json(loads=loads)
             decision = data.get("decision")
             if not decision:
-                return
+                return json_response({"error": "decision is required in body"}, status=400)
         except Exception:
-            return
+            return json_response({"error": "Invalid JSON body"}, status=400)
         job_state = await self.storage.get_job_state(job_id)
         if not job_state:
-            return
+            return json_response({"error": "Job not found"}, status=404)
         if job_state.get("status") not in ["waiting_for_worker", "waiting_for_human"]:
-            return
+            return json_response({"error": "Job is not in a state that can be approved"}, status=409)
         transitions = job_state.get("current_task_transitions", {})
         next_state = transitions.get(decision)
         if not next_state:
-            return
+            return json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
         job_state["current_state"] = next_state
         job_state["status"] = "running"
         await self.storage.save_job_state(job_id, job_state)
         await self.storage.enqueue_job(job_id)
-        return
+        return json_response({"status": "approval_received", "job_id": job_id})
 
     async def _get_quarantined_jobs_handler(self, request: web.Request) -> web.Response:
         """Returns a list of all job IDs in the quarantine queue."""
         jobs = await self.storage.get_quarantined_jobs()
-        return
+        return json_response(jobs)
 
     async def _reload_worker_configs_handler(self, request: web.Request) -> web.Response:
         """Handles the dynamic reloading of worker configurations."""
         logger.info("Received request to reload worker configurations.")
         if not self.config.WORKERS_CONFIG_PATH:
-            return
+            return json_response(
                 {"error": "WORKERS_CONFIG_PATH is not set, cannot reload configs."},
                 status=400,
             )
 
         await load_worker_configs_to_redis(self.storage, self.config.WORKERS_CONFIG_PATH)
-        return
+        return json_response({"status": "worker_configs_reloaded"})
 
     async def _flush_db_handler(self, request: web.Request) -> web.Response:
         logger.warning("Received request to flush the database.")
         await self.storage.flush_all()
         await load_client_configs_to_redis(self.storage)
-        return
+        return json_response({"status": "db_flushed"}, status=200)
 
-
-    async def _docs_handler(request: web.Request) -> web.Response:
+    async def _docs_handler(self, request: web.Request) -> web.Response:
         from importlib import resources
 
         try:
             content = resources.read_text("avtomatika", "api.html")
-            return web.Response(text=content, content_type="text/html")
         except FileNotFoundError:
             logger.error("api.html not found within the avtomatika package.")
-            return
+            return json_response({"error": "Documentation file not found on server."}, status=500)
+
+        # Generate dynamic documentation for registered blueprints
+        blueprint_endpoints = []
+        for bp in self.blueprints.values():
+            if not bp.api_endpoint:
+                continue
+
+            version_prefix = f"/{bp.api_version}" if bp.api_version else ""
+            endpoint_path = bp.api_endpoint if bp.api_endpoint.startswith("/") else f"/{bp.api_endpoint}"
+            full_path = f"/api{version_prefix}{endpoint_path}"
+
+            blueprint_endpoints.append(
+                {
+                    "id": f"post-create-{bp.name.replace('_', '-')}",
+                    "name": f"Create {bp.name.replace('_', ' ').title()} Job",
+                    "method": "POST",
+                    "path": full_path,
+                    "description": f"Creates and starts a new instance (Job) of the `{bp.name}` blueprint.",
+                    "request": {"body": {"initial_data": {}}},
+                    "responses": [
+                        {
+                            "code": "202 Accepted",
+                            "description": "Job successfully accepted for processing.",
+                            "body": {"status": "accepted", "job_id": "..."},
+                        }
                    ],
+                }
+            )
+
+        # Inject dynamic endpoints into the apiData structure in the HTML
+        if blueprint_endpoints:
+            endpoints_json = dumps(blueprint_endpoints, option=OPT_INDENT_2).decode("utf-8")
+            # We insert the new endpoints at the beginning of the 'Protected API' group
+            marker = "group: 'Protected API',\n endpoints: ["
+            content = content.replace(marker, f"{marker}\n{endpoints_json.strip('[]')},")
+
+        return web.Response(text=content, content_type="text/html")
 
     def _setup_routes(self):
         public_app = web.Application()
@@ -625,7 +667,7 @@ class OrchestratorEngine:
         api_middlewares = [auth_middleware, quota_middleware]
 
         protected_app = web.Application(middlewares=api_middlewares)
-        versioned_apps:
+        versioned_apps: dict[str, web.Application] = {}
        has_unversioned_routes = False
 
         for bp in self.blueprints.values():
@@ -703,14 +745,14 @@ class OrchestratorEngine:
     async def _handle_get_next_task(self, request: web.Request) -> web.Response:
         worker_id = request.match_info.get("worker_id")
         if not worker_id:
-            return
+            return json_response({"error": "worker_id is required in path"}, status=400)
 
         logger.debug(f"Worker {worker_id} is requesting a new task.")
         task = await self.storage.dequeue_task_for_worker(worker_id, self.config.WORKER_POLL_TIMEOUT_SECONDS)
 
         if task:
             logger.info(f"Sending task {task.get('task_id')} to worker {worker_id}")
-            return
+            return json_response(task, status=200)
         logger.debug(f"No tasks for worker {worker_id}, responding 204.")
         return web.Response(status=204)
 
@@ -723,7 +765,7 @@ class OrchestratorEngine:
         """
         worker_id = request.match_info.get("worker_id")
         if not worker_id:
-            return
+            return json_response({"error": "worker_id is required in path"}, status=400)
 
         ttl = self.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
         update_data = None
@@ -731,11 +773,8 @@ class OrchestratorEngine:
         # Check for body content without consuming it if it's not JSON
        if request.can_read_body:
             try:
-                update_data = await request.json()
+                update_data = await request.json(loads=loads)
             except Exception:
-                # This can happen if the body is present but not valid JSON.
-                # We can treat it as a lightweight heartbeat or return an error.
-                # For robustness, let's treat it as a lightweight ping but log a warning.
                 logger.warning(
                     f"Received PATCH from worker {worker_id} with non-JSON body. Treating as TTL-only heartbeat."
                 )
@@ -744,7 +783,7 @@ class OrchestratorEngine:
             # Full update path
             updated_worker = await self.storage.update_worker_status(worker_id, update_data, ttl)
             if not updated_worker:
-                return
+                return json_response({"error": "Worker not found"}, status=404)
 
             await self.history_storage.log_worker_event(
                 {
@@ -753,25 +792,25 @@ class OrchestratorEngine:
                     "worker_info_snapshot": updated_worker,
                 },
             )
-            return
+            return json_response(updated_worker, status=200)
         else:
             # Lightweight TTL-only heartbeat path
             refreshed = await self.storage.refresh_worker_ttl(worker_id, ttl)
             if not refreshed:
-                return
-            return
+                return json_response({"error": "Worker not found"}, status=404)
+            return json_response({"status": "ttl_refreshed"})
 
     async def _register_worker_handler(self, request: web.Request) -> web.Response:
         # The worker_registration_data is attached by the auth middleware
         # to avoid reading the request body twice.
         worker_data = request.get("worker_registration_data")
         if not worker_data:
-            return
+            return json_response({"error": "Worker data not found in request"}, status=500)
 
         worker_id = worker_data.get("worker_id")
         # This check is redundant if the middleware works, but good for safety
         if not worker_id:
-            return
+            return json_response({"error": "Missing required field: worker_id"}, status=400)
 
         ttl = self.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
         await self.storage.register_worker(worker_id, worker_data, ttl)
@@ -787,7 +826,7 @@ class OrchestratorEngine:
                 "worker_info_snapshot": worker_data,
             },
         )
-        return
+        return json_response({"status": "registered"}, status=200)
 
     def run(self):
         self.setup()