avtomatika 1.0b4-py3-none-any.whl → 1.0b6-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
avtomatika/engine.py CHANGED
@@ -1,18 +1,35 @@
  from asyncio import Task, create_task, gather, get_running_loop, wait_for
  from asyncio import TimeoutError as AsyncTimeoutError
  from logging import getLogger
- from typing import Callable, Dict
+ from typing import Any, Callable
  from uuid import uuid4

  from aiohttp import ClientSession, WSMsgType, web
  from aiohttp.web import AppKey
  from aioprometheus import render
+ from orjson import OPT_INDENT_2, dumps, loads

  from . import metrics
  from .blueprint import StateMachineBlueprint
  from .client_config_loader import load_client_configs_to_redis
  from .compression import compression_middleware
  from .config import Config
+ from .constants import (
+ ERROR_CODE_INVALID_INPUT,
+ ERROR_CODE_PERMANENT,
+ ERROR_CODE_TRANSIENT,
+ JOB_STATUS_CANCELLED,
+ JOB_STATUS_FAILED,
+ JOB_STATUS_PENDING,
+ JOB_STATUS_QUARANTINED,
+ JOB_STATUS_RUNNING,
+ JOB_STATUS_WAITING_FOR_HUMAN,
+ JOB_STATUS_WAITING_FOR_PARALLEL,
+ JOB_STATUS_WAITING_FOR_WORKER,
+ TASK_STATUS_CANCELLED,
+ TASK_STATUS_FAILURE,
+ TASK_STATUS_SUCCESS,
+ )
  from .dispatcher import Dispatcher
  from .executor import JobExecutor
  from .health_checker import HealthChecker
@@ -22,6 +39,7 @@ from .logging_config import setup_logging
  from .quota import quota_middleware_factory
  from .ratelimit import rate_limit_middleware_factory
  from .reputation import ReputationCalculator
+ from .scheduler import Scheduler
  from .security import client_auth_middleware_factory, worker_auth_middleware_factory
  from .storage.base import StorageBackend
  from .telemetry import setup_telemetry
@@ -37,20 +55,29 @@ EXECUTOR_KEY = AppKey("executor", JobExecutor)
  WATCHER_KEY = AppKey("watcher", Watcher)
  REPUTATION_CALCULATOR_KEY = AppKey("reputation_calculator", ReputationCalculator)
  HEALTH_CHECKER_KEY = AppKey("health_checker", HealthChecker)
+ SCHEDULER_KEY = AppKey("scheduler", Scheduler)
+
  EXECUTOR_TASK_KEY = AppKey("executor_task", Task)
  WATCHER_TASK_KEY = AppKey("watcher_task", Task)
  REPUTATION_CALCULATOR_TASK_KEY = AppKey("reputation_calculator_task", Task)
  HEALTH_CHECKER_TASK_KEY = AppKey("health_checker_task", Task)
-
+ SCHEDULER_TASK_KEY = AppKey("scheduler_task", Task)

  metrics.init_metrics()

-
  logger = getLogger(__name__)


+ def json_dumps(obj: Any) -> str:
+ return dumps(obj).decode("utf-8")
+
+
+ def json_response(data: Any, **kwargs: Any) -> web.Response:
+ return web.json_response(data, dumps=json_dumps, **kwargs)
+
+
  async def status_handler(_request: web.Request) -> web.Response:
- return web.json_response({"status": "ok"})
+ return json_response({"status": "ok"})


  async def metrics_handler(_request: web.Request) -> web.Response:
@@ -59,11 +86,11 @@ async def metrics_handler(_request: web.Request) -> web.Response:

  class OrchestratorEngine:
  def __init__(self, storage: StorageBackend, config: Config):
- setup_logging(config.LOG_LEVEL, config.LOG_FORMAT)
+ setup_logging(config.LOG_LEVEL, config.LOG_FORMAT, config.TZ)
  setup_telemetry()
  self.storage = storage
  self.config = config
- self.blueprints: Dict[str, StateMachineBlueprint] = {}
+ self.blueprints: dict[str, StateMachineBlueprint] = {}
  self.history_storage: HistoryStorageBase = NoOpHistoryStorage()
  self.ws_manager = WebSocketManager()
  self.app = web.Application(middlewares=[compression_middleware])
@@ -108,7 +135,7 @@ class OrchestratorEngine:
  storage_class = module.SQLiteHistoryStorage
  parsed_uri = urlparse(uri)
  db_path = parsed_uri.path
- storage_args = [db_path]
+ storage_args = [db_path, self.config.TZ]
  except ImportError as e:
  logger.error(f"Could not import SQLiteHistoryStorage, perhaps aiosqlite is not installed? Error: {e}")
  self.history_storage = NoOpHistoryStorage()
@@ -118,7 +145,7 @@ class OrchestratorEngine:
  try:
  module = import_module(".history.postgres", package="avtomatika")
  storage_class = module.PostgresHistoryStorage
- storage_args = [uri]
+ storage_args = [uri, self.config.TZ]
  except ImportError as e:
  logger.error(f"Could not import PostgresHistoryStorage, perhaps asyncpg is not installed? Error: {e}")
  self.history_storage = NoOpHistoryStorage()
@@ -192,11 +219,13 @@ class OrchestratorEngine:
  app[WATCHER_KEY] = Watcher(self)
  app[REPUTATION_CALCULATOR_KEY] = ReputationCalculator(self)
  app[HEALTH_CHECKER_KEY] = HealthChecker(self)
+ app[SCHEDULER_KEY] = Scheduler(self)

  app[EXECUTOR_TASK_KEY] = create_task(app[EXECUTOR_KEY].run())
  app[WATCHER_TASK_KEY] = create_task(app[WATCHER_KEY].run())
  app[REPUTATION_CALCULATOR_TASK_KEY] = create_task(app[REPUTATION_CALCULATOR_KEY].run())
  app[HEALTH_CHECKER_TASK_KEY] = create_task(app[HEALTH_CHECKER_KEY].run())
+ app[SCHEDULER_TASK_KEY] = create_task(app[SCHEDULER_KEY].run())

  async def on_shutdown(self, app: web.Application):
  logger.info("Shutdown sequence started.")
@@ -204,6 +233,7 @@ class OrchestratorEngine:
  app[WATCHER_KEY].stop()
  app[REPUTATION_CALCULATOR_KEY].stop()
  app[HEALTH_CHECKER_KEY].stop()
+ app[SCHEDULER_KEY].stop()
  logger.info("Background task running flags set to False.")

  if hasattr(self.history_storage, "close"):
@@ -219,6 +249,8 @@ class OrchestratorEngine:
  app[WATCHER_TASK_KEY].cancel()
  app[REPUTATION_CALCULATOR_TASK_KEY].cancel()
  app[EXECUTOR_TASK_KEY].cancel()
+ # Scheduler task manages its own loop cancellation in stop(), but just in case:
+ app[SCHEDULER_TASK_KEY].cancel()
  logger.info("Background tasks cancelled.")

  logger.info("Gathering background tasks with a 10s timeout...")
@@ -229,6 +261,7 @@ class OrchestratorEngine:
  app[WATCHER_TASK_KEY],
  app[REPUTATION_CALCULATOR_TASK_KEY],
  app[EXECUTOR_TASK_KEY],
+ app[SCHEDULER_TASK_KEY],
  return_exceptions=True,
  ),
  timeout=10.0,
@@ -242,12 +275,61 @@ class OrchestratorEngine:
  logger.info("HTTP session closed.")
  logger.info("Shutdown sequence finished.")

+ async def create_background_job(
+ self,
+ blueprint_name: str,
+ initial_data: dict[str, Any],
+ source: str = "internal",
+ ) -> str:
+ """Creates a job directly, bypassing the HTTP API layer.
+ Useful for internal schedulers and triggers.
+ """
+ blueprint = self.blueprints.get(blueprint_name)
+ if not blueprint:
+ raise ValueError(f"Blueprint '{blueprint_name}' not found.")
+
+ job_id = str(uuid4())
+ # Use a special internal client config
+ client_config = {
+ "token": "internal-scheduler",
+ "plan": "system",
+ "params": {"source": source},
+ }
+
+ job_state = {
+ "id": job_id,
+ "blueprint_name": blueprint.name,
+ "current_state": blueprint.start_state,
+ "initial_data": initial_data,
+ "state_history": {},
+ "status": JOB_STATUS_PENDING,
+ "tracing_context": {},
+ "client_config": client_config,
+ }
+ await self.storage.save_job_state(job_id, job_state)
+ await self.storage.enqueue_job(job_id)
+ metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
+
+ # Log the creation in history as well (so we can track scheduled jobs)
+ await self.history_storage.log_job_event(
+ {
+ "job_id": job_id,
+ "state": "pending",
+ "event_type": "job_created",
+ "context_snapshot": job_state,
+ "metadata": {"source": source, "scheduled": True},
+ }
+ )
+
+ logger.info(f"Created background job {job_id} for blueprint '{blueprint_name}' (source: {source})")
+ return job_id
+
  def _create_job_handler(self, blueprint: StateMachineBlueprint) -> Callable:
  async def handler(request: web.Request) -> web.Response:
  try:
- initial_data = await request.json()
+ initial_data = await request.json(loads=loads)
  except Exception:
- return web.json_response({"error": "Invalid JSON body"}, status=400)
+ return json_response({"error": "Invalid JSON body"}, status=400)

  client_config = request["client_config"]
  carrier = {str(k): v for k, v in request.headers.items()}
@@ -259,44 +341,44 @@ class OrchestratorEngine:
  "current_state": blueprint.start_state,
  "initial_data": initial_data,
  "state_history": {},
- "status": "pending",
+ "status": JOB_STATUS_PENDING,
  "tracing_context": carrier,
  "client_config": client_config,
  }
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
  metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
- return web.json_response({"status": "accepted", "job_id": job_id}, status=202)
+ return json_response({"status": "accepted", "job_id": job_id}, status=202)

  return handler

  async def _get_job_status_handler(self, request: web.Request) -> web.Response:
  job_id = request.match_info.get("job_id")
  if not job_id:
- return web.json_response({"error": "job_id is required in path"}, status=400)
+ return json_response({"error": "job_id is required in path"}, status=400)
  job_state = await self.storage.get_job_state(job_id)
  if not job_state:
- return web.json_response({"error": "Job not found"}, status=404)
- return web.json_response(job_state, status=200)
+ return json_response({"error": "Job not found"}, status=404)
+ return json_response(job_state, status=200)

  async def _cancel_job_handler(self, request: web.Request) -> web.Response:
  job_id = request.match_info.get("job_id")
  if not job_id:
- return web.json_response({"error": "job_id is required in path"}, status=400)
+ return json_response({"error": "job_id is required in path"}, status=400)

  job_state = await self.storage.get_job_state(job_id)
  if not job_state:
- return web.json_response({"error": "Job not found"}, status=404)
+ return json_response({"error": "Job not found"}, status=404)

- if job_state.get("status") != "waiting_for_worker":
- return web.json_response(
+ if job_state.get("status") != JOB_STATUS_WAITING_FOR_WORKER:
+ return json_response(
  {"error": "Job is not in a state that can be cancelled (must be waiting for a worker)."},
  status=409,
  )

  worker_id = job_state.get("task_worker_id")
  if not worker_id:
- return web.json_response(
+ return json_response(
  {"error": "Cannot cancel job: worker_id not found in job state."},
  status=500,
  )
@@ -304,7 +386,7 @@ class OrchestratorEngine:
  worker_info = await self.storage.get_worker_info(worker_id)
  task_id = job_state.get("current_task_id")
  if not task_id:
- return web.json_response(
+ return json_response(
  {"error": "Cannot cancel job: task_id not found in job state."},
  status=500,
  )
@@ -317,28 +399,28 @@ class OrchestratorEngine:
  command = {"command": "cancel_task", "task_id": task_id, "job_id": job_id}
  sent = await self.ws_manager.send_command(worker_id, command)
  if sent:
- return web.json_response({"status": "cancellation_request_sent"})
+ return json_response({"status": "cancellation_request_sent"})
  else:
  logger.warning(f"Failed to send WebSocket cancellation for task {task_id}, but Redis flag is set.")
  # Proceed to return success, as the Redis flag will handle it

- return web.json_response({"status": "cancellation_request_accepted"})
+ return json_response({"status": "cancellation_request_accepted"})

  async def _get_job_history_handler(self, request: web.Request) -> web.Response:
  job_id = request.match_info.get("job_id")
  if not job_id:
- return web.json_response({"error": "job_id is required in path"}, status=400)
+ return json_response({"error": "job_id is required in path"}, status=400)
  history = await self.history_storage.get_job_history(job_id)
- return web.json_response(history)
+ return json_response(history)

  async def _get_blueprint_graph_handler(self, request: web.Request) -> web.Response:
  blueprint_name = request.match_info.get("blueprint_name")
  if not blueprint_name:
- return web.json_response({"error": "blueprint_name is required in path"}, status=400)
+ return json_response({"error": "blueprint_name is required in path"}, status=400)

  blueprint = self.blueprints.get(blueprint_name)
  if not blueprint:
- return web.json_response({"error": "Blueprint not found"}, status=404)
+ return json_response({"error": "Blueprint not found"}, status=404)

  try:
  graph_dot = blueprint.render_graph()
@@ -346,21 +428,21 @@ class OrchestratorEngine:
  except FileNotFoundError:
  error_msg = "Graphviz is not installed on the server. Cannot generate graph."
  logger.error(error_msg)
- return web.json_response({"error": error_msg}, status=501)
+ return json_response({"error": error_msg}, status=501)

  async def _get_workers_handler(self, request: web.Request) -> web.Response:
  workers = await self.storage.get_available_workers()
- return web.json_response(workers)
+ return json_response(workers)

  async def _get_jobs_handler(self, request: web.Request) -> web.Response:
  try:
  limit = int(request.query.get("limit", "100"))
  offset = int(request.query.get("offset", "0"))
  except ValueError:
- return web.json_response({"error": "Invalid limit/offset parameter"}, status=400)
+ return json_response({"error": "Invalid limit/offset parameter"}, status=400)

  jobs = await self.history_storage.get_jobs(limit=limit, offset=offset)
- return web.json_response(jobs)
+ return json_response(jobs)

  async def _get_dashboard_handler(self, request: web.Request) -> web.Response:
  worker_count = await self.storage.get_active_worker_count()
@@ -371,30 +453,30 @@ class OrchestratorEngine:
  "workers": {"total": worker_count},
  "jobs": {"queued": queue_length, **job_summary},
  }
- return web.json_response(dashboard_data)
+ return json_response(dashboard_data)

  async def _task_result_handler(self, request: web.Request) -> web.Response:
  import logging

  try:
- data = await request.json()
+ data = await request.json(loads=loads)
  job_id = data.get("job_id")
  task_id = data.get("task_id")
  result = data.get("result", {})
- result_status = result.get("status", "success")
+ result_status = result.get("status", TASK_STATUS_SUCCESS)
  error_message = result.get("error")
  payload_worker_id = data.get("worker_id")
  except Exception:
- return web.json_response({"error": "Invalid JSON body"}, status=400)
+ return json_response({"error": "Invalid JSON body"}, status=400)

  # Security check: Ensure the worker_id from the payload matches the authenticated worker
  authenticated_worker_id = request.get("worker_id")
  if not authenticated_worker_id:
  # This should not happen if the auth middleware is working correctly
- return web.json_response({"error": "Could not identify authenticated worker."}, status=500)
+ return json_response({"error": "Could not identify authenticated worker."}, status=500)

  if payload_worker_id and payload_worker_id != authenticated_worker_id:
- return web.json_response(
+ return json_response(
  {
  "error": f"Forbidden: Authenticated worker '{authenticated_worker_id}' "
  f"cannot submit results for another worker '{payload_worker_id}'.",
@@ -403,21 +485,21 @@ class OrchestratorEngine:
  )

  if not job_id or not task_id:
- return web.json_response({"error": "job_id and task_id are required"}, status=400)
+ return json_response({"error": "job_id and task_id are required"}, status=400)

  job_state = await self.storage.get_job_state(job_id)
  if not job_state:
- return web.json_response({"error": "Job not found"}, status=404)
+ return json_response({"error": "Job not found"}, status=404)

  # Handle parallel task completion
- if job_state.get("status") == "waiting_for_parallel_tasks":
+ if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
  await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
  job_state.setdefault("aggregation_results", {})[task_id] = result
  job_state.setdefault("active_branches", []).remove(task_id)

  if not job_state["active_branches"]:
  logger.info(f"All parallel branches for job {job_id} have completed.")
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  job_state["current_state"] = job_state["aggregation_target"]
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
@@ -428,7 +510,7 @@ class OrchestratorEngine:
  )
  await self.storage.save_job_state(job_id, job_state)

- return web.json_response({"status": "parallel_branch_result_accepted"}, status=200)
+ return json_response({"status": "parallel_branch_result_accepted"}, status=200)

  await self.storage.remove_job_from_watch(job_id)

@@ -451,13 +533,13 @@ class OrchestratorEngine:

  job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}

- if result_status == "failure":
+ if result_status == TASK_STATUS_FAILURE:
  error_details = result.get("error", {})
- error_type = "TRANSIENT_ERROR"
+ error_type = ERROR_CODE_TRANSIENT
  error_message = "No error details provided."

  if isinstance(error_details, dict):
- error_type = error_details.get("code", "TRANSIENT_ERROR")
+ error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
  error_message = error_details.get("message", "No error message provided.")
  elif isinstance(error_details, str):
  # Fallback for old format where `error` was just a string
@@ -465,32 +547,32 @@ class OrchestratorEngine:

  logging.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")

- if error_type == "PERMANENT_ERROR":
- job_state["status"] = "quarantined"
+ if error_type == ERROR_CODE_PERMANENT:
+ job_state["status"] = JOB_STATUS_QUARANTINED
  job_state["error_message"] = f"Task failed with permanent error: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.quarantine_job(job_id)
- elif error_type == "INVALID_INPUT_ERROR":
- job_state["status"] = "failed"
+ elif error_type == ERROR_CODE_INVALID_INPUT:
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  else: # TRANSIENT_ERROR or any other/unspecified error
  await self._handle_task_failure(job_state, task_id, error_message)

- return web.json_response({"status": "result_accepted_failure"}, status=200)
+ return json_response({"status": "result_accepted_failure"}, status=200)

- if result_status == "cancelled":
+ if result_status == TASK_STATUS_CANCELLED:
  logging.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
- job_state["status"] = "cancelled"
+ job_state["status"] = JOB_STATUS_CANCELLED
  await self.storage.save_job_state(job_id, job_state)
  # Optionally, trigger a specific 'cancelled' transition if defined in the blueprint
  transitions = job_state.get("current_task_transitions", {})
  if next_state := transitions.get("cancelled"):
  job_state["current_state"] = next_state
- job_state["status"] = "running" # It's running the cancellation handler now
+ job_state["status"] = JOB_STATUS_RUNNING # It's running the cancellation handler now
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
- return web.json_response({"status": "result_accepted_cancelled"}, status=200)
+ return json_response({"status": "result_accepted_cancelled"}, status=200)

  transitions = job_state.get("current_task_transitions", {})
  if next_state := transitions.get(result_status):
@@ -503,16 +585,16 @@ class OrchestratorEngine:
  job_state["state_history"].update(worker_data)

  job_state["current_state"] = next_state
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
  else:
  logging.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
- job_state["status"] = "failed"
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
  await self.storage.save_job_state(job_id, job_state)

- return web.json_response({"status": "result_accepted_success"}, status=200)
+ return json_response({"status": "result_accepted_success"}, status=200)

  async def _handle_task_failure(self, job_state: dict, task_id: str, error_message: str | None):
  import logging
@@ -528,7 +610,7 @@ class OrchestratorEngine:
  task_info = job_state.get("current_task_info")
  if not task_info:
  logging.error(f"Cannot retry job {job_id}: missing 'current_task_info' in job state.")
- job_state["status"] = "failed"
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = "Cannot retry: original task info not found."
  await self.storage.save_job_state(job_id, job_state)
  return
@@ -537,7 +619,7 @@ class OrchestratorEngine:
  timeout_seconds = task_info.get("timeout_seconds", self.config.WORKER_TIMEOUT_SECONDS)
  timeout_at = now + timeout_seconds

- job_state["status"] = "waiting_for_worker"
+ job_state["status"] = JOB_STATUS_WAITING_FOR_WORKER
  job_state["task_dispatched_at"] = now
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.add_job_to_watch(job_id, timeout_at)
@@ -545,7 +627,7 @@ class OrchestratorEngine:
  await self.dispatcher.dispatch(job_state, task_info)
  else:
  logging.critical(f"Job {job_id} has failed {max_retries + 1} times. Moving to quarantine.")
- job_state["status"] = "quarantined"
+ job_state["status"] = JOB_STATUS_QUARANTINED
  job_state["error_message"] = f"Task failed after {max_retries + 1} attempts: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.quarantine_job(job_id)
@@ -553,61 +635,60 @@ class OrchestratorEngine:
  async def _human_approval_webhook_handler(self, request: web.Request) -> web.Response:
  job_id = request.match_info.get("job_id")
  if not job_id:
- return web.json_response({"error": "job_id is required in path"}, status=400)
+ return json_response({"error": "job_id is required in path"}, status=400)
  try:
- data = await request.json()
+ data = await request.json(loads=loads)
  decision = data.get("decision")
  if not decision:
- return web.json_response({"error": "decision is required in body"}, status=400)
+ return json_response({"error": "decision is required in body"}, status=400)
  except Exception:
- return web.json_response({"error": "Invalid JSON body"}, status=400)
+ return json_response({"error": "Invalid JSON body"}, status=400)
  job_state = await self.storage.get_job_state(job_id)
  if not job_state:
- return web.json_response({"error": "Job not found"}, status=404)
- if job_state.get("status") not in ["waiting_for_worker", "waiting_for_human"]:
- return web.json_response({"error": "Job is not in a state that can be approved"}, status=409)
+ return json_response({"error": "Job not found"}, status=404)
+ if job_state.get("status") not in [JOB_STATUS_WAITING_FOR_WORKER, JOB_STATUS_WAITING_FOR_HUMAN]:
+ return json_response({"error": "Job is not in a state that can be approved"}, status=409)
  transitions = job_state.get("current_task_transitions", {})
  next_state = transitions.get(decision)
  if not next_state:
- return web.json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
+ return json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
  job_state["current_state"] = next_state
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
- return web.json_response({"status": "approval_received", "job_id": job_id})
+ return json_response({"status": "approval_received", "job_id": job_id})

  async def _get_quarantined_jobs_handler(self, request: web.Request) -> web.Response:
  """Returns a list of all job IDs in the quarantine queue."""
  jobs = await self.storage.get_quarantined_jobs()
- return web.json_response(jobs)
+ return json_response(jobs)

  async def _reload_worker_configs_handler(self, request: web.Request) -> web.Response:
  """Handles the dynamic reloading of worker configurations."""
  logger.info("Received request to reload worker configurations.")
  if not self.config.WORKERS_CONFIG_PATH:
- return web.json_response(
+ return json_response(
  {"error": "WORKERS_CONFIG_PATH is not set, cannot reload configs."},
  status=400,
  )

  await load_worker_configs_to_redis(self.storage, self.config.WORKERS_CONFIG_PATH)
- return web.json_response({"status": "worker_configs_reloaded"})
+ return json_response({"status": "worker_configs_reloaded"})

  async def _flush_db_handler(self, request: web.Request) -> web.Response:
  logger.warning("Received request to flush the database.")
  await self.storage.flush_all()
  await load_client_configs_to_redis(self.storage)
- return web.json_response({"status": "db_flushed"}, status=200)
+ return json_response({"status": "db_flushed"}, status=200)

  async def _docs_handler(self, request: web.Request) -> web.Response:
- import json
  from importlib import resources

  try:
  content = resources.read_text("avtomatika", "api.html")
  except FileNotFoundError:
  logger.error("api.html not found within the avtomatika package.")
- return web.json_response({"error": "Documentation file not found on server."}, status=500)
+ return json_response({"error": "Documentation file not found on server."}, status=500)

  # Generate dynamic documentation for registered blueprints
  blueprint_endpoints = []
@@ -639,7 +720,7 @@ class OrchestratorEngine:

  # Inject dynamic endpoints into the apiData structure in the HTML
  if blueprint_endpoints:
- endpoints_json = json.dumps(blueprint_endpoints, indent=2)
+ endpoints_json = dumps(blueprint_endpoints, option=OPT_INDENT_2).decode("utf-8")
  # We insert the new endpoints at the beginning of the 'Protected API' group
  marker = "group: 'Protected API',\n endpoints: ["
  content = content.replace(marker, f"{marker}\n{endpoints_json.strip('[]')},")
@@ -661,7 +742,7 @@ class OrchestratorEngine:
  api_middlewares = [auth_middleware, quota_middleware]

  protected_app = web.Application(middlewares=api_middlewares)
- versioned_apps: Dict[str, web.Application] = {}
+ versioned_apps: dict[str, web.Application] = {}
  has_unversioned_routes = False

  for bp in self.blueprints.values():
@@ -739,14 +820,14 @@ class OrchestratorEngine:
  async def _handle_get_next_task(self, request: web.Request) -> web.Response:
  worker_id = request.match_info.get("worker_id")
  if not worker_id:
- return web.json_response({"error": "worker_id is required in path"}, status=400)
+ return json_response({"error": "worker_id is required in path"}, status=400)

  logger.debug(f"Worker {worker_id} is requesting a new task.")
  task = await self.storage.dequeue_task_for_worker(worker_id, self.config.WORKER_POLL_TIMEOUT_SECONDS)

  if task:
  logger.info(f"Sending task {task.get('task_id')} to worker {worker_id}")
- return web.json_response(task, status=200)
+ return json_response(task, status=200)
  logger.debug(f"No tasks for worker {worker_id}, responding 204.")
  return web.Response(status=204)

@@ -759,7 +840,7 @@ class OrchestratorEngine:
  """
  worker_id = request.match_info.get("worker_id")
  if not worker_id:
- return web.json_response({"error": "worker_id is required in path"}, status=400)
+ return json_response({"error": "worker_id is required in path"}, status=400)

  ttl = self.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
  update_data = None
@@ -767,11 +848,8 @@ class OrchestratorEngine:
  # Check for body content without consuming it if it's not JSON
  if request.can_read_body:
  try:
- update_data = await request.json()
+ update_data = await request.json(loads=loads)
  except Exception:
- # This can happen if the body is present but not valid JSON.
- # We can treat it as a lightweight heartbeat or return an error.
- # For robustness, let's treat it as a lightweight ping but log a warning.
  logger.warning(
  f"Received PATCH from worker {worker_id} with non-JSON body. Treating as TTL-only heartbeat."
  )
@@ -780,7 +858,7 @@ class OrchestratorEngine:
  # Full update path
  updated_worker = await self.storage.update_worker_status(worker_id, update_data, ttl)
  if not updated_worker:
- return web.json_response({"error": "Worker not found"}, status=404)
+ return json_response({"error": "Worker not found"}, status=404)

  await self.history_storage.log_worker_event(
  {
@@ -789,25 +867,25 @@ class OrchestratorEngine:
  "worker_info_snapshot": updated_worker,
  },
  )
- return web.json_response(updated_worker, status=200)
+ return json_response(updated_worker, status=200)
  else:
  # Lightweight TTL-only heartbeat path
  refreshed = await self.storage.refresh_worker_ttl(worker_id, ttl)
  if not refreshed:
- return web.json_response({"error": "Worker not found"}, status=404)
- return web.json_response({"status": "ttl_refreshed"})
+ return json_response({"error": "Worker not found"}, status=404)
+ return json_response({"status": "ttl_refreshed"})

  async def _register_worker_handler(self, request: web.Request) -> web.Response:
  # The worker_registration_data is attached by the auth middleware
  # to avoid reading the request body twice.
  worker_data = request.get("worker_registration_data")
  if not worker_data:
- return web.json_response({"error": "Worker data not found in request"}, status=500)
+ return json_response({"error": "Worker data not found in request"}, status=500)

  worker_id = worker_data.get("worker_id")
  # This check is redundant if the middleware works, but good for safety
  if not worker_id:
- return web.json_response({"error": "Missing required field: worker_id"}, status=400)
+ return json_response({"error": "Missing required field: worker_id"}, status=400)

  ttl = self.config.WORKER_HEALTH_CHECK_INTERVAL_SECONDS * 2
  await self.storage.register_worker(worker_id, worker_data, ttl)
@@ -823,7 +901,7 @@ class OrchestratorEngine:
  "worker_info_snapshot": worker_data,
  },
  )
- return web.json_response({"status": "registered"}, status=200)
+ return json_response({"status": "registered"}, status=200)

  def run(self):
  self.setup()
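
The sketch below is illustrative only and is not part of the published diff. It shows one way the create_background_job() method introduced in this version could be driven by an external periodic trigger; the blueprint name "nightly_cleanup", the payload, and the 60-second interval are hypothetical assumptions, while the create_background_job(blueprint_name, initial_data, source) signature and its ValueError on an unknown blueprint come from the code above.

# Hypothetical usage sketch (not part of the package): drive the new
# OrchestratorEngine.create_background_job() API from a simple periodic trigger.
from asyncio import sleep


async def periodic_trigger(engine, interval_seconds: float = 60.0) -> None:
    """Enqueue a job on a fixed interval, bypassing the HTTP API layer."""
    while True:
        try:
            # Signature as added in this release:
            # create_background_job(blueprint_name, initial_data, source) -> job_id
            job_id = await engine.create_background_job(
                blueprint_name="nightly_cleanup",  # illustrative blueprint name
                initial_data={"reason": "periodic trigger"},
                source="example-trigger",
            )
            print(f"enqueued background job {job_id}")
        except ValueError:
            # create_background_job raises ValueError when the blueprint is not registered
            print("blueprint not registered; skipping this tick")
        await sleep(interval_seconds)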