avtomatika 1.0b5__tar.gz → 1.0b6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {avtomatika-1.0b5/src/avtomatika.egg-info → avtomatika-1.0b6}/PKG-INFO +35 -2
- {avtomatika-1.0b5 → avtomatika-1.0b6}/README.md +34 -1
- {avtomatika-1.0b5 → avtomatika-1.0b6}/pyproject.toml +1 -1
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/config.py +4 -0
- avtomatika-1.0b6/src/avtomatika/constants.py +30 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/engine.py +100 -25
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/postgres.py +56 -13
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/sqlite.py +54 -34
- avtomatika-1.0b6/src/avtomatika/logging_config.py +92 -0
- avtomatika-1.0b6/src/avtomatika/scheduler.py +119 -0
- avtomatika-1.0b6/src/avtomatika/scheduler_config_loader.py +41 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/security.py +3 -5
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/base.py +17 -3
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/memory.py +41 -4
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/redis.py +17 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6/src/avtomatika.egg-info}/PKG-INFO +35 -2
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/SOURCES.txt +4 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_engine.py +22 -16
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_logging_config.py +16 -4
- avtomatika-1.0b6/tests/test_scheduler.py +200 -0
- avtomatika-1.0b5/src/avtomatika/logging_config.py +0 -41
- {avtomatika-1.0b5 → avtomatika-1.0b6}/LICENSE +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/setup.cfg +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/__init__.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/api.html +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/blueprint.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/client_config_loader.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/compression.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/context.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/data_types.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/datastore.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/dispatcher.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/executor.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/health_checker.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/base.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/noop.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/metrics.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/py.typed +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/quota.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/ratelimit.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/reputation.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/__init__.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/telemetry.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/watcher.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/worker_config_loader.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/ws_manager.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/dependency_links.txt +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/requires.txt +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/top_level.txt +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_blueprint_conditions.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_blueprints.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_client_config_loader.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_compression.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_config_validation.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_context.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_dispatcher.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_dispatcher_extended.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_error_handling.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_executor.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_health_checker.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_history.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_integration.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_memory_locking.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_memory_storage.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_metrics.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_noop_history.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_postgres_history.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_ratelimit.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_redis_locking.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_redis_storage.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_reputation.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_telemetry.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_watcher.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_worker_config_loader.py +0 -0
- {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_ws_manager.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avtomatika
|
|
3
|
-
Version: 1.0b5
|
|
3
|
+
Version: 1.0b6
|
|
4
4
|
Summary: A state-machine based orchestrator for long-running AI and other jobs.
|
|
5
5
|
Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
|
|
@@ -60,6 +60,7 @@ This document serves as a comprehensive guide for developers looking to build pi
|
|
|
60
60
|
- [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
|
|
61
61
|
- [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
|
|
62
62
|
- [Dependency Injection (DataStore)](#dependency-injection-datastore)
|
|
63
|
+
- [Native Scheduler](#native-scheduler)
|
|
63
64
|
- [Production Configuration](#production-configuration)
|
|
64
65
|
- [Fault Tolerance](#fault-tolerance)
|
|
65
66
|
- [Storage Backend](#storage-backend)
|
|
@@ -74,7 +75,17 @@ The project is based on a simple yet powerful architectural pattern that separat
|
|
|
74
75
|
|
|
75
76
|
* **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
|
|
76
77
|
* **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
|
|
77
|
-
* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator
|
|
78
|
+
* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
|
|
79
|
+
|
|
80
|
+
## Ecosystem
|
|
81
|
+
|
|
82
|
+
Avtomatika is part of a larger ecosystem:
|
|
83
|
+
|
|
84
|
+
* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
|
|
85
|
+
* **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
|
|
86
|
+
* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
|
|
87
|
+
|
|
88
|
+
## Installation
|
|
78
89
|
|
|
79
90
|
* **Install the core engine only:**
|
|
80
91
|
```bash
|
|
@@ -328,6 +339,22 @@ async def cache_handler(data_stores):
|
|
|
328
339
|
user_data = await data_stores.cache.get("user:123")
|
|
329
340
|
print(f"User from cache: {user_data}")
|
|
330
341
|
```
|
|
342
|
+
|
|
343
|
+
### 5. Native Scheduler
|
|
344
|
+
|
|
345
|
+
Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
|
|
346
|
+
|
|
347
|
+
* **Configuration:** Defined in `schedules.toml`.
|
|
348
|
+
* **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
|
|
349
|
+
* **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
|
|
350
|
+
|
|
351
|
+
```toml
|
|
352
|
+
# schedules.toml example
|
|
353
|
+
[nightly_backup]
|
|
354
|
+
blueprint = "backup_flow"
|
|
355
|
+
daily_at = "02:00"
|
|
356
|
+
```
|
|
357
|
+
|
|
331
358
|
## Production Configuration
|
|
332
359
|
|
|
333
360
|
The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
|
|
@@ -349,6 +376,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
|
|
|
349
376
|
[gpu-worker-01]
|
|
350
377
|
token = "worker-secret-456"
|
|
351
378
|
```
|
|
379
|
+
- **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
|
|
380
|
+
```toml
|
|
381
|
+
[nightly_backup]
|
|
382
|
+
blueprint = "backup_flow"
|
|
383
|
+
daily_at = "02:00"
|
|
384
|
+
```
|
|
352
385
|
|
|
353
386
|
For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
|
|
354
387
|
|
|
@@ -14,6 +14,7 @@ This document serves as a comprehensive guide for developers looking to build pi
|
|
|
14
14
|
- [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
|
|
15
15
|
- [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
|
|
16
16
|
- [Dependency Injection (DataStore)](#dependency-injection-datastore)
|
|
17
|
+
- [Native Scheduler](#native-scheduler)
|
|
17
18
|
- [Production Configuration](#production-configuration)
|
|
18
19
|
- [Fault Tolerance](#fault-tolerance)
|
|
19
20
|
- [Storage Backend](#storage-backend)
|
|
@@ -28,7 +29,17 @@ The project is based on a simple yet powerful architectural pattern that separat
|
|
|
28
29
|
|
|
29
30
|
* **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
|
|
30
31
|
* **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
|
|
31
|
-
* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator
|
|
32
|
+
* **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
|
|
33
|
+
|
|
34
|
+
## Ecosystem
|
|
35
|
+
|
|
36
|
+
Avtomatika is part of a larger ecosystem:
|
|
37
|
+
|
|
38
|
+
* **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
|
|
39
|
+
* **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
|
|
40
|
+
* **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
32
43
|
|
|
33
44
|
* **Install the core engine only:**
|
|
34
45
|
```bash
|
|
@@ -282,6 +293,22 @@ async def cache_handler(data_stores):
|
|
|
282
293
|
user_data = await data_stores.cache.get("user:123")
|
|
283
294
|
print(f"User from cache: {user_data}")
|
|
284
295
|
```
|
|
296
|
+
|
|
297
|
+
### 5. Native Scheduler
|
|
298
|
+
|
|
299
|
+
Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
|
|
300
|
+
|
|
301
|
+
* **Configuration:** Defined in `schedules.toml`.
|
|
302
|
+
* **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
|
|
303
|
+
* **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
|
|
304
|
+
|
|
305
|
+
```toml
|
|
306
|
+
# schedules.toml example
|
|
307
|
+
[nightly_backup]
|
|
308
|
+
blueprint = "backup_flow"
|
|
309
|
+
daily_at = "02:00"
|
|
310
|
+
```
|
|
311
|
+
|
|
285
312
|
## Production Configuration
|
|
286
313
|
|
|
287
314
|
The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
|
|
@@ -303,6 +330,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
|
|
|
303
330
|
[gpu-worker-01]
|
|
304
331
|
token = "worker-secret-456"
|
|
305
332
|
```
|
|
333
|
+
- **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
|
|
334
|
+
```toml
|
|
335
|
+
[nightly_backup]
|
|
336
|
+
blueprint = "backup_flow"
|
|
337
|
+
daily_at = "02:00"
|
|
338
|
+
```
|
|
306
339
|
|
|
307
340
|
For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).
|
|
308
341
|
|
|
@@ -62,3 +62,7 @@ class Config:
|
|
|
62
62
|
# External config files
|
|
63
63
|
self.WORKERS_CONFIG_PATH: str = getenv("WORKERS_CONFIG_PATH", "")
|
|
64
64
|
self.CLIENTS_CONFIG_PATH: str = getenv("CLIENTS_CONFIG_PATH", "")
|
|
65
|
+
self.SCHEDULES_CONFIG_PATH: str = getenv("SCHEDULES_CONFIG_PATH", "")
|
|
66
|
+
|
|
67
|
+
# Timezone settings
|
|
68
|
+
self.TZ: str = getenv("TZ", "UTC")
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized constants for the Avtomatika protocol.
|
|
3
|
+
Use these constants instead of hardcoded strings to ensure consistency.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
# --- Auth Headers ---
|
|
7
|
+
AUTH_HEADER_CLIENT = "X-Avtomatika-Token"
|
|
8
|
+
AUTH_HEADER_WORKER = "X-Worker-Token"
|
|
9
|
+
|
|
10
|
+
# --- Error Codes ---
|
|
11
|
+
# Error codes returned by workers in the result payload
|
|
12
|
+
ERROR_CODE_TRANSIENT = "TRANSIENT_ERROR"
|
|
13
|
+
ERROR_CODE_PERMANENT = "PERMANENT_ERROR"
|
|
14
|
+
ERROR_CODE_INVALID_INPUT = "INVALID_INPUT_ERROR"
|
|
15
|
+
|
|
16
|
+
# --- Task Statuses ---
|
|
17
|
+
# Standard statuses for task results
|
|
18
|
+
TASK_STATUS_SUCCESS = "success"
|
|
19
|
+
TASK_STATUS_FAILURE = "failure"
|
|
20
|
+
TASK_STATUS_CANCELLED = "cancelled"
|
|
21
|
+
|
|
22
|
+
# --- Job Statuses ---
|
|
23
|
+
JOB_STATUS_PENDING = "pending"
|
|
24
|
+
JOB_STATUS_WAITING_FOR_WORKER = "waiting_for_worker"
|
|
25
|
+
JOB_STATUS_RUNNING = "running"
|
|
26
|
+
JOB_STATUS_FAILED = "failed"
|
|
27
|
+
JOB_STATUS_QUARANTINED = "quarantined"
|
|
28
|
+
JOB_STATUS_CANCELLED = "cancelled"
|
|
29
|
+
JOB_STATUS_WAITING_FOR_HUMAN = "waiting_for_human"
|
|
30
|
+
JOB_STATUS_WAITING_FOR_PARALLEL = "waiting_for_parallel_tasks"
|
|
@@ -14,6 +14,22 @@ from .blueprint import StateMachineBlueprint
|
|
|
14
14
|
from .client_config_loader import load_client_configs_to_redis
|
|
15
15
|
from .compression import compression_middleware
|
|
16
16
|
from .config import Config
|
|
17
|
+
from .constants import (
|
|
18
|
+
ERROR_CODE_INVALID_INPUT,
|
|
19
|
+
ERROR_CODE_PERMANENT,
|
|
20
|
+
ERROR_CODE_TRANSIENT,
|
|
21
|
+
JOB_STATUS_CANCELLED,
|
|
22
|
+
JOB_STATUS_FAILED,
|
|
23
|
+
JOB_STATUS_PENDING,
|
|
24
|
+
JOB_STATUS_QUARANTINED,
|
|
25
|
+
JOB_STATUS_RUNNING,
|
|
26
|
+
JOB_STATUS_WAITING_FOR_HUMAN,
|
|
27
|
+
JOB_STATUS_WAITING_FOR_PARALLEL,
|
|
28
|
+
JOB_STATUS_WAITING_FOR_WORKER,
|
|
29
|
+
TASK_STATUS_CANCELLED,
|
|
30
|
+
TASK_STATUS_FAILURE,
|
|
31
|
+
TASK_STATUS_SUCCESS,
|
|
32
|
+
)
|
|
17
33
|
from .dispatcher import Dispatcher
|
|
18
34
|
from .executor import JobExecutor
|
|
19
35
|
from .health_checker import HealthChecker
|
|
@@ -23,6 +39,7 @@ from .logging_config import setup_logging
|
|
|
23
39
|
from .quota import quota_middleware_factory
|
|
24
40
|
from .ratelimit import rate_limit_middleware_factory
|
|
25
41
|
from .reputation import ReputationCalculator
|
|
42
|
+
from .scheduler import Scheduler
|
|
26
43
|
from .security import client_auth_middleware_factory, worker_auth_middleware_factory
|
|
27
44
|
from .storage.base import StorageBackend
|
|
28
45
|
from .telemetry import setup_telemetry
|
|
@@ -38,10 +55,13 @@ EXECUTOR_KEY = AppKey("executor", JobExecutor)
|
|
|
38
55
|
WATCHER_KEY = AppKey("watcher", Watcher)
|
|
39
56
|
REPUTATION_CALCULATOR_KEY = AppKey("reputation_calculator", ReputationCalculator)
|
|
40
57
|
HEALTH_CHECKER_KEY = AppKey("health_checker", HealthChecker)
|
|
58
|
+
SCHEDULER_KEY = AppKey("scheduler", Scheduler)
|
|
59
|
+
|
|
41
60
|
EXECUTOR_TASK_KEY = AppKey("executor_task", Task)
|
|
42
61
|
WATCHER_TASK_KEY = AppKey("watcher_task", Task)
|
|
43
62
|
REPUTATION_CALCULATOR_TASK_KEY = AppKey("reputation_calculator_task", Task)
|
|
44
63
|
HEALTH_CHECKER_TASK_KEY = AppKey("health_checker_task", Task)
|
|
64
|
+
SCHEDULER_TASK_KEY = AppKey("scheduler_task", Task)
|
|
45
65
|
|
|
46
66
|
metrics.init_metrics()
|
|
47
67
|
|
|
@@ -66,7 +86,7 @@ async def metrics_handler(_request: web.Request) -> web.Response:
|
|
|
66
86
|
|
|
67
87
|
class OrchestratorEngine:
|
|
68
88
|
def __init__(self, storage: StorageBackend, config: Config):
|
|
69
|
-
setup_logging(config.LOG_LEVEL, config.LOG_FORMAT)
|
|
89
|
+
setup_logging(config.LOG_LEVEL, config.LOG_FORMAT, config.TZ)
|
|
70
90
|
setup_telemetry()
|
|
71
91
|
self.storage = storage
|
|
72
92
|
self.config = config
|
|
@@ -115,7 +135,7 @@ class OrchestratorEngine:
|
|
|
115
135
|
storage_class = module.SQLiteHistoryStorage
|
|
116
136
|
parsed_uri = urlparse(uri)
|
|
117
137
|
db_path = parsed_uri.path
|
|
118
|
-
storage_args = [db_path]
|
|
138
|
+
storage_args = [db_path, self.config.TZ]
|
|
119
139
|
except ImportError as e:
|
|
120
140
|
logger.error(f"Could not import SQLiteHistoryStorage, perhaps aiosqlite is not installed? Error: {e}")
|
|
121
141
|
self.history_storage = NoOpHistoryStorage()
|
|
@@ -125,7 +145,7 @@ class OrchestratorEngine:
|
|
|
125
145
|
try:
|
|
126
146
|
module = import_module(".history.postgres", package="avtomatika")
|
|
127
147
|
storage_class = module.PostgresHistoryStorage
|
|
128
|
-
storage_args = [uri]
|
|
148
|
+
storage_args = [uri, self.config.TZ]
|
|
129
149
|
except ImportError as e:
|
|
130
150
|
logger.error(f"Could not import PostgresHistoryStorage, perhaps asyncpg is not installed? Error: {e}")
|
|
131
151
|
self.history_storage = NoOpHistoryStorage()
|
|
@@ -199,11 +219,13 @@ class OrchestratorEngine:
|
|
|
199
219
|
app[WATCHER_KEY] = Watcher(self)
|
|
200
220
|
app[REPUTATION_CALCULATOR_KEY] = ReputationCalculator(self)
|
|
201
221
|
app[HEALTH_CHECKER_KEY] = HealthChecker(self)
|
|
222
|
+
app[SCHEDULER_KEY] = Scheduler(self)
|
|
202
223
|
|
|
203
224
|
app[EXECUTOR_TASK_KEY] = create_task(app[EXECUTOR_KEY].run())
|
|
204
225
|
app[WATCHER_TASK_KEY] = create_task(app[WATCHER_KEY].run())
|
|
205
226
|
app[REPUTATION_CALCULATOR_TASK_KEY] = create_task(app[REPUTATION_CALCULATOR_KEY].run())
|
|
206
227
|
app[HEALTH_CHECKER_TASK_KEY] = create_task(app[HEALTH_CHECKER_KEY].run())
|
|
228
|
+
app[SCHEDULER_TASK_KEY] = create_task(app[SCHEDULER_KEY].run())
|
|
207
229
|
|
|
208
230
|
async def on_shutdown(self, app: web.Application):
|
|
209
231
|
logger.info("Shutdown sequence started.")
|
|
@@ -211,6 +233,7 @@ class OrchestratorEngine:
|
|
|
211
233
|
app[WATCHER_KEY].stop()
|
|
212
234
|
app[REPUTATION_CALCULATOR_KEY].stop()
|
|
213
235
|
app[HEALTH_CHECKER_KEY].stop()
|
|
236
|
+
app[SCHEDULER_KEY].stop()
|
|
214
237
|
logger.info("Background task running flags set to False.")
|
|
215
238
|
|
|
216
239
|
if hasattr(self.history_storage, "close"):
|
|
@@ -226,6 +249,8 @@ class OrchestratorEngine:
|
|
|
226
249
|
app[WATCHER_TASK_KEY].cancel()
|
|
227
250
|
app[REPUTATION_CALCULATOR_TASK_KEY].cancel()
|
|
228
251
|
app[EXECUTOR_TASK_KEY].cancel()
|
|
252
|
+
# Scheduler task manages its own loop cancellation in stop(), but just in case:
|
|
253
|
+
app[SCHEDULER_TASK_KEY].cancel()
|
|
229
254
|
logger.info("Background tasks cancelled.")
|
|
230
255
|
|
|
231
256
|
logger.info("Gathering background tasks with a 10s timeout...")
|
|
@@ -236,6 +261,7 @@ class OrchestratorEngine:
|
|
|
236
261
|
app[WATCHER_TASK_KEY],
|
|
237
262
|
app[REPUTATION_CALCULATOR_TASK_KEY],
|
|
238
263
|
app[EXECUTOR_TASK_KEY],
|
|
264
|
+
app[SCHEDULER_TASK_KEY],
|
|
239
265
|
return_exceptions=True,
|
|
240
266
|
),
|
|
241
267
|
timeout=10.0,
|
|
@@ -249,6 +275,55 @@ class OrchestratorEngine:
|
|
|
249
275
|
logger.info("HTTP session closed.")
|
|
250
276
|
logger.info("Shutdown sequence finished.")
|
|
251
277
|
|
|
278
|
+
async def create_background_job(
|
|
279
|
+
self,
|
|
280
|
+
blueprint_name: str,
|
|
281
|
+
initial_data: dict[str, Any],
|
|
282
|
+
source: str = "internal",
|
|
283
|
+
) -> str:
|
|
284
|
+
"""Creates a job directly, bypassing the HTTP API layer.
|
|
285
|
+
Useful for internal schedulers and triggers.
|
|
286
|
+
"""
|
|
287
|
+
blueprint = self.blueprints.get(blueprint_name)
|
|
288
|
+
if not blueprint:
|
|
289
|
+
raise ValueError(f"Blueprint '{blueprint_name}' not found.")
|
|
290
|
+
|
|
291
|
+
job_id = str(uuid4())
|
|
292
|
+
# Use a special internal client config
|
|
293
|
+
client_config = {
|
|
294
|
+
"token": "internal-scheduler",
|
|
295
|
+
"plan": "system",
|
|
296
|
+
"params": {"source": source},
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
job_state = {
|
|
300
|
+
"id": job_id,
|
|
301
|
+
"blueprint_name": blueprint.name,
|
|
302
|
+
"current_state": blueprint.start_state,
|
|
303
|
+
"initial_data": initial_data,
|
|
304
|
+
"state_history": {},
|
|
305
|
+
"status": JOB_STATUS_PENDING,
|
|
306
|
+
"tracing_context": {},
|
|
307
|
+
"client_config": client_config,
|
|
308
|
+
}
|
|
309
|
+
await self.storage.save_job_state(job_id, job_state)
|
|
310
|
+
await self.storage.enqueue_job(job_id)
|
|
311
|
+
metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
|
|
312
|
+
|
|
313
|
+
# Log the creation in history as well (so we can track scheduled jobs)
|
|
314
|
+
await self.history_storage.log_job_event(
|
|
315
|
+
{
|
|
316
|
+
"job_id": job_id,
|
|
317
|
+
"state": "pending",
|
|
318
|
+
"event_type": "job_created",
|
|
319
|
+
"context_snapshot": job_state,
|
|
320
|
+
"metadata": {"source": source, "scheduled": True},
|
|
321
|
+
}
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
logger.info(f"Created background job {job_id} for blueprint '{blueprint_name}' (source: {source})")
|
|
325
|
+
return job_id
|
|
326
|
+
|
|
252
327
|
def _create_job_handler(self, blueprint: StateMachineBlueprint) -> Callable:
|
|
253
328
|
async def handler(request: web.Request) -> web.Response:
|
|
254
329
|
try:
|
|
@@ -266,7 +341,7 @@ class OrchestratorEngine:
|
|
|
266
341
|
"current_state": blueprint.start_state,
|
|
267
342
|
"initial_data": initial_data,
|
|
268
343
|
"state_history": {},
|
|
269
|
-
"status": "pending",
|
|
344
|
+
"status": JOB_STATUS_PENDING,
|
|
270
345
|
"tracing_context": carrier,
|
|
271
346
|
"client_config": client_config,
|
|
272
347
|
}
|
|
@@ -295,7 +370,7 @@ class OrchestratorEngine:
|
|
|
295
370
|
if not job_state:
|
|
296
371
|
return json_response({"error": "Job not found"}, status=404)
|
|
297
372
|
|
|
298
|
-
if job_state.get("status") != "waiting_for_worker":
|
|
373
|
+
if job_state.get("status") != JOB_STATUS_WAITING_FOR_WORKER:
|
|
299
374
|
return json_response(
|
|
300
375
|
{"error": "Job is not in a state that can be cancelled (must be waiting for a worker)."},
|
|
301
376
|
status=409,
|
|
@@ -388,7 +463,7 @@ class OrchestratorEngine:
|
|
|
388
463
|
job_id = data.get("job_id")
|
|
389
464
|
task_id = data.get("task_id")
|
|
390
465
|
result = data.get("result", {})
|
|
391
|
-
result_status = result.get("status", "success")
|
|
466
|
+
result_status = result.get("status", TASK_STATUS_SUCCESS)
|
|
392
467
|
error_message = result.get("error")
|
|
393
468
|
payload_worker_id = data.get("worker_id")
|
|
394
469
|
except Exception:
|
|
@@ -417,14 +492,14 @@ class OrchestratorEngine:
|
|
|
417
492
|
return json_response({"error": "Job not found"}, status=404)
|
|
418
493
|
|
|
419
494
|
# Handle parallel task completion
|
|
420
|
-
if job_state.get("status") == "waiting_for_parallel_tasks":
|
|
495
|
+
if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
|
|
421
496
|
await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
|
|
422
497
|
job_state.setdefault("aggregation_results", {})[task_id] = result
|
|
423
498
|
job_state.setdefault("active_branches", []).remove(task_id)
|
|
424
499
|
|
|
425
500
|
if not job_state["active_branches"]:
|
|
426
501
|
logger.info(f"All parallel branches for job {job_id} have completed.")
|
|
427
|
-
job_state["status"] = "running"
|
|
502
|
+
job_state["status"] = JOB_STATUS_RUNNING
|
|
428
503
|
job_state["current_state"] = job_state["aggregation_target"]
|
|
429
504
|
await self.storage.save_job_state(job_id, job_state)
|
|
430
505
|
await self.storage.enqueue_job(job_id)
|
|
@@ -458,13 +533,13 @@ class OrchestratorEngine:
|
|
|
458
533
|
|
|
459
534
|
job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}
|
|
460
535
|
|
|
461
|
-
if result_status == "failure":
|
|
536
|
+
if result_status == TASK_STATUS_FAILURE:
|
|
462
537
|
error_details = result.get("error", {})
|
|
463
|
-
error_type = "TRANSIENT_ERROR"
|
|
538
|
+
error_type = ERROR_CODE_TRANSIENT
|
|
464
539
|
error_message = "No error details provided."
|
|
465
540
|
|
|
466
541
|
if isinstance(error_details, dict):
|
|
467
|
-
error_type = error_details.get("code", "TRANSIENT_ERROR")
|
|
542
|
+
error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
|
|
468
543
|
error_message = error_details.get("message", "No error message provided.")
|
|
469
544
|
elif isinstance(error_details, str):
|
|
470
545
|
# Fallback for old format where `error` was just a string
|
|
@@ -472,13 +547,13 @@ class OrchestratorEngine:
|
|
|
472
547
|
|
|
473
548
|
logging.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")
|
|
474
549
|
|
|
475
|
-
if error_type == "PERMANENT_ERROR":
|
|
476
|
-
job_state["status"] = "quarantined"
|
|
550
|
+
if error_type == ERROR_CODE_PERMANENT:
|
|
551
|
+
job_state["status"] = JOB_STATUS_QUARANTINED
|
|
477
552
|
job_state["error_message"] = f"Task failed with permanent error: {error_message}"
|
|
478
553
|
await self.storage.save_job_state(job_id, job_state)
|
|
479
554
|
await self.storage.quarantine_job(job_id)
|
|
480
|
-
elif error_type == "INVALID_INPUT_ERROR":
|
|
481
|
-
job_state["status"] = "failed"
|
|
555
|
+
elif error_type == ERROR_CODE_INVALID_INPUT:
|
|
556
|
+
job_state["status"] = JOB_STATUS_FAILED
|
|
482
557
|
job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
|
|
483
558
|
await self.storage.save_job_state(job_id, job_state)
|
|
484
559
|
else: # TRANSIENT_ERROR or any other/unspecified error
|
|
@@ -486,15 +561,15 @@ class OrchestratorEngine:
|
|
|
486
561
|
|
|
487
562
|
return json_response({"status": "result_accepted_failure"}, status=200)
|
|
488
563
|
|
|
489
|
-
if result_status == "cancelled":
|
|
564
|
+
if result_status == TASK_STATUS_CANCELLED:
|
|
490
565
|
logging.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
|
|
491
|
-
job_state["status"] = "cancelled"
|
|
566
|
+
job_state["status"] = JOB_STATUS_CANCELLED
|
|
492
567
|
await self.storage.save_job_state(job_id, job_state)
|
|
493
568
|
# Optionally, trigger a specific 'cancelled' transition if defined in the blueprint
|
|
494
569
|
transitions = job_state.get("current_task_transitions", {})
|
|
495
570
|
if next_state := transitions.get("cancelled"):
|
|
496
571
|
job_state["current_state"] = next_state
|
|
497
|
-
job_state["status"] = "running"
|
|
572
|
+
job_state["status"] = JOB_STATUS_RUNNING # It's running the cancellation handler now
|
|
498
573
|
await self.storage.save_job_state(job_id, job_state)
|
|
499
574
|
await self.storage.enqueue_job(job_id)
|
|
500
575
|
return json_response({"status": "result_accepted_cancelled"}, status=200)
|
|
@@ -510,12 +585,12 @@ class OrchestratorEngine:
|
|
|
510
585
|
job_state["state_history"].update(worker_data)
|
|
511
586
|
|
|
512
587
|
job_state["current_state"] = next_state
|
|
513
|
-
job_state["status"] = "running"
|
|
588
|
+
job_state["status"] = JOB_STATUS_RUNNING
|
|
514
589
|
await self.storage.save_job_state(job_id, job_state)
|
|
515
590
|
await self.storage.enqueue_job(job_id)
|
|
516
591
|
else:
|
|
517
592
|
logging.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
|
|
518
|
-
job_state["status"] = "failed"
|
|
593
|
+
job_state["status"] = JOB_STATUS_FAILED
|
|
519
594
|
job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
|
|
520
595
|
await self.storage.save_job_state(job_id, job_state)
|
|
521
596
|
|
|
@@ -535,7 +610,7 @@ class OrchestratorEngine:
|
|
|
535
610
|
task_info = job_state.get("current_task_info")
|
|
536
611
|
if not task_info:
|
|
537
612
|
logging.error(f"Cannot retry job {job_id}: missing 'current_task_info' in job state.")
|
|
538
|
-
job_state["status"] = "failed"
|
|
613
|
+
job_state["status"] = JOB_STATUS_FAILED
|
|
539
614
|
job_state["error_message"] = "Cannot retry: original task info not found."
|
|
540
615
|
await self.storage.save_job_state(job_id, job_state)
|
|
541
616
|
return
|
|
@@ -544,7 +619,7 @@ class OrchestratorEngine:
|
|
|
544
619
|
timeout_seconds = task_info.get("timeout_seconds", self.config.WORKER_TIMEOUT_SECONDS)
|
|
545
620
|
timeout_at = now + timeout_seconds
|
|
546
621
|
|
|
547
|
-
job_state["status"] = "waiting_for_worker"
|
|
622
|
+
job_state["status"] = JOB_STATUS_WAITING_FOR_WORKER
|
|
548
623
|
job_state["task_dispatched_at"] = now
|
|
549
624
|
await self.storage.save_job_state(job_id, job_state)
|
|
550
625
|
await self.storage.add_job_to_watch(job_id, timeout_at)
|
|
@@ -552,7 +627,7 @@ class OrchestratorEngine:
|
|
|
552
627
|
await self.dispatcher.dispatch(job_state, task_info)
|
|
553
628
|
else:
|
|
554
629
|
logging.critical(f"Job {job_id} has failed {max_retries + 1} times. Moving to quarantine.")
|
|
555
|
-
job_state["status"] = "quarantined"
|
|
630
|
+
job_state["status"] = JOB_STATUS_QUARANTINED
|
|
556
631
|
job_state["error_message"] = f"Task failed after {max_retries + 1} attempts: {error_message}"
|
|
557
632
|
await self.storage.save_job_state(job_id, job_state)
|
|
558
633
|
await self.storage.quarantine_job(job_id)
|
|
@@ -571,14 +646,14 @@ class OrchestratorEngine:
|
|
|
571
646
|
job_state = await self.storage.get_job_state(job_id)
|
|
572
647
|
if not job_state:
|
|
573
648
|
return json_response({"error": "Job not found"}, status=404)
|
|
574
|
-
if job_state.get("status") not in [
|
|
649
|
+
if job_state.get("status") not in [JOB_STATUS_WAITING_FOR_WORKER, JOB_STATUS_WAITING_FOR_HUMAN]:
|
|
575
650
|
return json_response({"error": "Job is not in a state that can be approved"}, status=409)
|
|
576
651
|
transitions = job_state.get("current_task_transitions", {})
|
|
577
652
|
next_state = transitions.get(decision)
|
|
578
653
|
if not next_state:
|
|
579
654
|
return json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
|
|
580
655
|
job_state["current_state"] = next_state
|
|
581
|
-
job_state["status"] =
|
|
656
|
+
job_state["status"] = JOB_STATUS_RUNNING
|
|
582
657
|
await self.storage.save_job_state(job_id, job_state)
|
|
583
658
|
await self.storage.enqueue_job(job_id)
|
|
584
659
|
return json_response({"status": "approval_received", "job_id": job_id})
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
from abc import ABC
|
|
2
|
+
from contextlib import suppress
|
|
3
|
+
from datetime import datetime
|
|
2
4
|
from logging import getLogger
|
|
3
5
|
from typing import Any
|
|
4
6
|
from uuid import uuid4
|
|
7
|
+
from zoneinfo import ZoneInfo
|
|
5
8
|
|
|
6
|
-
from asyncpg import Pool, PostgresError, create_pool # type: ignore[import-untyped]
|
|
9
|
+
from asyncpg import Connection, Pool, PostgresError, create_pool # type: ignore[import-untyped]
|
|
10
|
+
from orjson import dumps, loads
|
|
7
11
|
|
|
8
12
|
from .base import HistoryStorageBase
|
|
9
13
|
|
|
@@ -41,14 +45,24 @@ CREATE_JOB_ID_INDEX_PG = "CREATE INDEX IF NOT EXISTS idx_job_id ON job_history(j
|
|
|
41
45
|
class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
42
46
|
"""Implementation of the history store based on asyncpg for PostgreSQL."""
|
|
43
47
|
|
|
44
|
-
def __init__(self, dsn: str):
|
|
48
|
+
def __init__(self, dsn: str, tz_name: str = "UTC"):
|
|
45
49
|
self._dsn = dsn
|
|
46
50
|
self._pool: Pool | None = None
|
|
51
|
+
self.tz_name = tz_name
|
|
52
|
+
self.tz = ZoneInfo(tz_name)
|
|
53
|
+
|
|
54
|
+
async def _setup_connection(self, conn: Connection):
|
|
55
|
+
"""Configures the connection session with the correct timezone."""
|
|
56
|
+
try:
|
|
57
|
+
await conn.execute(f"SET TIME ZONE '{self.tz_name}'")
|
|
58
|
+
except PostgresError as e:
|
|
59
|
+
logger.error(f"Failed to set timezone '{self.tz_name}' for PG connection: {e}")
|
|
47
60
|
|
|
48
61
|
async def initialize(self):
|
|
49
62
|
"""Initializes the connection pool to PostgreSQL and creates tables."""
|
|
50
63
|
try:
|
|
51
|
-
|
|
64
|
+
# We use init parameter to configure each new connection in the pool
|
|
65
|
+
self._pool = await create_pool(dsn=self._dsn, init=self._setup_connection)
|
|
52
66
|
if not self._pool:
|
|
53
67
|
raise RuntimeError("Failed to create a connection pool.")
|
|
54
68
|
|
|
@@ -56,7 +70,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
56
70
|
await conn.execute(CREATE_JOB_HISTORY_TABLE_PG)
|
|
57
71
|
await conn.execute(CREATE_WORKER_HISTORY_TABLE_PG)
|
|
58
72
|
await conn.execute(CREATE_JOB_ID_INDEX_PG)
|
|
59
|
-
logger.info("PostgreSQL history storage initialized.")
|
|
73
|
+
logger.info(f"PostgreSQL history storage initialized (TZ={self.tz_name}).")
|
|
60
74
|
except (PostgresError, OSError) as e:
|
|
61
75
|
logger.error(f"Failed to initialize PostgreSQL history storage: {e}")
|
|
62
76
|
raise
|
|
@@ -74,14 +88,20 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
74
88
|
|
|
75
89
|
query = """
|
|
76
90
|
INSERT INTO job_history (
|
|
77
|
-
event_id, job_id, state, event_type, duration_ms,
|
|
91
|
+
event_id, job_id, timestamp, state, event_type, duration_ms,
|
|
78
92
|
previous_state, next_state, worker_id, attempt_number,
|
|
79
93
|
context_snapshot
|
|
80
|
-
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
|
94
|
+
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
|
81
95
|
"""
|
|
96
|
+
now = datetime.now(self.tz)
|
|
97
|
+
|
|
98
|
+
context_snapshot = event_data.get("context_snapshot")
|
|
99
|
+
context_snapshot_json = dumps(context_snapshot).decode("utf-8") if context_snapshot else None
|
|
100
|
+
|
|
82
101
|
params = (
|
|
83
102
|
uuid4(),
|
|
84
103
|
event_data.get("job_id"),
|
|
104
|
+
now,
|
|
85
105
|
event_data.get("state"),
|
|
86
106
|
event_data.get("event_type"),
|
|
87
107
|
event_data.get("duration_ms"),
|
|
@@ -89,7 +109,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
89
109
|
event_data.get("next_state"),
|
|
90
110
|
event_data.get("worker_id"),
|
|
91
111
|
event_data.get("attempt_number"),
|
|
92
|
-
|
|
112
|
+
context_snapshot_json,
|
|
93
113
|
)
|
|
94
114
|
try:
|
|
95
115
|
async with self._pool.acquire() as conn:
|
|
@@ -104,14 +124,20 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
104
124
|
|
|
105
125
|
query = """
|
|
106
126
|
INSERT INTO worker_history (
|
|
107
|
-
event_id, worker_id, event_type, worker_info_snapshot
|
|
108
|
-
) VALUES ($1, $2, $3, $4)
|
|
127
|
+
event_id, worker_id, timestamp, event_type, worker_info_snapshot
|
|
128
|
+
) VALUES ($1, $2, $3, $4, $5)
|
|
109
129
|
"""
|
|
130
|
+
now = datetime.now(self.tz)
|
|
131
|
+
|
|
132
|
+
worker_info = event_data.get("worker_info_snapshot")
|
|
133
|
+
worker_info_json = dumps(worker_info).decode("utf-8") if worker_info else None
|
|
134
|
+
|
|
110
135
|
params = (
|
|
111
136
|
uuid4(),
|
|
112
137
|
event_data.get("worker_id"),
|
|
138
|
+
now,
|
|
113
139
|
event_data.get("event_type"),
|
|
114
|
-
|
|
140
|
+
worker_info_json,
|
|
115
141
|
)
|
|
116
142
|
try:
|
|
117
143
|
async with self._pool.acquire() as conn:
|
|
@@ -119,6 +145,23 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
119
145
|
except PostgresError as e:
|
|
120
146
|
logger.error(f"Failed to log worker event to PostgreSQL: {e}")
|
|
121
147
|
|
|
148
|
+
def _format_row(self, row: dict[str, Any]) -> dict[str, Any]:
|
|
149
|
+
"""Helper to format a row from DB: convert timestamp to local TZ and decode JSON."""
|
|
150
|
+
item = dict(row)
|
|
151
|
+
|
|
152
|
+
if isinstance(item.get("context_snapshot"), str):
|
|
153
|
+
with suppress(Exception):
|
|
154
|
+
item["context_snapshot"] = loads(item["context_snapshot"])
|
|
155
|
+
|
|
156
|
+
if isinstance(item.get("worker_info_snapshot"), str):
|
|
157
|
+
with suppress(Exception):
|
|
158
|
+
item["worker_info_snapshot"] = loads(item["worker_info_snapshot"])
|
|
159
|
+
|
|
160
|
+
if "timestamp" in item and isinstance(item["timestamp"], datetime):
|
|
161
|
+
item["timestamp"] = item["timestamp"].astimezone(self.tz)
|
|
162
|
+
|
|
163
|
+
return item
|
|
164
|
+
|
|
122
165
|
async def get_job_history(self, job_id: str) -> list[dict[str, Any]]:
|
|
123
166
|
"""Gets the full history for the specified job from PostgreSQL."""
|
|
124
167
|
if not self._pool:
|
|
@@ -128,7 +171,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
128
171
|
try:
|
|
129
172
|
async with self._pool.acquire() as conn:
|
|
130
173
|
rows = await conn.fetch(query, job_id)
|
|
131
|
-
return [
|
|
174
|
+
return [self._format_row(row) for row in rows]
|
|
132
175
|
except PostgresError as e:
|
|
133
176
|
logger.error(
|
|
134
177
|
f"Failed to get job history for job_id {job_id} from PostgreSQL: {e}",
|
|
@@ -154,7 +197,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
154
197
|
try:
|
|
155
198
|
async with self._pool.acquire() as conn:
|
|
156
199
|
rows = await conn.fetch(query, limit, offset)
|
|
157
|
-
return [
|
|
200
|
+
return [self._format_row(row) for row in rows]
|
|
158
201
|
except PostgresError as e:
|
|
159
202
|
logger.error(f"Failed to get jobs list from PostgreSQL: {e}")
|
|
160
203
|
return []
|
|
@@ -206,7 +249,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
|
|
|
206
249
|
try:
|
|
207
250
|
async with self._pool.acquire() as conn:
|
|
208
251
|
rows = await conn.fetch(query, worker_id, since_days)
|
|
209
|
-
return [
|
|
252
|
+
return [self._format_row(row) for row in rows]
|
|
210
253
|
except PostgresError as e:
|
|
211
254
|
logger.error(f"Failed to get worker history for worker_id {worker_id} from PostgreSQL: {e}")
|
|
212
255
|
return []
|