avtomatika 1.0b5.tar.gz → 1.0b6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {avtomatika-1.0b5/src/avtomatika.egg-info → avtomatika-1.0b6}/PKG-INFO +35 -2
  2. {avtomatika-1.0b5 → avtomatika-1.0b6}/README.md +34 -1
  3. {avtomatika-1.0b5 → avtomatika-1.0b6}/pyproject.toml +1 -1
  4. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/config.py +4 -0
  5. avtomatika-1.0b6/src/avtomatika/constants.py +30 -0
  6. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/engine.py +100 -25
  7. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/postgres.py +56 -13
  8. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/sqlite.py +54 -34
  9. avtomatika-1.0b6/src/avtomatika/logging_config.py +92 -0
  10. avtomatika-1.0b6/src/avtomatika/scheduler.py +119 -0
  11. avtomatika-1.0b6/src/avtomatika/scheduler_config_loader.py +41 -0
  12. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/security.py +3 -5
  13. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/base.py +17 -3
  14. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/memory.py +41 -4
  15. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/redis.py +17 -0
  16. {avtomatika-1.0b5 → avtomatika-1.0b6/src/avtomatika.egg-info}/PKG-INFO +35 -2
  17. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/SOURCES.txt +4 -0
  18. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_engine.py +22 -16
  19. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_logging_config.py +16 -4
  20. avtomatika-1.0b6/tests/test_scheduler.py +200 -0
  21. avtomatika-1.0b5/src/avtomatika/logging_config.py +0 -41
  22. {avtomatika-1.0b5 → avtomatika-1.0b6}/LICENSE +0 -0
  23. {avtomatika-1.0b5 → avtomatika-1.0b6}/setup.cfg +0 -0
  24. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/__init__.py +0 -0
  25. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/api.html +0 -0
  26. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/blueprint.py +0 -0
  27. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/client_config_loader.py +0 -0
  28. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/compression.py +0 -0
  29. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/context.py +0 -0
  30. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/data_types.py +0 -0
  31. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/datastore.py +0 -0
  32. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/dispatcher.py +0 -0
  33. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/executor.py +0 -0
  34. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/health_checker.py +0 -0
  35. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/base.py +0 -0
  36. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/history/noop.py +0 -0
  37. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/metrics.py +0 -0
  38. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/py.typed +0 -0
  39. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/quota.py +0 -0
  40. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/ratelimit.py +0 -0
  41. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/reputation.py +0 -0
  42. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/storage/__init__.py +0 -0
  43. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/telemetry.py +0 -0
  44. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/watcher.py +0 -0
  45. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/worker_config_loader.py +0 -0
  46. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika/ws_manager.py +0 -0
  47. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/dependency_links.txt +0 -0
  48. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/requires.txt +0 -0
  49. {avtomatika-1.0b5 → avtomatika-1.0b6}/src/avtomatika.egg-info/top_level.txt +0 -0
  50. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_blueprint_conditions.py +0 -0
  51. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_blueprints.py +0 -0
  52. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_client_config_loader.py +0 -0
  53. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_compression.py +0 -0
  54. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_config_validation.py +0 -0
  55. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_context.py +0 -0
  56. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_dispatcher.py +0 -0
  57. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_dispatcher_extended.py +0 -0
  58. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_error_handling.py +0 -0
  59. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_executor.py +0 -0
  60. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_health_checker.py +0 -0
  61. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_history.py +0 -0
  62. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_integration.py +0 -0
  63. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_memory_locking.py +0 -0
  64. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_memory_storage.py +0 -0
  65. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_metrics.py +0 -0
  66. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_noop_history.py +0 -0
  67. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_postgres_history.py +0 -0
  68. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_ratelimit.py +0 -0
  69. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_redis_locking.py +0 -0
  70. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_redis_storage.py +0 -0
  71. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_reputation.py +0 -0
  72. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_telemetry.py +0 -0
  73. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_watcher.py +0 -0
  74. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_worker_config_loader.py +0 -0
  75. {avtomatika-1.0b5 → avtomatika-1.0b6}/tests/test_ws_manager.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: avtomatika
- Version: 1.0b5
+ Version: 1.0b6
  Summary: A state-machine based orchestrator for long-running AI and other jobs.
  Project-URL: Homepage, https://github.com/avtomatika-ai/avtomatika
  Project-URL: Bug Tracker, https://github.com/avtomatika-ai/avtomatika/issues
@@ -60,6 +60,7 @@ This document serves as a comprehensive guide for developers looking to build pi
  - [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
  - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
  - [Dependency Injection (DataStore)](#dependency-injection-datastore)
+ - [Native Scheduler](#native-scheduler)
  - [Production Configuration](#production-configuration)
  - [Fault Tolerance](#fault-tolerance)
  - [Storage Backend](#storage-backend)
@@ -74,7 +75,17 @@ The project is based on a simple yet powerful architectural pattern that separat

  * **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
  * **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
- * **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.## Installation
+ * **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
+
+ ## Ecosystem
+
+ Avtomatika is part of a larger ecosystem:
+
+ * **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
+ * **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
+ * **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
+
+ ## Installation

  * **Install the core engine only:**
  ```bash
@@ -328,6 +339,22 @@ async def cache_handler(data_stores):
  user_data = await data_stores.cache.get("user:123")
  print(f"User from cache: {user_data}")
  ```
+
+ ### 5. Native Scheduler
+
+ Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
+
+ * **Configuration:** Defined in `schedules.toml`.
+ * **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
+ * **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
+
+ ```toml
+ # schedules.toml example
+ [nightly_backup]
+ blueprint = "backup_flow"
+ daily_at = "02:00"
+ ```
+
  ## Production Configuration

  The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
@@ -349,6 +376,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
  [gpu-worker-01]
  token = "worker-secret-456"
  ```
+ - **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
+ ```toml
+ [nightly_backup]
+ blueprint = "backup_flow"
+ daily_at = "02:00"
+ ```

  For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).

README.md
@@ -14,6 +14,7 @@ This document serves as a comprehensive guide for developers looking to build pi
  - [Delegating Tasks to Workers (dispatch_task)](#delegating-tasks-to-workers-dispatch_task)
  - [Parallel Execution and Aggregation (Fan-out/Fan-in)](#parallel-execution-and-aggregation-fan-outfan-in)
  - [Dependency Injection (DataStore)](#dependency-injection-datastore)
+ - [Native Scheduler](#native-scheduler)
  - [Production Configuration](#production-configuration)
  - [Fault Tolerance](#fault-tolerance)
  - [Storage Backend](#storage-backend)
@@ -28,7 +29,17 @@ The project is based on a simple yet powerful architectural pattern that separat

  * **Orchestrator (OrchestratorEngine)** — The Director. It manages the entire process from start to finish, tracks state, handles errors, and decides what should happen next. It does not perform business tasks itself.
  * **Blueprints (Blueprint)** — The Script. Each blueprint is a detailed plan (a state machine) for a specific business process. It describes the steps (states) and the rules for transitioning between them.
- * **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.## Installation
+ * **Workers (Worker)** — The Team of Specialists. These are independent, specialized executors. Each worker knows how to perform a specific set of tasks (e.g., "process video," "send email") and reports back to the Orchestrator.
+
+ ## Ecosystem
+
+ Avtomatika is part of a larger ecosystem:
+
+ * **[Avtomatika Worker SDK](https://github.com/avtomatika-ai/avtomatika-worker)**: The official Python SDK for building workers that connect to this engine.
+ * **[RCA Protocol](https://github.com/avtomatika-ai/rca)**: The architectural specification and manifesto behind the system.
+ * **[Full Example](https://github.com/avtomatika-ai/avtomatika-full-example)**: A complete reference project demonstrating the engine and workers in action.
+
+ ## Installation

  * **Install the core engine only:**
  ```bash
@@ -282,6 +293,22 @@ async def cache_handler(data_stores):
  user_data = await data_stores.cache.get("user:123")
  print(f"User from cache: {user_data}")
  ```
+
+ ### 5. Native Scheduler
+
+ Avtomatika includes a built-in distributed scheduler. It allows you to trigger blueprints periodically (interval, daily, weekly, monthly) without external tools like cron.
+
+ * **Configuration:** Defined in `schedules.toml`.
+ * **Timezone Aware:** Supports global timezone configuration (e.g., `TZ="Europe/Moscow"`).
+ * **Distributed Locking:** Safe to run with multiple orchestrator instances; jobs are guaranteed to run only once per interval using distributed locks (Redis/Memory).
+
+ ```toml
+ # schedules.toml example
+ [nightly_backup]
+ blueprint = "backup_flow"
+ daily_at = "02:00"
+ ```
+
  ## Production Configuration

  The orchestrator's behavior can be configured through environment variables. Additionally, any configuration parameter loaded from environment variables can be programmatically overridden in your application code after the `Config` object has been initialized. This provides flexibility for different deployment and testing scenarios.
@@ -303,6 +330,12 @@ To manage access and worker settings securely, Avtomatika uses TOML configuratio
  [gpu-worker-01]
  token = "worker-secret-456"
  ```
+ - **`schedules.toml`**: Defines periodic tasks (CRON-like) for the native scheduler.
+ ```toml
+ [nightly_backup]
+ blueprint = "backup_flow"
+ daily_at = "02:00"
+ ```

  For detailed specifications and examples, please refer to the [**Configuration Guide**](docs/configuration.md).

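The Native Scheduler documented above is configured through `schedules.toml`; the loader module (`scheduler_config_loader.py`, added in this release) is not shown in this diff view. As a rough sketch only, a file of the shape shown above can be read with the standard library on Python 3.11+ (which this package already requires):

```python
# Illustrative sketch, not code from the package: reading a schedules.toml of the
# shape shown in the README diff above. The real parsing lives in
# scheduler_config_loader.py, which is not included in this diff view.
import tomllib

with open("schedules.toml", "rb") as f:
    schedules = tomllib.load(f)

for name, entry in schedules.items():
    # e.g. "nightly_backup" -> blueprint "backup_flow", daily_at "02:00"
    print(name, entry["blueprint"], entry.get("daily_at"))
```
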
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "avtomatika"
- version = "1.0b5"
+ version = "1.0b6"
  description = "A state-machine based orchestrator for long-running AI and other jobs."
  readme = "README.md"
  requires-python = ">=3.11"

src/avtomatika/config.py
@@ -62,3 +62,7 @@ class Config:
  # External config files
  self.WORKERS_CONFIG_PATH: str = getenv("WORKERS_CONFIG_PATH", "")
  self.CLIENTS_CONFIG_PATH: str = getenv("CLIENTS_CONFIG_PATH", "")
+ self.SCHEDULES_CONFIG_PATH: str = getenv("SCHEDULES_CONFIG_PATH", "")
+
+ # Timezone settings
+ self.TZ: str = getenv("TZ", "UTC")
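
Both new settings are read from ordinary environment variables, so they can be set before the `Config` object is constructed. A hedged sketch (the schedules path below is a placeholder, not something shipped with the package):

```python
# Sketch: the new SCHEDULES_CONFIG_PATH and TZ settings are plain environment
# variables read by Config; the path below is a placeholder.
import os

os.environ["SCHEDULES_CONFIG_PATH"] = "/etc/avtomatika/schedules.toml"
os.environ["TZ"] = "Europe/Moscow"

from avtomatika.config import Config

config = Config()
print(config.SCHEDULES_CONFIG_PATH, config.TZ)
```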

src/avtomatika/constants.py (new file)
@@ -0,0 +1,30 @@
+ """
+ Centralized constants for the Avtomatika protocol.
+ Use these constants instead of hardcoded strings to ensure consistency.
+ """
+
+ # --- Auth Headers ---
+ AUTH_HEADER_CLIENT = "X-Avtomatika-Token"
+ AUTH_HEADER_WORKER = "X-Worker-Token"
+
+ # --- Error Codes ---
+ # Error codes returned by workers in the result payload
+ ERROR_CODE_TRANSIENT = "TRANSIENT_ERROR"
+ ERROR_CODE_PERMANENT = "PERMANENT_ERROR"
+ ERROR_CODE_INVALID_INPUT = "INVALID_INPUT_ERROR"
+
+ # --- Task Statuses ---
+ # Standard statuses for task results
+ TASK_STATUS_SUCCESS = "success"
+ TASK_STATUS_FAILURE = "failure"
+ TASK_STATUS_CANCELLED = "cancelled"
+
+ # --- Job Statuses ---
+ JOB_STATUS_PENDING = "pending"
+ JOB_STATUS_WAITING_FOR_WORKER = "waiting_for_worker"
+ JOB_STATUS_RUNNING = "running"
+ JOB_STATUS_FAILED = "failed"
+ JOB_STATUS_QUARANTINED = "quarantined"
+ JOB_STATUS_CANCELLED = "cancelled"
+ JOB_STATUS_WAITING_FOR_HUMAN = "waiting_for_human"
+ JOB_STATUS_WAITING_FOR_PARALLEL = "waiting_for_parallel_tasks"
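
These constants mirror the string literals the engine previously hardcoded (see the `engine.py` changes below). A sketch of a worker result payload expressed with them; the IDs and message are illustrative, and the field layout follows what the engine reads from the result payload:

```python
# Illustrative sketch of a worker result payload built from the new constants.
# Field names follow what the engine reads: result.status, result.error.code,
# result.error.message; the IDs below are made up.
from avtomatika.constants import ERROR_CODE_TRANSIENT, TASK_STATUS_FAILURE

result_payload = {
    "job_id": "7c9e6679-7425-40de-944b-e07fc1f90ae7",
    "task_id": "encode-video-1",
    "worker_id": "gpu-worker-01",
    "result": {
        "status": TASK_STATUS_FAILURE,
        "error": {"code": ERROR_CODE_TRANSIENT, "message": "upstream service timed out"},
    },
}
```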

src/avtomatika/engine.py
@@ -14,6 +14,22 @@ from .blueprint import StateMachineBlueprint
  from .client_config_loader import load_client_configs_to_redis
  from .compression import compression_middleware
  from .config import Config
+ from .constants import (
+ ERROR_CODE_INVALID_INPUT,
+ ERROR_CODE_PERMANENT,
+ ERROR_CODE_TRANSIENT,
+ JOB_STATUS_CANCELLED,
+ JOB_STATUS_FAILED,
+ JOB_STATUS_PENDING,
+ JOB_STATUS_QUARANTINED,
+ JOB_STATUS_RUNNING,
+ JOB_STATUS_WAITING_FOR_HUMAN,
+ JOB_STATUS_WAITING_FOR_PARALLEL,
+ JOB_STATUS_WAITING_FOR_WORKER,
+ TASK_STATUS_CANCELLED,
+ TASK_STATUS_FAILURE,
+ TASK_STATUS_SUCCESS,
+ )
  from .dispatcher import Dispatcher
  from .executor import JobExecutor
  from .health_checker import HealthChecker
@@ -23,6 +39,7 @@ from .logging_config import setup_logging
  from .quota import quota_middleware_factory
  from .ratelimit import rate_limit_middleware_factory
  from .reputation import ReputationCalculator
+ from .scheduler import Scheduler
  from .security import client_auth_middleware_factory, worker_auth_middleware_factory
  from .storage.base import StorageBackend
  from .telemetry import setup_telemetry
@@ -38,10 +55,13 @@ EXECUTOR_KEY = AppKey("executor", JobExecutor)
  WATCHER_KEY = AppKey("watcher", Watcher)
  REPUTATION_CALCULATOR_KEY = AppKey("reputation_calculator", ReputationCalculator)
  HEALTH_CHECKER_KEY = AppKey("health_checker", HealthChecker)
+ SCHEDULER_KEY = AppKey("scheduler", Scheduler)
+
  EXECUTOR_TASK_KEY = AppKey("executor_task", Task)
  WATCHER_TASK_KEY = AppKey("watcher_task", Task)
  REPUTATION_CALCULATOR_TASK_KEY = AppKey("reputation_calculator_task", Task)
  HEALTH_CHECKER_TASK_KEY = AppKey("health_checker_task", Task)
+ SCHEDULER_TASK_KEY = AppKey("scheduler_task", Task)

  metrics.init_metrics()

@@ -66,7 +86,7 @@ async def metrics_handler(_request: web.Request) -> web.Response:

  class OrchestratorEngine:
  def __init__(self, storage: StorageBackend, config: Config):
- setup_logging(config.LOG_LEVEL, config.LOG_FORMAT)
+ setup_logging(config.LOG_LEVEL, config.LOG_FORMAT, config.TZ)
  setup_telemetry()
  self.storage = storage
  self.config = config
@@ -115,7 +135,7 @@ class OrchestratorEngine:
  storage_class = module.SQLiteHistoryStorage
  parsed_uri = urlparse(uri)
  db_path = parsed_uri.path
- storage_args = [db_path]
+ storage_args = [db_path, self.config.TZ]
  except ImportError as e:
  logger.error(f"Could not import SQLiteHistoryStorage, perhaps aiosqlite is not installed? Error: {e}")
  self.history_storage = NoOpHistoryStorage()
@@ -125,7 +145,7 @@ class OrchestratorEngine:
  try:
  module = import_module(".history.postgres", package="avtomatika")
  storage_class = module.PostgresHistoryStorage
- storage_args = [uri]
+ storage_args = [uri, self.config.TZ]
  except ImportError as e:
  logger.error(f"Could not import PostgresHistoryStorage, perhaps asyncpg is not installed? Error: {e}")
  self.history_storage = NoOpHistoryStorage()
@@ -199,11 +219,13 @@ class OrchestratorEngine:
  app[WATCHER_KEY] = Watcher(self)
  app[REPUTATION_CALCULATOR_KEY] = ReputationCalculator(self)
  app[HEALTH_CHECKER_KEY] = HealthChecker(self)
+ app[SCHEDULER_KEY] = Scheduler(self)

  app[EXECUTOR_TASK_KEY] = create_task(app[EXECUTOR_KEY].run())
  app[WATCHER_TASK_KEY] = create_task(app[WATCHER_KEY].run())
  app[REPUTATION_CALCULATOR_TASK_KEY] = create_task(app[REPUTATION_CALCULATOR_KEY].run())
  app[HEALTH_CHECKER_TASK_KEY] = create_task(app[HEALTH_CHECKER_KEY].run())
+ app[SCHEDULER_TASK_KEY] = create_task(app[SCHEDULER_KEY].run())

  async def on_shutdown(self, app: web.Application):
  logger.info("Shutdown sequence started.")
@@ -211,6 +233,7 @@ class OrchestratorEngine:
  app[WATCHER_KEY].stop()
  app[REPUTATION_CALCULATOR_KEY].stop()
  app[HEALTH_CHECKER_KEY].stop()
+ app[SCHEDULER_KEY].stop()
  logger.info("Background task running flags set to False.")

  if hasattr(self.history_storage, "close"):
@@ -226,6 +249,8 @@ class OrchestratorEngine:
  app[WATCHER_TASK_KEY].cancel()
  app[REPUTATION_CALCULATOR_TASK_KEY].cancel()
  app[EXECUTOR_TASK_KEY].cancel()
+ # Scheduler task manages its own loop cancellation in stop(), but just in case:
+ app[SCHEDULER_TASK_KEY].cancel()
  logger.info("Background tasks cancelled.")

  logger.info("Gathering background tasks with a 10s timeout...")
@@ -236,6 +261,7 @@ class OrchestratorEngine:
  app[WATCHER_TASK_KEY],
  app[REPUTATION_CALCULATOR_TASK_KEY],
  app[EXECUTOR_TASK_KEY],
+ app[SCHEDULER_TASK_KEY],
  return_exceptions=True,
  ),
  timeout=10.0,
@@ -249,6 +275,55 @@ class OrchestratorEngine:
  logger.info("HTTP session closed.")
  logger.info("Shutdown sequence finished.")

+ async def create_background_job(
+ self,
+ blueprint_name: str,
+ initial_data: dict[str, Any],
+ source: str = "internal",
+ ) -> str:
+ """Creates a job directly, bypassing the HTTP API layer.
+ Useful for internal schedulers and triggers.
+ """
+ blueprint = self.blueprints.get(blueprint_name)
+ if not blueprint:
+ raise ValueError(f"Blueprint '{blueprint_name}' not found.")
+
+ job_id = str(uuid4())
+ # Use a special internal client config
+ client_config = {
+ "token": "internal-scheduler",
+ "plan": "system",
+ "params": {"source": source},
+ }
+
+ job_state = {
+ "id": job_id,
+ "blueprint_name": blueprint.name,
+ "current_state": blueprint.start_state,
+ "initial_data": initial_data,
+ "state_history": {},
+ "status": JOB_STATUS_PENDING,
+ "tracing_context": {},
+ "client_config": client_config,
+ }
+ await self.storage.save_job_state(job_id, job_state)
+ await self.storage.enqueue_job(job_id)
+ metrics.jobs_total.inc({metrics.LABEL_BLUEPRINT: blueprint.name})
+
+ # Log the creation in history as well (so we can track scheduled jobs)
+ await self.history_storage.log_job_event(
+ {
+ "job_id": job_id,
+ "state": "pending",
+ "event_type": "job_created",
+ "context_snapshot": job_state,
+ "metadata": {"source": source, "scheduled": True},
+ }
+ )
+
+ logger.info(f"Created background job {job_id} for blueprint '{blueprint_name}' (source: {source})")
+ return job_id
+
  def _create_job_handler(self, blueprint: StateMachineBlueprint) -> Callable:
  async def handler(request: web.Request) -> web.Response:
  try:
@@ -266,7 +341,7 @@ class OrchestratorEngine:
  "current_state": blueprint.start_state,
  "initial_data": initial_data,
  "state_history": {},
- "status": "pending",
+ "status": JOB_STATUS_PENDING,
  "tracing_context": carrier,
  "client_config": client_config,
  }
@@ -295,7 +370,7 @@ class OrchestratorEngine:
  if not job_state:
  return json_response({"error": "Job not found"}, status=404)

- if job_state.get("status") != "waiting_for_worker":
+ if job_state.get("status") != JOB_STATUS_WAITING_FOR_WORKER:
  return json_response(
  {"error": "Job is not in a state that can be cancelled (must be waiting for a worker)."},
  status=409,
@@ -388,7 +463,7 @@ class OrchestratorEngine:
  job_id = data.get("job_id")
  task_id = data.get("task_id")
  result = data.get("result", {})
- result_status = result.get("status", "success")
+ result_status = result.get("status", TASK_STATUS_SUCCESS)
  error_message = result.get("error")
  payload_worker_id = data.get("worker_id")
  except Exception:
@@ -417,14 +492,14 @@ class OrchestratorEngine:
  return json_response({"error": "Job not found"}, status=404)

  # Handle parallel task completion
- if job_state.get("status") == "waiting_for_parallel_tasks":
+ if job_state.get("status") == JOB_STATUS_WAITING_FOR_PARALLEL:
  await self.storage.remove_job_from_watch(f"{job_id}:{task_id}")
  job_state.setdefault("aggregation_results", {})[task_id] = result
  job_state.setdefault("active_branches", []).remove(task_id)

  if not job_state["active_branches"]:
  logger.info(f"All parallel branches for job {job_id} have completed.")
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  job_state["current_state"] = job_state["aggregation_target"]
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
@@ -458,13 +533,13 @@ class OrchestratorEngine:

  job_state["tracing_context"] = {str(k): v for k, v in request.headers.items()}

- if result_status == "failure":
+ if result_status == TASK_STATUS_FAILURE:
  error_details = result.get("error", {})
- error_type = "TRANSIENT_ERROR"
+ error_type = ERROR_CODE_TRANSIENT
  error_message = "No error details provided."

  if isinstance(error_details, dict):
- error_type = error_details.get("code", "TRANSIENT_ERROR")
+ error_type = error_details.get("code", ERROR_CODE_TRANSIENT)
  error_message = error_details.get("message", "No error message provided.")
  elif isinstance(error_details, str):
  # Fallback for old format where `error` was just a string
@@ -472,13 +547,13 @@ class OrchestratorEngine:

  logging.warning(f"Task {task_id} for job {job_id} failed with error type '{error_type}'.")

- if error_type == "PERMANENT_ERROR":
- job_state["status"] = "quarantined"
+ if error_type == ERROR_CODE_PERMANENT:
+ job_state["status"] = JOB_STATUS_QUARANTINED
  job_state["error_message"] = f"Task failed with permanent error: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.quarantine_job(job_id)
- elif error_type == "INVALID_INPUT_ERROR":
- job_state["status"] = "failed"
+ elif error_type == ERROR_CODE_INVALID_INPUT:
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = f"Task failed due to invalid input: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  else: # TRANSIENT_ERROR or any other/unspecified error
@@ -486,15 +561,15 @@ class OrchestratorEngine:

  return json_response({"status": "result_accepted_failure"}, status=200)

- if result_status == "cancelled":
+ if result_status == TASK_STATUS_CANCELLED:
  logging.info(f"Task {task_id} for job {job_id} was cancelled by worker.")
- job_state["status"] = "cancelled"
+ job_state["status"] = JOB_STATUS_CANCELLED
  await self.storage.save_job_state(job_id, job_state)
  # Optionally, trigger a specific 'cancelled' transition if defined in the blueprint
  transitions = job_state.get("current_task_transitions", {})
  if next_state := transitions.get("cancelled"):
  job_state["current_state"] = next_state
- job_state["status"] = "running" # It's running the cancellation handler now
+ job_state["status"] = JOB_STATUS_RUNNING # It's running the cancellation handler now
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
  return json_response({"status": "result_accepted_cancelled"}, status=200)
@@ -510,12 +585,12 @@ class OrchestratorEngine:
  job_state["state_history"].update(worker_data)

  job_state["current_state"] = next_state
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
  else:
  logging.error(f"Job {job_id} failed. Worker returned unhandled status '{result_status}'.")
- job_state["status"] = "failed"
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = f"Worker returned unhandled status: {result_status}"
  await self.storage.save_job_state(job_id, job_state)

@@ -535,7 +610,7 @@ class OrchestratorEngine:
  task_info = job_state.get("current_task_info")
  if not task_info:
  logging.error(f"Cannot retry job {job_id}: missing 'current_task_info' in job state.")
- job_state["status"] = "failed"
+ job_state["status"] = JOB_STATUS_FAILED
  job_state["error_message"] = "Cannot retry: original task info not found."
  await self.storage.save_job_state(job_id, job_state)
  return
@@ -544,7 +619,7 @@ class OrchestratorEngine:
  timeout_seconds = task_info.get("timeout_seconds", self.config.WORKER_TIMEOUT_SECONDS)
  timeout_at = now + timeout_seconds

- job_state["status"] = "waiting_for_worker"
+ job_state["status"] = JOB_STATUS_WAITING_FOR_WORKER
  job_state["task_dispatched_at"] = now
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.add_job_to_watch(job_id, timeout_at)
@@ -552,7 +627,7 @@ class OrchestratorEngine:
  await self.dispatcher.dispatch(job_state, task_info)
  else:
  logging.critical(f"Job {job_id} has failed {max_retries + 1} times. Moving to quarantine.")
- job_state["status"] = "quarantined"
+ job_state["status"] = JOB_STATUS_QUARANTINED
  job_state["error_message"] = f"Task failed after {max_retries + 1} attempts: {error_message}"
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.quarantine_job(job_id)
@@ -571,14 +646,14 @@ class OrchestratorEngine:
  job_state = await self.storage.get_job_state(job_id)
  if not job_state:
  return json_response({"error": "Job not found"}, status=404)
- if job_state.get("status") not in ["waiting_for_worker", "waiting_for_human"]:
+ if job_state.get("status") not in [JOB_STATUS_WAITING_FOR_WORKER, JOB_STATUS_WAITING_FOR_HUMAN]:
  return json_response({"error": "Job is not in a state that can be approved"}, status=409)
  transitions = job_state.get("current_task_transitions", {})
  next_state = transitions.get(decision)
  if not next_state:
  return json_response({"error": f"Invalid decision '{decision}' for this job"}, status=400)
  job_state["current_state"] = next_state
- job_state["status"] = "running"
+ job_state["status"] = JOB_STATUS_RUNNING
  await self.storage.save_job_state(job_id, job_state)
  await self.storage.enqueue_job(job_id)
  return json_response({"status": "approval_received", "job_id": job_id})
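
The new `create_background_job` coroutine is what the scheduler (or any other internal trigger) can call to start a job without going through the HTTP API. A minimal sketch based only on the signature shown above; `engine` stands for a configured `OrchestratorEngine`, and the blueprint name and source label are illustrative:

```python
# Sketch: starting a job directly via the new create_background_job API.
# A ValueError is raised if the requested blueprint is not registered.
async def trigger_nightly_backup(engine) -> str:
    job_id = await engine.create_background_job(
        blueprint_name="backup_flow",
        initial_data={"reason": "nightly schedule"},
        source="scheduler:nightly_backup",
    )
    return job_id
```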

src/avtomatika/history/postgres.py
@@ -1,9 +1,13 @@
  from abc import ABC
+ from contextlib import suppress
+ from datetime import datetime
  from logging import getLogger
  from typing import Any
  from uuid import uuid4
+ from zoneinfo import ZoneInfo

- from asyncpg import Pool, PostgresError, create_pool # type: ignore[import-untyped]
+ from asyncpg import Connection, Pool, PostgresError, create_pool # type: ignore[import-untyped]
+ from orjson import dumps, loads

  from .base import HistoryStorageBase

@@ -41,14 +45,24 @@ CREATE_JOB_ID_INDEX_PG = "CREATE INDEX IF NOT EXISTS idx_job_id ON job_history(j
  class PostgresHistoryStorage(HistoryStorageBase, ABC):
  """Implementation of the history store based on asyncpg for PostgreSQL."""

- def __init__(self, dsn: str):
+ def __init__(self, dsn: str, tz_name: str = "UTC"):
  self._dsn = dsn
  self._pool: Pool | None = None
+ self.tz_name = tz_name
+ self.tz = ZoneInfo(tz_name)
+
+ async def _setup_connection(self, conn: Connection):
+ """Configures the connection session with the correct timezone."""
+ try:
+ await conn.execute(f"SET TIME ZONE '{self.tz_name}'")
+ except PostgresError as e:
+ logger.error(f"Failed to set timezone '{self.tz_name}' for PG connection: {e}")

  async def initialize(self):
  """Initializes the connection pool to PostgreSQL and creates tables."""
  try:
- self._pool = await create_pool(dsn=self._dsn)
+ # We use init parameter to configure each new connection in the pool
+ self._pool = await create_pool(dsn=self._dsn, init=self._setup_connection)
  if not self._pool:
  raise RuntimeError("Failed to create a connection pool.")

@@ -56,7 +70,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  await conn.execute(CREATE_JOB_HISTORY_TABLE_PG)
  await conn.execute(CREATE_WORKER_HISTORY_TABLE_PG)
  await conn.execute(CREATE_JOB_ID_INDEX_PG)
- logger.info("PostgreSQL history storage initialized.")
+ logger.info(f"PostgreSQL history storage initialized (TZ={self.tz_name}).")
  except (PostgresError, OSError) as e:
  logger.error(f"Failed to initialize PostgreSQL history storage: {e}")
  raise
@@ -74,14 +88,20 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):

  query = """
  INSERT INTO job_history (
- event_id, job_id, state, event_type, duration_ms,
+ event_id, job_id, timestamp, state, event_type, duration_ms,
  previous_state, next_state, worker_id, attempt_number,
  context_snapshot
- ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
+ ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
  """
+ now = datetime.now(self.tz)
+
+ context_snapshot = event_data.get("context_snapshot")
+ context_snapshot_json = dumps(context_snapshot).decode("utf-8") if context_snapshot else None
+
  params = (
  uuid4(),
  event_data.get("job_id"),
+ now,
  event_data.get("state"),
  event_data.get("event_type"),
  event_data.get("duration_ms"),
@@ -89,7 +109,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  event_data.get("next_state"),
  event_data.get("worker_id"),
  event_data.get("attempt_number"),
- event_data.get("context_snapshot"),
+ context_snapshot_json,
  )
  try:
  async with self._pool.acquire() as conn:
@@ -104,14 +124,20 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):

  query = """
  INSERT INTO worker_history (
- event_id, worker_id, event_type, worker_info_snapshot
- ) VALUES ($1, $2, $3, $4)
+ event_id, worker_id, timestamp, event_type, worker_info_snapshot
+ ) VALUES ($1, $2, $3, $4, $5)
  """
+ now = datetime.now(self.tz)
+
+ worker_info = event_data.get("worker_info_snapshot")
+ worker_info_json = dumps(worker_info).decode("utf-8") if worker_info else None
+
  params = (
  uuid4(),
  event_data.get("worker_id"),
+ now,
  event_data.get("event_type"),
- event_data.get("worker_info_snapshot"),
+ worker_info_json,
  )
  try:
  async with self._pool.acquire() as conn:
@@ -119,6 +145,23 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  except PostgresError as e:
  logger.error(f"Failed to log worker event to PostgreSQL: {e}")

+ def _format_row(self, row: dict[str, Any]) -> dict[str, Any]:
+ """Helper to format a row from DB: convert timestamp to local TZ and decode JSON."""
+ item = dict(row)
+
+ if isinstance(item.get("context_snapshot"), str):
+ with suppress(Exception):
+ item["context_snapshot"] = loads(item["context_snapshot"])
+
+ if isinstance(item.get("worker_info_snapshot"), str):
+ with suppress(Exception):
+ item["worker_info_snapshot"] = loads(item["worker_info_snapshot"])
+
+ if "timestamp" in item and isinstance(item["timestamp"], datetime):
+ item["timestamp"] = item["timestamp"].astimezone(self.tz)
+
+ return item
+
  async def get_job_history(self, job_id: str) -> list[dict[str, Any]]:
  """Gets the full history for the specified job from PostgreSQL."""
  if not self._pool:
@@ -128,7 +171,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  try:
  async with self._pool.acquire() as conn:
  rows = await conn.fetch(query, job_id)
- return [dict(row) for row in rows]
+ return [self._format_row(row) for row in rows]
  except PostgresError as e:
  logger.error(
  f"Failed to get job history for job_id {job_id} from PostgreSQL: {e}",
@@ -154,7 +197,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  try:
  async with self._pool.acquire() as conn:
  rows = await conn.fetch(query, limit, offset)
- return [dict(row) for row in rows]
+ return [self._format_row(row) for row in rows]
  except PostgresError as e:
  logger.error(f"Failed to get jobs list from PostgreSQL: {e}")
  return []
@@ -206,7 +249,7 @@ class PostgresHistoryStorage(HistoryStorageBase, ABC):
  try:
  async with self._pool.acquire() as conn:
  rows = await conn.fetch(query, worker_id, since_days)
- return [dict(row) for row in rows]
+ return [self._format_row(row) for row in rows]
  except PostgresError as e:
  logger.error(f"Failed to get worker history for worker_id {worker_id} from PostgreSQL: {e}")
  return []
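
For context on the pattern used above: asyncpg's `create_pool(init=...)` runs the given coroutine once for every new connection the pool opens, so a session-level `SET TIME ZONE` applies pool-wide. A standalone sketch with placeholder DSN and timezone:

```python
# Minimal sketch of the asyncpg init-callback pattern used above; DSN and
# timezone are placeholders.
import asyncio

import asyncpg


async def set_session_timezone(conn: asyncpg.Connection) -> None:
    await conn.execute("SET TIME ZONE 'Europe/Moscow'")


async def main() -> None:
    pool = await asyncpg.create_pool(
        dsn="postgresql://user:pass@localhost/avtomatika",
        init=set_session_timezone,
    )
    async with pool.acquire() as conn:
        print(await conn.fetchval("SHOW TIME ZONE"))
    await pool.close()


asyncio.run(main())
```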