pyworkflow-engine 0.1.7__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. dashboard/backend/app/__init__.py +1 -0
  2. dashboard/backend/app/config.py +32 -0
  3. dashboard/backend/app/controllers/__init__.py +6 -0
  4. dashboard/backend/app/controllers/run_controller.py +86 -0
  5. dashboard/backend/app/controllers/workflow_controller.py +33 -0
  6. dashboard/backend/app/dependencies/__init__.py +5 -0
  7. dashboard/backend/app/dependencies/storage.py +50 -0
  8. dashboard/backend/app/repositories/__init__.py +6 -0
  9. dashboard/backend/app/repositories/run_repository.py +80 -0
  10. dashboard/backend/app/repositories/workflow_repository.py +27 -0
  11. dashboard/backend/app/rest/__init__.py +8 -0
  12. dashboard/backend/app/rest/v1/__init__.py +12 -0
  13. dashboard/backend/app/rest/v1/health.py +33 -0
  14. dashboard/backend/app/rest/v1/runs.py +133 -0
  15. dashboard/backend/app/rest/v1/workflows.py +41 -0
  16. dashboard/backend/app/schemas/__init__.py +23 -0
  17. dashboard/backend/app/schemas/common.py +16 -0
  18. dashboard/backend/app/schemas/event.py +24 -0
  19. dashboard/backend/app/schemas/hook.py +25 -0
  20. dashboard/backend/app/schemas/run.py +54 -0
  21. dashboard/backend/app/schemas/step.py +28 -0
  22. dashboard/backend/app/schemas/workflow.py +31 -0
  23. dashboard/backend/app/server.py +87 -0
  24. dashboard/backend/app/services/__init__.py +6 -0
  25. dashboard/backend/app/services/run_service.py +240 -0
  26. dashboard/backend/app/services/workflow_service.py +155 -0
  27. dashboard/backend/main.py +18 -0
  28. docs/concepts/cancellation.mdx +362 -0
  29. docs/concepts/continue-as-new.mdx +434 -0
  30. docs/concepts/events.mdx +266 -0
  31. docs/concepts/fault-tolerance.mdx +370 -0
  32. docs/concepts/hooks.mdx +552 -0
  33. docs/concepts/limitations.mdx +167 -0
  34. docs/concepts/schedules.mdx +775 -0
  35. docs/concepts/sleep.mdx +312 -0
  36. docs/concepts/steps.mdx +301 -0
  37. docs/concepts/workflows.mdx +255 -0
  38. docs/guides/cli.mdx +942 -0
  39. docs/guides/configuration.mdx +560 -0
  40. docs/introduction.mdx +155 -0
  41. docs/quickstart.mdx +279 -0
  42. examples/__init__.py +1 -0
  43. examples/celery/__init__.py +1 -0
  44. examples/celery/durable/docker-compose.yml +55 -0
  45. examples/celery/durable/pyworkflow.config.yaml +12 -0
  46. examples/celery/durable/workflows/__init__.py +122 -0
  47. examples/celery/durable/workflows/basic.py +87 -0
  48. examples/celery/durable/workflows/batch_processing.py +102 -0
  49. examples/celery/durable/workflows/cancellation.py +273 -0
  50. examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
  51. examples/celery/durable/workflows/child_workflows.py +202 -0
  52. examples/celery/durable/workflows/continue_as_new.py +260 -0
  53. examples/celery/durable/workflows/fault_tolerance.py +210 -0
  54. examples/celery/durable/workflows/hooks.py +211 -0
  55. examples/celery/durable/workflows/idempotency.py +112 -0
  56. examples/celery/durable/workflows/long_running.py +99 -0
  57. examples/celery/durable/workflows/retries.py +101 -0
  58. examples/celery/durable/workflows/schedules.py +209 -0
  59. examples/celery/transient/01_basic_workflow.py +91 -0
  60. examples/celery/transient/02_fault_tolerance.py +257 -0
  61. examples/celery/transient/__init__.py +20 -0
  62. examples/celery/transient/pyworkflow.config.yaml +25 -0
  63. examples/local/__init__.py +1 -0
  64. examples/local/durable/01_basic_workflow.py +94 -0
  65. examples/local/durable/02_file_storage.py +132 -0
  66. examples/local/durable/03_retries.py +169 -0
  67. examples/local/durable/04_long_running.py +119 -0
  68. examples/local/durable/05_event_log.py +145 -0
  69. examples/local/durable/06_idempotency.py +148 -0
  70. examples/local/durable/07_hooks.py +334 -0
  71. examples/local/durable/08_cancellation.py +233 -0
  72. examples/local/durable/09_child_workflows.py +198 -0
  73. examples/local/durable/10_child_workflow_patterns.py +265 -0
  74. examples/local/durable/11_continue_as_new.py +249 -0
  75. examples/local/durable/12_schedules.py +198 -0
  76. examples/local/durable/__init__.py +1 -0
  77. examples/local/transient/01_quick_tasks.py +87 -0
  78. examples/local/transient/02_retries.py +130 -0
  79. examples/local/transient/03_sleep.py +141 -0
  80. examples/local/transient/__init__.py +1 -0
  81. pyworkflow/__init__.py +256 -0
  82. pyworkflow/aws/__init__.py +68 -0
  83. pyworkflow/aws/context.py +234 -0
  84. pyworkflow/aws/handler.py +184 -0
  85. pyworkflow/aws/testing.py +310 -0
  86. pyworkflow/celery/__init__.py +41 -0
  87. pyworkflow/celery/app.py +198 -0
  88. pyworkflow/celery/scheduler.py +315 -0
  89. pyworkflow/celery/tasks.py +1746 -0
  90. pyworkflow/cli/__init__.py +132 -0
  91. pyworkflow/cli/__main__.py +6 -0
  92. pyworkflow/cli/commands/__init__.py +1 -0
  93. pyworkflow/cli/commands/hooks.py +640 -0
  94. pyworkflow/cli/commands/quickstart.py +495 -0
  95. pyworkflow/cli/commands/runs.py +773 -0
  96. pyworkflow/cli/commands/scheduler.py +130 -0
  97. pyworkflow/cli/commands/schedules.py +794 -0
  98. pyworkflow/cli/commands/setup.py +703 -0
  99. pyworkflow/cli/commands/worker.py +413 -0
  100. pyworkflow/cli/commands/workflows.py +1257 -0
  101. pyworkflow/cli/output/__init__.py +1 -0
  102. pyworkflow/cli/output/formatters.py +321 -0
  103. pyworkflow/cli/output/styles.py +121 -0
  104. pyworkflow/cli/utils/__init__.py +1 -0
  105. pyworkflow/cli/utils/async_helpers.py +30 -0
  106. pyworkflow/cli/utils/config.py +130 -0
  107. pyworkflow/cli/utils/config_generator.py +344 -0
  108. pyworkflow/cli/utils/discovery.py +53 -0
  109. pyworkflow/cli/utils/docker_manager.py +651 -0
  110. pyworkflow/cli/utils/interactive.py +364 -0
  111. pyworkflow/cli/utils/storage.py +115 -0
  112. pyworkflow/config.py +329 -0
  113. pyworkflow/context/__init__.py +63 -0
  114. pyworkflow/context/aws.py +230 -0
  115. pyworkflow/context/base.py +416 -0
  116. pyworkflow/context/local.py +930 -0
  117. pyworkflow/context/mock.py +381 -0
  118. pyworkflow/core/__init__.py +0 -0
  119. pyworkflow/core/exceptions.py +353 -0
  120. pyworkflow/core/registry.py +313 -0
  121. pyworkflow/core/scheduled.py +328 -0
  122. pyworkflow/core/step.py +494 -0
  123. pyworkflow/core/workflow.py +294 -0
  124. pyworkflow/discovery.py +248 -0
  125. pyworkflow/engine/__init__.py +0 -0
  126. pyworkflow/engine/events.py +879 -0
  127. pyworkflow/engine/executor.py +682 -0
  128. pyworkflow/engine/replay.py +273 -0
  129. pyworkflow/observability/__init__.py +19 -0
  130. pyworkflow/observability/logging.py +234 -0
  131. pyworkflow/primitives/__init__.py +33 -0
  132. pyworkflow/primitives/child_handle.py +174 -0
  133. pyworkflow/primitives/child_workflow.py +372 -0
  134. pyworkflow/primitives/continue_as_new.py +101 -0
  135. pyworkflow/primitives/define_hook.py +150 -0
  136. pyworkflow/primitives/hooks.py +97 -0
  137. pyworkflow/primitives/resume_hook.py +210 -0
  138. pyworkflow/primitives/schedule.py +545 -0
  139. pyworkflow/primitives/shield.py +96 -0
  140. pyworkflow/primitives/sleep.py +100 -0
  141. pyworkflow/runtime/__init__.py +21 -0
  142. pyworkflow/runtime/base.py +179 -0
  143. pyworkflow/runtime/celery.py +310 -0
  144. pyworkflow/runtime/factory.py +101 -0
  145. pyworkflow/runtime/local.py +706 -0
  146. pyworkflow/scheduler/__init__.py +9 -0
  147. pyworkflow/scheduler/local.py +248 -0
  148. pyworkflow/serialization/__init__.py +0 -0
  149. pyworkflow/serialization/decoder.py +146 -0
  150. pyworkflow/serialization/encoder.py +162 -0
  151. pyworkflow/storage/__init__.py +54 -0
  152. pyworkflow/storage/base.py +612 -0
  153. pyworkflow/storage/config.py +185 -0
  154. pyworkflow/storage/dynamodb.py +1315 -0
  155. pyworkflow/storage/file.py +827 -0
  156. pyworkflow/storage/memory.py +549 -0
  157. pyworkflow/storage/postgres.py +1161 -0
  158. pyworkflow/storage/schemas.py +486 -0
  159. pyworkflow/storage/sqlite.py +1136 -0
  160. pyworkflow/utils/__init__.py +0 -0
  161. pyworkflow/utils/duration.py +177 -0
  162. pyworkflow/utils/schedule.py +391 -0
  163. pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
  164. pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
  165. pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
  166. pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
  167. pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
  168. pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
  169. tests/examples/__init__.py +0 -0
  170. tests/integration/__init__.py +0 -0
  171. tests/integration/test_cancellation.py +330 -0
  172. tests/integration/test_child_workflows.py +439 -0
  173. tests/integration/test_continue_as_new.py +428 -0
  174. tests/integration/test_dynamodb_storage.py +1146 -0
  175. tests/integration/test_fault_tolerance.py +369 -0
  176. tests/integration/test_schedule_storage.py +484 -0
  177. tests/unit/__init__.py +0 -0
  178. tests/unit/backends/__init__.py +1 -0
  179. tests/unit/backends/test_dynamodb_storage.py +1554 -0
  180. tests/unit/backends/test_postgres_storage.py +1281 -0
  181. tests/unit/backends/test_sqlite_storage.py +1460 -0
  182. tests/unit/conftest.py +41 -0
  183. tests/unit/test_cancellation.py +364 -0
  184. tests/unit/test_child_workflows.py +680 -0
  185. tests/unit/test_continue_as_new.py +441 -0
  186. tests/unit/test_event_limits.py +316 -0
  187. tests/unit/test_executor.py +320 -0
  188. tests/unit/test_fault_tolerance.py +334 -0
  189. tests/unit/test_hooks.py +495 -0
  190. tests/unit/test_registry.py +261 -0
  191. tests/unit/test_replay.py +420 -0
  192. tests/unit/test_schedule_schemas.py +285 -0
  193. tests/unit/test_schedule_utils.py +286 -0
  194. tests/unit/test_scheduled_workflow.py +274 -0
  195. tests/unit/test_step.py +353 -0
  196. tests/unit/test_workflow.py +243 -0
pyworkflow/celery/app.py
@@ -0,0 +1,198 @@
+ """
+ Celery application for distributed workflow execution.
+
+ This module configures Celery for:
+ - Distributed step execution across workers
+ - Automatic retry with exponential backoff
+ - Scheduled task execution (sleep resumption)
+ - Result persistence
+
+ Note: With Python 3.13, you may see "BufferError: Existing exports of data"
+ warnings in Celery logs. This is a known compatibility issue between Python 3.13's
+ garbage collector and Celery's saferepr module. It does not affect functionality.
+ """
+
+ import os
+
+ from celery import Celery
+ from kombu import Exchange, Queue
+
+ from pyworkflow.observability.logging import configure_logging
+
+
+ def discover_workflows(modules: list[str] | None = None) -> None:
+     """
+     Discover and import workflow modules to register workflows with Celery workers.
+
+     This function imports Python modules containing workflow definitions so that
+     Celery workers can find and execute them.
+
+     Args:
+         modules: List of module paths to import (e.g., ["myapp.workflows", "myapp.tasks"]).
+             If None, reads from the PYWORKFLOW_DISCOVER environment variable.
+
+     Environment Variables:
+         PYWORKFLOW_DISCOVER: Comma-separated list of modules to import.
+             Example: "myapp.workflows,myapp.tasks,examples.functional.basic_workflow"
+
+     Examples:
+         # Discover from environment variable
+         discover_workflows()
+
+         # Discover specific modules
+         discover_workflows(["myapp.workflows", "myapp.tasks"])
+     """
+     if modules is None:
+         # Read from environment variable
+         discover_env = os.getenv("PYWORKFLOW_DISCOVER", "")
+         if not discover_env:
+             return
+         modules = [m.strip() for m in discover_env.split(",") if m.strip()]
+
+     for module_path in modules:
+         try:
+             __import__(module_path)
+             print(f"✓ Discovered workflows from: {module_path}")
+         except ImportError as e:
+             print(f"✗ Failed to import {module_path}: {e}")
+
+
+ def create_celery_app(
+     broker_url: str | None = None,
+     result_backend: str | None = None,
+     app_name: str = "pyworkflow",
+ ) -> Celery:
+     """
+     Create and configure a Celery application for PyWorkflow.
+
+     Args:
+         broker_url: Celery broker URL. Priority: parameter > PYWORKFLOW_CELERY_BROKER env var > redis://localhost:6379/0
+         result_backend: Result backend URL. Priority: parameter > PYWORKFLOW_CELERY_RESULT_BACKEND env var > redis://localhost:6379/1
+         app_name: Application name
+
+     Returns:
+         Configured Celery application
+
+     Environment Variables:
+         PYWORKFLOW_CELERY_BROKER: Celery broker URL (used if broker_url param not provided)
+         PYWORKFLOW_CELERY_RESULT_BACKEND: Result backend URL (used if result_backend param not provided)
+
+     Examples:
+         # Default configuration (uses env vars if set, otherwise localhost Redis)
+         app = create_celery_app()
+
+         # Custom Redis
+         app = create_celery_app(
+             broker_url="redis://redis-host:6379/0",
+             result_backend="redis://redis-host:6379/1"
+         )
+
+         # RabbitMQ with Redis backend
+         app = create_celery_app(
+             broker_url="amqp://guest:guest@rabbitmq:5672//",
+             result_backend="redis://localhost:6379/1"
+         )
+     """
+     # Priority: parameter > environment variable > hardcoded default
+     broker_url = broker_url or os.getenv("PYWORKFLOW_CELERY_BROKER") or "redis://localhost:6379/0"
+     result_backend = (
+         result_backend
+         or os.getenv("PYWORKFLOW_CELERY_RESULT_BACKEND")
+         or "redis://localhost:6379/1"
+     )
+
+     app = Celery(
+         app_name,
+         broker=broker_url,
+         backend=result_backend,
+         include=[
+             "pyworkflow.celery.tasks",
+         ],
+     )
+
+     # Configure Celery
+     app.conf.update(
+         # Task execution settings
+         task_serializer="json",
+         result_serializer="json",
+         accept_content=["json"],
+         timezone="UTC",
+         enable_utc=True,
+         # Task routing
+         task_default_queue="pyworkflow.default",
+         task_default_exchange="pyworkflow",
+         task_default_exchange_type="topic",
+         task_default_routing_key="workflow.default",
+         # Task queues
+         task_queues=(
+             Queue(
+                 "pyworkflow.default",
+                 Exchange("pyworkflow", type="topic"),
+                 routing_key="workflow.#",
+             ),
+             Queue(
+                 "pyworkflow.steps",
+                 Exchange("pyworkflow", type="topic"),
+                 routing_key="workflow.step.#",
+             ),
+             Queue(
+                 "pyworkflow.workflows",
+                 Exchange("pyworkflow", type="topic"),
+                 routing_key="workflow.workflow.#",
+             ),
+             Queue(
+                 "pyworkflow.schedules",
+                 Exchange("pyworkflow", type="topic"),
+                 routing_key="workflow.schedule.#",
+             ),
+         ),
+         # Result backend settings
+         result_expires=3600,  # 1 hour
+         result_persistent=True,
+         # Task execution
+         task_acks_late=True,
+         task_reject_on_worker_lost=True,
+         worker_prefetch_multiplier=1,  # Fair task distribution
+         # Retry settings
+         task_autoretry_for=(Exception,),
+         task_retry_backoff=True,
+         task_retry_backoff_max=600,  # 10 minutes max
+         task_retry_jitter=True,
+         # Monitoring
+         worker_send_task_events=True,
+         task_send_sent_event=True,
+         # Beat scheduler (for sleep resumption)
+         beat_schedule={},
+         # Logging
+         worker_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
+         worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] [%(task_name)s(%(task_id)s)] %(message)s",
+     )
+
+     # Configure logging
+     configure_logging(level="INFO")
+
+     # Auto-discover workflows from environment variable or configured modules
+     discover_workflows()
+
+     return app
+
+
+ # Global Celery app instance
+ # Can be customized by calling create_celery_app() with custom config
+ celery_app = create_celery_app()
+
+
+ def get_celery_app() -> Celery:
+     """
+     Get the global Celery application instance.
+
+     Returns:
+         Celery application
+
+     Example:
+         from pyworkflow.celery.app import get_celery_app
+
+         app = get_celery_app()
+         app.conf.update(broker_url="redis://custom:6379/0")
+     """
+     return celery_app
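
As an aside, a minimal worker entry point built on this module might look like the sketch below. The module name "myapp.workflows" and the Redis URLs are illustrative assumptions, not part of the package; only create_celery_app(), the PYWORKFLOW_DISCOVER variable, and the queue names come from the code above.

# Hypothetical worker_app.py -- a sketch, not shipped with the package.
import os

# Tell the app factory which modules to import so workflow definitions
# are registered before the worker starts consuming.
# "myapp.workflows" is an assumed module name.
os.environ.setdefault("PYWORKFLOW_DISCOVER", "myapp.workflows")

from pyworkflow.celery.app import create_celery_app

app = create_celery_app(
    broker_url="redis://localhost:6379/0",      # assumed broker
    result_backend="redis://localhost:6379/1",  # assumed result backend
)

# A worker would then be started against the queues declared in app.conf, e.g.:
#   celery -A worker_app worker \
#       --queues pyworkflow.default,pyworkflow.steps,pyworkflow.workflows \
#       --loglevel INFO
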
pyworkflow/celery/scheduler.py
@@ -0,0 +1,315 @@
+ """
+ Custom Celery Beat scheduler for PyWorkflow schedules.
+
+ This scheduler integrates with PyWorkflow's storage backend to dynamically
+ load and execute scheduled workflows without requiring Beat to be restarted
+ when schedules change.
+
+ Usage:
+     celery -A pyworkflow.celery.app beat \\
+         --scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler \\
+         --loglevel INFO
+
+ The scheduler:
+ 1. Polls storage for due schedules every sync_interval seconds
+ 2. Creates Celery tasks for each due schedule
+ 3. Handles overlap policies
+ 4. Updates schedule metadata after runs
+ """
+
+ import asyncio
+ from datetime import UTC, datetime
+ from typing import Any
+
+ from celery.beat import Scheduler
+ from loguru import logger
+
+ from pyworkflow.storage.base import StorageBackend
+ from pyworkflow.storage.config import config_to_storage
+ from pyworkflow.storage.schemas import OverlapPolicy, Schedule
+
+
+ class PyWorkflowScheduler(Scheduler):
+     """
+     Custom Celery Beat scheduler that reads schedules from PyWorkflow storage.
+
+     This scheduler:
+     1. Polls storage for due schedules every sync_interval
+     2. Creates Celery tasks for each due schedule
+     3. Handles overlap policies (skip, buffer, cancel, allow)
+     4. Updates schedule metadata after runs
+
+     Configuration:
+         The scheduler reads configuration from environment variables:
+         - PYWORKFLOW_STORAGE_BACKEND: Storage backend type (file, memory)
+         - PYWORKFLOW_STORAGE_PATH: Path for file storage backend
+
+     Example:
+         celery -A pyworkflow.celery.app beat \\
+             --scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler
+     """
+
+     #: How often to check for due schedules (seconds)
+     sync_interval = 5.0
+
+     def __init__(self, *args: Any, storage_config: dict[str, Any] | None = None, **kwargs: Any):
+         """
+         Initialize the scheduler.
+
+         Args:
+             storage_config: Storage backend configuration dict
+         """
+         super().__init__(*args, **kwargs)
+         self._storage_config = storage_config
+         self._storage: StorageBackend | None = None
+         self._last_schedule_check: datetime | None = None
+         self._initialized = False
+
+     def setup_schedule(self) -> None:
+         """Initialize the scheduler."""
+         super().setup_schedule()
+
+         # Activate any @scheduled_workflow decorated functions
+         self._activate_decorated_schedules()
+
+         logger.info("PyWorkflow scheduler initialized")
+         self._initialized = True
+
+     def _activate_decorated_schedules(self) -> None:
+         """Activate all @scheduled_workflow decorated functions."""
+         try:
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+             try:
+                 loop.run_until_complete(self._do_activate_schedules())
+             finally:
+                 loop.close()
+         except Exception as e:
+             logger.error(f"Error activating decorated schedules: {e}")
+
+     async def _do_activate_schedules(self) -> None:
+         """Actually activate the decorated schedules."""
+         from pyworkflow.core.scheduled import activate_scheduled_workflows
+
+         storage = self._get_storage()
+         if storage is None:
+             logger.warning("Storage not configured, cannot activate decorated schedules")
+             return
+
+         try:
+             schedule_ids = await activate_scheduled_workflows(storage=storage)
+             if schedule_ids:
+                 logger.info(f"Activated {len(schedule_ids)} decorated schedule(s): {schedule_ids}")
+             else:
+                 logger.debug("No decorated schedules to activate")
+         except Exception as e:
+             logger.error(f"Failed to activate decorated schedules: {e}")
+
+     def tick(self) -> float:
+         """
+         Called by Celery Beat on each tick.
+
+         Checks for due schedules and triggers them.
+
+         Returns:
+             Seconds until next tick
+         """
+         # Call parent tick to handle existing celery beat entries
+         remaining = super().tick()
+
+         # Check for due schedules
+         now = datetime.now(UTC)
+         if (
+             self._last_schedule_check is None
+             or (now - self._last_schedule_check).total_seconds() >= self.sync_interval
+         ):
+             self._sync_schedules()
+             self._last_schedule_check = now
+
+         # Return the smaller of the two intervals
+         return min(remaining, self.sync_interval)
+
+     def _sync_schedules(self) -> None:
+         """Sync schedules from storage and trigger due ones."""
+         try:
+             # Run async code in sync context
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+             try:
+                 loop.run_until_complete(self._process_due_schedules())
+             finally:
+                 loop.close()
+         except Exception as e:
+             logger.error(f"Error syncing schedules: {e}")
+
+     async def _process_due_schedules(self) -> None:
+         """Process all schedules that are due to run."""
+         from pyworkflow.celery.tasks import execute_scheduled_workflow_task
+         from pyworkflow.utils.schedule import calculate_next_run_time
+
+         storage = self._get_storage()
+         if storage is None:
+             logger.warning("Storage not configured, skipping schedule sync")
+             return
+
+         now = datetime.now(UTC)
+
+         try:
+             due_schedules = await storage.get_due_schedules(now)
+         except Exception as e:
+             logger.error(f"Failed to get due schedules: {e}")
+             return
+
+         for schedule in due_schedules:
+             try:
+                 should_run, reason = await self._check_overlap_policy(schedule, storage)
+
+                 if should_run:
+                     # Calculate next run time before triggering
+                     next_run_time = calculate_next_run_time(
+                         schedule.spec,
+                         last_run=schedule.next_run_time,
+                         now=now,
+                     )
+
+                     # Update next_run_time immediately to prevent duplicate triggers
+                     schedule.next_run_time = next_run_time
+                     schedule.updated_at = datetime.now(UTC)
+                     await storage.update_schedule(schedule)
+
+                     # Trigger the scheduled workflow task
+                     execute_scheduled_workflow_task.apply_async(
+                         kwargs={
+                             "schedule_id": schedule.schedule_id,
+                             "scheduled_time": schedule.next_run_time.isoformat()
+                             if schedule.next_run_time
+                             else now.isoformat(),
+                             "storage_config": self._storage_config,
+                         },
+                         queue="pyworkflow.schedules",
+                     )
+
+                     logger.info(
+                         f"Triggered scheduled workflow: {schedule.workflow_name}",
+                         schedule_id=schedule.schedule_id,
+                         next_run=next_run_time.isoformat() if next_run_time else None,
+                     )
+                 else:
+                     # Record skip and update next run time
+                     schedule.skipped_runs += 1
+                     next_run_time = calculate_next_run_time(
+                         schedule.spec,
+                         last_run=schedule.next_run_time,
+                         now=now,
+                     )
+                     schedule.next_run_time = next_run_time
+                     schedule.updated_at = datetime.now(UTC)
+                     await storage.update_schedule(schedule)
+
+                     logger.info(
+                         f"Skipped scheduled workflow: {schedule.workflow_name} ({reason})",
+                         schedule_id=schedule.schedule_id,
+                     )
+
+             except Exception as e:
+                 logger.error(
+                     f"Error processing schedule {schedule.schedule_id}: {e}",
+                 )
+
+     async def _check_overlap_policy(
+         self,
+         schedule: Schedule,
+         storage: StorageBackend,
+     ) -> tuple[bool, str | None]:
+         """
+         Check if schedule should run based on overlap policy.
+
+         Args:
+             schedule: The schedule to check
+             storage: Storage backend
+
+         Returns:
+             Tuple of (should_run, reason_if_not)
+         """
+         # No running runs means we can always run
+         if not schedule.running_run_ids:
+             return True, None
+
+         policy = schedule.overlap_policy
+
+         if policy == OverlapPolicy.ALLOW_ALL:
+             return True, None
+
+         elif policy == OverlapPolicy.SKIP:
+             return False, "Previous run still active (SKIP policy)"
+
+         elif policy == OverlapPolicy.BUFFER_ONE:
+             if schedule.buffered_count >= 1:
+                 return False, "Buffer full (BUFFER_ONE policy)"
+             # Increment buffer count
+             schedule.buffered_count += 1
+             await storage.update_schedule(schedule)
+             return True, None
+
+         elif policy == OverlapPolicy.BUFFER_ALL:
+             # Always allow, buffered_count tracks pending runs
+             schedule.buffered_count += 1
+             await storage.update_schedule(schedule)
+             return True, None
+
+         elif policy == OverlapPolicy.CANCEL_OTHER:
+             # Cancel running runs
+             from pyworkflow.primitives.cancel import cancel_workflow
+
+             for run_id in schedule.running_run_ids:
+                 try:
+                     await cancel_workflow(
+                         run_id,
+                         reason="Cancelled by CANCEL_OTHER overlap policy",
+                         storage=storage,
+                     )
+                 except Exception as e:
+                     logger.warning(f"Failed to cancel run {run_id}: {e}")
+
+             # Clear the running runs list
+             schedule.running_run_ids = []
+             await storage.update_schedule(schedule)
+             return True, None
+
+         # Default: allow
+         return True, None
+
+     def _get_storage(self) -> StorageBackend | None:
+         """Get or create the storage backend."""
+         if self._storage is not None:
+             return self._storage
+
+         # Try to get storage config from environment or defaults
+         if self._storage_config:
+             self._storage = config_to_storage(self._storage_config)
+             return self._storage
+
+         # Try default file storage
+         import os
+
+         storage_type = os.getenv("PYWORKFLOW_STORAGE_BACKEND", "file")
+         storage_path = os.getenv("PYWORKFLOW_STORAGE_PATH", "./pyworkflow_data")
+
+         if storage_type == "file":
+             from pyworkflow.storage.file import FileStorageBackend
+
+             self._storage = FileStorageBackend(storage_path)
+         elif storage_type == "memory":
+             from pyworkflow.storage.memory import InMemoryStorageBackend
+
+             self._storage = InMemoryStorageBackend()
+         else:
+             logger.warning(f"Unknown storage type: {storage_type}")
+             return None
+
+         return self._storage
+
+     @property
+     def info(self) -> str:
+         """Return scheduler info string."""
+         return f"PyWorkflowScheduler (sync_interval={self.sync_interval}s)"