pyworkflow-engine 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashboard/backend/app/__init__.py +1 -0
- dashboard/backend/app/config.py +32 -0
- dashboard/backend/app/controllers/__init__.py +6 -0
- dashboard/backend/app/controllers/run_controller.py +86 -0
- dashboard/backend/app/controllers/workflow_controller.py +33 -0
- dashboard/backend/app/dependencies/__init__.py +5 -0
- dashboard/backend/app/dependencies/storage.py +50 -0
- dashboard/backend/app/repositories/__init__.py +6 -0
- dashboard/backend/app/repositories/run_repository.py +80 -0
- dashboard/backend/app/repositories/workflow_repository.py +27 -0
- dashboard/backend/app/rest/__init__.py +8 -0
- dashboard/backend/app/rest/v1/__init__.py +12 -0
- dashboard/backend/app/rest/v1/health.py +33 -0
- dashboard/backend/app/rest/v1/runs.py +133 -0
- dashboard/backend/app/rest/v1/workflows.py +41 -0
- dashboard/backend/app/schemas/__init__.py +23 -0
- dashboard/backend/app/schemas/common.py +16 -0
- dashboard/backend/app/schemas/event.py +24 -0
- dashboard/backend/app/schemas/hook.py +25 -0
- dashboard/backend/app/schemas/run.py +54 -0
- dashboard/backend/app/schemas/step.py +28 -0
- dashboard/backend/app/schemas/workflow.py +31 -0
- dashboard/backend/app/server.py +87 -0
- dashboard/backend/app/services/__init__.py +6 -0
- dashboard/backend/app/services/run_service.py +240 -0
- dashboard/backend/app/services/workflow_service.py +155 -0
- dashboard/backend/main.py +18 -0
- docs/concepts/cancellation.mdx +362 -0
- docs/concepts/continue-as-new.mdx +434 -0
- docs/concepts/events.mdx +266 -0
- docs/concepts/fault-tolerance.mdx +370 -0
- docs/concepts/hooks.mdx +552 -0
- docs/concepts/limitations.mdx +167 -0
- docs/concepts/schedules.mdx +775 -0
- docs/concepts/sleep.mdx +312 -0
- docs/concepts/steps.mdx +301 -0
- docs/concepts/workflows.mdx +255 -0
- docs/guides/cli.mdx +942 -0
- docs/guides/configuration.mdx +560 -0
- docs/introduction.mdx +155 -0
- docs/quickstart.mdx +279 -0
- examples/__init__.py +1 -0
- examples/celery/__init__.py +1 -0
- examples/celery/durable/docker-compose.yml +55 -0
- examples/celery/durable/pyworkflow.config.yaml +12 -0
- examples/celery/durable/workflows/__init__.py +122 -0
- examples/celery/durable/workflows/basic.py +87 -0
- examples/celery/durable/workflows/batch_processing.py +102 -0
- examples/celery/durable/workflows/cancellation.py +273 -0
- examples/celery/durable/workflows/child_workflow_patterns.py +240 -0
- examples/celery/durable/workflows/child_workflows.py +202 -0
- examples/celery/durable/workflows/continue_as_new.py +260 -0
- examples/celery/durable/workflows/fault_tolerance.py +210 -0
- examples/celery/durable/workflows/hooks.py +211 -0
- examples/celery/durable/workflows/idempotency.py +112 -0
- examples/celery/durable/workflows/long_running.py +99 -0
- examples/celery/durable/workflows/retries.py +101 -0
- examples/celery/durable/workflows/schedules.py +209 -0
- examples/celery/transient/01_basic_workflow.py +91 -0
- examples/celery/transient/02_fault_tolerance.py +257 -0
- examples/celery/transient/__init__.py +20 -0
- examples/celery/transient/pyworkflow.config.yaml +25 -0
- examples/local/__init__.py +1 -0
- examples/local/durable/01_basic_workflow.py +94 -0
- examples/local/durable/02_file_storage.py +132 -0
- examples/local/durable/03_retries.py +169 -0
- examples/local/durable/04_long_running.py +119 -0
- examples/local/durable/05_event_log.py +145 -0
- examples/local/durable/06_idempotency.py +148 -0
- examples/local/durable/07_hooks.py +334 -0
- examples/local/durable/08_cancellation.py +233 -0
- examples/local/durable/09_child_workflows.py +198 -0
- examples/local/durable/10_child_workflow_patterns.py +265 -0
- examples/local/durable/11_continue_as_new.py +249 -0
- examples/local/durable/12_schedules.py +198 -0
- examples/local/durable/__init__.py +1 -0
- examples/local/transient/01_quick_tasks.py +87 -0
- examples/local/transient/02_retries.py +130 -0
- examples/local/transient/03_sleep.py +141 -0
- examples/local/transient/__init__.py +1 -0
- pyworkflow/__init__.py +256 -0
- pyworkflow/aws/__init__.py +68 -0
- pyworkflow/aws/context.py +234 -0
- pyworkflow/aws/handler.py +184 -0
- pyworkflow/aws/testing.py +310 -0
- pyworkflow/celery/__init__.py +41 -0
- pyworkflow/celery/app.py +198 -0
- pyworkflow/celery/scheduler.py +315 -0
- pyworkflow/celery/tasks.py +1746 -0
- pyworkflow/cli/__init__.py +132 -0
- pyworkflow/cli/__main__.py +6 -0
- pyworkflow/cli/commands/__init__.py +1 -0
- pyworkflow/cli/commands/hooks.py +640 -0
- pyworkflow/cli/commands/quickstart.py +495 -0
- pyworkflow/cli/commands/runs.py +773 -0
- pyworkflow/cli/commands/scheduler.py +130 -0
- pyworkflow/cli/commands/schedules.py +794 -0
- pyworkflow/cli/commands/setup.py +703 -0
- pyworkflow/cli/commands/worker.py +413 -0
- pyworkflow/cli/commands/workflows.py +1257 -0
- pyworkflow/cli/output/__init__.py +1 -0
- pyworkflow/cli/output/formatters.py +321 -0
- pyworkflow/cli/output/styles.py +121 -0
- pyworkflow/cli/utils/__init__.py +1 -0
- pyworkflow/cli/utils/async_helpers.py +30 -0
- pyworkflow/cli/utils/config.py +130 -0
- pyworkflow/cli/utils/config_generator.py +344 -0
- pyworkflow/cli/utils/discovery.py +53 -0
- pyworkflow/cli/utils/docker_manager.py +651 -0
- pyworkflow/cli/utils/interactive.py +364 -0
- pyworkflow/cli/utils/storage.py +115 -0
- pyworkflow/config.py +329 -0
- pyworkflow/context/__init__.py +63 -0
- pyworkflow/context/aws.py +230 -0
- pyworkflow/context/base.py +416 -0
- pyworkflow/context/local.py +930 -0
- pyworkflow/context/mock.py +381 -0
- pyworkflow/core/__init__.py +0 -0
- pyworkflow/core/exceptions.py +353 -0
- pyworkflow/core/registry.py +313 -0
- pyworkflow/core/scheduled.py +328 -0
- pyworkflow/core/step.py +494 -0
- pyworkflow/core/workflow.py +294 -0
- pyworkflow/discovery.py +248 -0
- pyworkflow/engine/__init__.py +0 -0
- pyworkflow/engine/events.py +879 -0
- pyworkflow/engine/executor.py +682 -0
- pyworkflow/engine/replay.py +273 -0
- pyworkflow/observability/__init__.py +19 -0
- pyworkflow/observability/logging.py +234 -0
- pyworkflow/primitives/__init__.py +33 -0
- pyworkflow/primitives/child_handle.py +174 -0
- pyworkflow/primitives/child_workflow.py +372 -0
- pyworkflow/primitives/continue_as_new.py +101 -0
- pyworkflow/primitives/define_hook.py +150 -0
- pyworkflow/primitives/hooks.py +97 -0
- pyworkflow/primitives/resume_hook.py +210 -0
- pyworkflow/primitives/schedule.py +545 -0
- pyworkflow/primitives/shield.py +96 -0
- pyworkflow/primitives/sleep.py +100 -0
- pyworkflow/runtime/__init__.py +21 -0
- pyworkflow/runtime/base.py +179 -0
- pyworkflow/runtime/celery.py +310 -0
- pyworkflow/runtime/factory.py +101 -0
- pyworkflow/runtime/local.py +706 -0
- pyworkflow/scheduler/__init__.py +9 -0
- pyworkflow/scheduler/local.py +248 -0
- pyworkflow/serialization/__init__.py +0 -0
- pyworkflow/serialization/decoder.py +146 -0
- pyworkflow/serialization/encoder.py +162 -0
- pyworkflow/storage/__init__.py +54 -0
- pyworkflow/storage/base.py +612 -0
- pyworkflow/storage/config.py +185 -0
- pyworkflow/storage/dynamodb.py +1315 -0
- pyworkflow/storage/file.py +827 -0
- pyworkflow/storage/memory.py +549 -0
- pyworkflow/storage/postgres.py +1161 -0
- pyworkflow/storage/schemas.py +486 -0
- pyworkflow/storage/sqlite.py +1136 -0
- pyworkflow/utils/__init__.py +0 -0
- pyworkflow/utils/duration.py +177 -0
- pyworkflow/utils/schedule.py +391 -0
- pyworkflow_engine-0.1.7.dist-info/METADATA +687 -0
- pyworkflow_engine-0.1.7.dist-info/RECORD +196 -0
- pyworkflow_engine-0.1.7.dist-info/WHEEL +5 -0
- pyworkflow_engine-0.1.7.dist-info/entry_points.txt +2 -0
- pyworkflow_engine-0.1.7.dist-info/licenses/LICENSE +21 -0
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +5 -0
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +330 -0
- tests/integration/test_child_workflows.py +439 -0
- tests/integration/test_continue_as_new.py +428 -0
- tests/integration/test_dynamodb_storage.py +1146 -0
- tests/integration/test_fault_tolerance.py +369 -0
- tests/integration/test_schedule_storage.py +484 -0
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +1 -0
- tests/unit/backends/test_dynamodb_storage.py +1554 -0
- tests/unit/backends/test_postgres_storage.py +1281 -0
- tests/unit/backends/test_sqlite_storage.py +1460 -0
- tests/unit/conftest.py +41 -0
- tests/unit/test_cancellation.py +364 -0
- tests/unit/test_child_workflows.py +680 -0
- tests/unit/test_continue_as_new.py +441 -0
- tests/unit/test_event_limits.py +316 -0
- tests/unit/test_executor.py +320 -0
- tests/unit/test_fault_tolerance.py +334 -0
- tests/unit/test_hooks.py +495 -0
- tests/unit/test_registry.py +261 -0
- tests/unit/test_replay.py +420 -0
- tests/unit/test_schedule_schemas.py +285 -0
- tests/unit/test_schedule_utils.py +286 -0
- tests/unit/test_scheduled_workflow.py +274 -0
- tests/unit/test_step.py +353 -0
- tests/unit/test_workflow.py +243 -0
pyworkflow/celery/app.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Celery application for distributed workflow execution.
|
|
3
|
+
|
|
4
|
+
This module configures Celery for:
|
|
5
|
+
- Distributed step execution across workers
|
|
6
|
+
- Automatic retry with exponential backoff
|
|
7
|
+
- Scheduled task execution (sleep resumption)
|
|
8
|
+
- Result persistence
|
|
9
|
+
|
|
10
|
+
Note: With Python 3.13, you may see "BufferError: Existing exports of data"
|
|
11
|
+
warnings in Celery logs. This is a known compatibility issue between Python 3.13's
|
|
12
|
+
garbage collector and Celery's saferepr module. It does not affect functionality.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import importlib
import os

from celery import Celery
from kombu import Exchange, Queue

from pyworkflow.observability.logging import configure_logging
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def discover_workflows(modules: list[str] | None = None) -> None:
|
|
24
|
+
"""
|
|
25
|
+
Discover and import workflow modules to register workflows with Celery workers.
|
|
26
|
+
|
|
27
|
+
This function imports Python modules containing workflow definitions so that
|
|
28
|
+
Celery workers can find and execute them.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
modules: List of module paths to import (e.g., ["myapp.workflows", "myapp.tasks"])
|
|
32
|
+
If None, reads from PYWORKFLOW_DISCOVER environment variable
|
|
33
|
+
|
|
34
|
+
Environment Variables:
|
|
35
|
+
PYWORKFLOW_DISCOVER: Comma-separated list of modules to import
|
|
36
|
+
Example: "myapp.workflows,myapp.tasks,examples.functional.basic_workflow"
|
|
37
|
+
|
|
38
|
+
Examples:
|
|
39
|
+
# Discover from environment variable
|
|
40
|
+
discover_workflows()
|
|
41
|
+
|
|
42
|
+
# Discover specific modules
|
|
43
|
+
discover_workflows(["myapp.workflows", "myapp.tasks"])
|
|
44
|
+
"""
|
|
45
|
+
if modules is None:
|
|
46
|
+
# Read from environment variable
|
|
47
|
+
discover_env = os.getenv("PYWORKFLOW_DISCOVER", "")
|
|
48
|
+
if not discover_env:
|
|
49
|
+
return
|
|
50
|
+
modules = [m.strip() for m in discover_env.split(",") if m.strip()]
|
|
51
|
+
|
|
52
|
+
for module_path in modules:
|
|
53
|
+
try:
|
|
54
|
+
__import__(module_path)
|
|
55
|
+
print(f"✓ Discovered workflows from: {module_path}")
|
|
56
|
+
except ImportError as e:
|
|
57
|
+
print(f"✗ Failed to import {module_path}: {e}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def create_celery_app(
    broker_url: str | None = None,
    result_backend: str | None = None,
    app_name: str = "pyworkflow",
) -> Celery:
    """
    Build and configure a Celery application for PyWorkflow.

    Args:
        broker_url: Celery broker URL. Resolution order: this parameter,
            then PYWORKFLOW_CELERY_BROKER, then redis://localhost:6379/0.
        result_backend: Result backend URL. Resolution order: this parameter,
            then PYWORKFLOW_CELERY_RESULT_BACKEND, then redis://localhost:6379/1.
        app_name: Application name.

    Returns:
        Configured Celery application.

    Environment Variables:
        PYWORKFLOW_CELERY_BROKER: Celery broker URL (fallback for broker_url)
        PYWORKFLOW_CELERY_RESULT_BACKEND: Result backend URL (fallback for result_backend)

    Examples:
        # Default configuration (env vars if set, otherwise localhost Redis)
        app = create_celery_app()

        # Custom Redis
        app = create_celery_app(
            broker_url="redis://redis-host:6379/0",
            result_backend="redis://redis-host:6379/1",
        )

        # RabbitMQ with Redis backend
        app = create_celery_app(
            broker_url="amqp://guest:guest@rabbitmq:5672//",
            result_backend="redis://localhost:6379/1",
        )
    """
    # Connection URLs: explicit argument > environment variable > localhost default.
    resolved_broker = (
        broker_url
        or os.getenv("PYWORKFLOW_CELERY_BROKER")
        or "redis://localhost:6379/0"
    )
    resolved_backend = (
        result_backend
        or os.getenv("PYWORKFLOW_CELERY_RESULT_BACKEND")
        or "redis://localhost:6379/1"
    )

    app = Celery(
        app_name,
        broker=resolved_broker,
        backend=resolved_backend,
        include=["pyworkflow.celery.tasks"],
    )

    # Every queue is bound to one shared topic exchange; routing keys follow
    # the "workflow.<kind>.#" pattern so tasks can be routed by category.
    topic_exchange = Exchange("pyworkflow", type="topic")
    queues = tuple(
        Queue(queue_name, topic_exchange, routing_key=routing_key)
        for queue_name, routing_key in (
            ("pyworkflow.default", "workflow.#"),
            ("pyworkflow.steps", "workflow.step.#"),
            ("pyworkflow.workflows", "workflow.workflow.#"),
            ("pyworkflow.schedules", "workflow.schedule.#"),
        )
    )

    app.conf.update(
        # Serialization and time handling
        task_serializer="json",
        result_serializer="json",
        accept_content=["json"],
        timezone="UTC",
        enable_utc=True,
        # Default routing
        task_default_queue="pyworkflow.default",
        task_default_exchange="pyworkflow",
        task_default_exchange_type="topic",
        task_default_routing_key="workflow.default",
        task_queues=queues,
        # Result backend behaviour
        result_expires=3600,  # keep results for 1 hour
        result_persistent=True,
        # Delivery semantics: ack only after completion, redeliver on worker loss
        task_acks_late=True,
        task_reject_on_worker_lost=True,
        worker_prefetch_multiplier=1,  # fair task distribution
        # Retry defaults
        # NOTE(review): autoretry_for/retry_backoff are documented as per-task
        # decorator options; confirm these app-level keys take effect.
        task_autoretry_for=(Exception,),
        task_retry_backoff=True,
        task_retry_backoff_max=600,  # cap backoff at 10 minutes
        task_retry_jitter=True,
        # Event monitoring (e.g. Flower)
        worker_send_task_events=True,
        task_send_sent_event=True,
        # Beat schedule starts empty; used for sleep resumption
        beat_schedule={},
        # Worker log formats
        worker_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
        worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] [%(task_name)s(%(task_id)s)] %(message)s",
    )

    # Side effects at app-creation time: logging setup and workflow discovery.
    configure_logging(level="INFO")
    discover_workflows()

    return app
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Global Celery app instance
# Can be customized by calling create_celery_app() with custom config
# NOTE: created at import time, so importing this module reads broker/backend
# settings from the environment and runs workflow discovery as a side effect.
celery_app = create_celery_app()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def get_celery_app() -> Celery:
    """
    Return the process-wide Celery application instance.

    The instance is the module-level ``celery_app`` created at import time;
    callers may further customize it via ``app.conf.update(...)``.

    Returns:
        The shared Celery application.

    Example:
        from pyworkflow.celery.app import get_celery_app

        app = get_celery_app()
        app.conf.update(broker_url="redis://custom:6379/0")
    """
    return celery_app
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom Celery Beat scheduler for PyWorkflow schedules.
|
|
3
|
+
|
|
4
|
+
This scheduler integrates with PyWorkflow's storage backend to dynamically
|
|
5
|
+
load and execute scheduled workflows without requiring Beat to be restarted
|
|
6
|
+
when schedules change.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
celery -A pyworkflow.celery.app beat \\
|
|
10
|
+
--scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler \\
|
|
11
|
+
--loglevel INFO
|
|
12
|
+
|
|
13
|
+
The scheduler:
|
|
14
|
+
1. Polls storage for due schedules every sync_interval seconds
|
|
15
|
+
2. Creates Celery tasks for each due schedule
|
|
16
|
+
3. Handles overlap policies
|
|
17
|
+
4. Updates schedule metadata after runs
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
from datetime import UTC, datetime
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from celery.beat import Scheduler
|
|
25
|
+
from loguru import logger
|
|
26
|
+
|
|
27
|
+
from pyworkflow.storage.base import StorageBackend
|
|
28
|
+
from pyworkflow.storage.config import config_to_storage
|
|
29
|
+
from pyworkflow.storage.schemas import OverlapPolicy, Schedule
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PyWorkflowScheduler(Scheduler):
    """
    Custom Celery Beat scheduler that reads schedules from PyWorkflow storage.

    This scheduler:
    1. Polls storage for due schedules every sync_interval
    2. Creates Celery tasks for each due schedule
    3. Handles overlap policies (skip, buffer, cancel, allow)
    4. Updates schedule metadata after runs

    Configuration:
        The scheduler reads configuration from environment variables:
        - PYWORKFLOW_STORAGE_BACKEND: Storage backend type (file, memory)
        - PYWORKFLOW_STORAGE_PATH: Path for file storage backend

    Example:
        celery -A pyworkflow.celery.app beat \\
            --scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler
    """

    #: How often to check for due schedules (seconds)
    sync_interval = 5.0

    def __init__(self, *args: Any, storage_config: dict[str, Any] | None = None, **kwargs: Any):
        """
        Initialize the scheduler.

        Args:
            storage_config: Storage backend configuration dict. When omitted,
                the backend is resolved lazily from environment variables
                (see _get_storage).
        """
        super().__init__(*args, **kwargs)
        self._storage_config = storage_config
        self._storage: StorageBackend | None = None
        self._last_schedule_check: datetime | None = None
        self._initialized = False

    def setup_schedule(self) -> None:
        """Initialize the scheduler and activate decorator-defined schedules."""
        super().setup_schedule()

        # Activate any @scheduled_workflow decorated functions
        self._activate_decorated_schedules()

        logger.info("PyWorkflow scheduler initialized")
        self._initialized = True

    def _activate_decorated_schedules(self) -> None:
        """Activate all @scheduled_workflow decorated functions.

        Celery Beat is synchronous, so a short-lived event loop is created to
        drive the async activation coroutine.  Errors are logged rather than
        raised so a storage problem cannot kill Beat at startup.
        """
        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(self._do_activate_schedules())
            finally:
                loop.close()
        except Exception as e:
            logger.error(f"Error activating decorated schedules: {e}")

    async def _do_activate_schedules(self) -> None:
        """Actually activate the decorated schedules."""
        # Imported lazily to avoid import cycles at module load time.
        from pyworkflow.core.scheduled import activate_scheduled_workflows

        storage = self._get_storage()
        if storage is None:
            logger.warning("Storage not configured, cannot activate decorated schedules")
            return

        try:
            schedule_ids = await activate_scheduled_workflows(storage=storage)
            if schedule_ids:
                logger.info(f"Activated {len(schedule_ids)} decorated schedule(s): {schedule_ids}")
            else:
                logger.debug("No decorated schedules to activate")
        except Exception as e:
            logger.error(f"Failed to activate decorated schedules: {e}")

    def tick(self) -> float:
        """
        Called by Celery Beat on each tick.

        Checks for due schedules and triggers them.

        Returns:
            Seconds until next tick
        """
        # Call parent tick to handle existing celery beat entries
        remaining = super().tick()

        # Throttle storage polling to at most once per sync_interval.
        now = datetime.now(UTC)
        if (
            self._last_schedule_check is None
            or (now - self._last_schedule_check).total_seconds() >= self.sync_interval
        ):
            self._sync_schedules()
            self._last_schedule_check = now

        # Return the smaller of the two intervals so Beat wakes up in time
        # for both its own entries and the next storage poll.
        return min(remaining, self.sync_interval)

    def _sync_schedules(self) -> None:
        """Sync schedules from storage and trigger due ones."""
        try:
            # Run async code in sync context (Beat's tick is synchronous).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(self._process_due_schedules())
            finally:
                loop.close()
        except Exception as e:
            logger.error(f"Error syncing schedules: {e}")

    async def _process_due_schedules(self) -> None:
        """Process all schedules that are due to run.

        For each due schedule the overlap policy decides whether it runs or is
        skipped; either way next_run_time is advanced and persisted BEFORE the
        Celery task is dispatched, so a crash between the two steps cannot
        double-trigger the schedule.
        """
        from pyworkflow.celery.tasks import execute_scheduled_workflow_task
        from pyworkflow.utils.schedule import calculate_next_run_time

        storage = self._get_storage()
        if storage is None:
            logger.warning("Storage not configured, skipping schedule sync")
            return

        now = datetime.now(UTC)

        try:
            due_schedules = await storage.get_due_schedules(now)
        except Exception as e:
            logger.error(f"Failed to get due schedules: {e}")
            return

        for schedule in due_schedules:
            try:
                should_run, reason = await self._check_overlap_policy(schedule, storage)

                if should_run:
                    # Capture the time this run was actually due BEFORE
                    # advancing next_run_time.  (Bug fix: previously the task
                    # was sent the already-advanced next_run_time as its
                    # "scheduled_time", i.e. a timestamp in the future.)
                    due_time = schedule.next_run_time

                    # Calculate next run time before triggering
                    next_run_time = calculate_next_run_time(
                        schedule.spec,
                        last_run=schedule.next_run_time,
                        now=now,
                    )

                    # Update next_run_time immediately to prevent duplicate triggers
                    schedule.next_run_time = next_run_time
                    schedule.updated_at = datetime.now(UTC)
                    await storage.update_schedule(schedule)

                    # Trigger the scheduled workflow task
                    execute_scheduled_workflow_task.apply_async(
                        kwargs={
                            "schedule_id": schedule.schedule_id,
                            "scheduled_time": due_time.isoformat()
                            if due_time
                            else now.isoformat(),
                            "storage_config": self._storage_config,
                        },
                        queue="pyworkflow.schedules",
                    )

                    logger.info(
                        f"Triggered scheduled workflow: {schedule.workflow_name}",
                        schedule_id=schedule.schedule_id,
                        next_run=next_run_time.isoformat() if next_run_time else None,
                    )
                else:
                    # Record skip and update next run time
                    schedule.skipped_runs += 1
                    next_run_time = calculate_next_run_time(
                        schedule.spec,
                        last_run=schedule.next_run_time,
                        now=now,
                    )
                    schedule.next_run_time = next_run_time
                    schedule.updated_at = datetime.now(UTC)
                    await storage.update_schedule(schedule)

                    logger.info(
                        f"Skipped scheduled workflow: {schedule.workflow_name} ({reason})",
                        schedule_id=schedule.schedule_id,
                    )

            except Exception as e:
                # One broken schedule must not block the rest of the batch.
                logger.error(
                    f"Error processing schedule {schedule.schedule_id}: {e}",
                )

    async def _check_overlap_policy(
        self,
        schedule: Schedule,
        storage: StorageBackend,
    ) -> tuple[bool, str | None]:
        """
        Check if schedule should run based on overlap policy.

        Args:
            schedule: The schedule to check
            storage: Storage backend

        Returns:
            Tuple of (should_run, reason_if_not)
        """
        # No running runs means we can always run
        if not schedule.running_run_ids:
            return True, None

        policy = schedule.overlap_policy

        if policy == OverlapPolicy.ALLOW_ALL:
            return True, None

        elif policy == OverlapPolicy.SKIP:
            return False, "Previous run still active (SKIP policy)"

        elif policy == OverlapPolicy.BUFFER_ONE:
            if schedule.buffered_count >= 1:
                return False, "Buffer full (BUFFER_ONE policy)"
            # Increment buffer count
            schedule.buffered_count += 1
            await storage.update_schedule(schedule)
            return True, None

        elif policy == OverlapPolicy.BUFFER_ALL:
            # Always allow, buffered_count tracks pending runs
            schedule.buffered_count += 1
            await storage.update_schedule(schedule)
            return True, None

        elif policy == OverlapPolicy.CANCEL_OTHER:
            # Cancel running runs before starting a new one.
            # NOTE(review): the packaged wheel's pyworkflow/primitives/ does
            # not list a "cancel" module — verify this import path exists
            # (cancel_workflow may live elsewhere).
            from pyworkflow.primitives.cancel import cancel_workflow

            for run_id in schedule.running_run_ids:
                try:
                    await cancel_workflow(
                        run_id,
                        reason="Cancelled by CANCEL_OTHER overlap policy",
                        storage=storage,
                    )
                except Exception as e:
                    logger.warning(f"Failed to cancel run {run_id}: {e}")

            # Clear the running runs list
            schedule.running_run_ids = []
            await storage.update_schedule(schedule)
            return True, None

        # Default: allow
        return True, None

    def _get_storage(self) -> StorageBackend | None:
        """Get or create the storage backend (cached after first resolution)."""
        if self._storage is not None:
            return self._storage

        # Explicit config dict wins over environment-based resolution.
        if self._storage_config:
            self._storage = config_to_storage(self._storage_config)
            return self._storage

        # Fall back to environment variables, defaulting to file storage.
        import os

        storage_type = os.getenv("PYWORKFLOW_STORAGE_BACKEND", "file")
        storage_path = os.getenv("PYWORKFLOW_STORAGE_PATH", "./pyworkflow_data")

        if storage_type == "file":
            from pyworkflow.storage.file import FileStorageBackend

            self._storage = FileStorageBackend(storage_path)
        elif storage_type == "memory":
            from pyworkflow.storage.memory import InMemoryStorageBackend

            self._storage = InMemoryStorageBackend()
        else:
            logger.warning(f"Unknown storage type: {storage_type}")
            return None

        return self._storage

    @property
    def info(self) -> str:
        """Return scheduler info string."""
        return f"PyWorkflowScheduler (sync_interval={self.sync_interval}s)"
|