pyworkflow-engine 0.1.7__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +10 -1
- pyworkflow/celery/tasks.py +272 -24
- pyworkflow/cli/__init__.py +4 -1
- pyworkflow/cli/commands/runs.py +4 -4
- pyworkflow/cli/commands/setup.py +203 -4
- pyworkflow/cli/utils/config_generator.py +76 -3
- pyworkflow/cli/utils/docker_manager.py +232 -0
- pyworkflow/config.py +94 -17
- pyworkflow/context/__init__.py +13 -0
- pyworkflow/context/base.py +26 -0
- pyworkflow/context/local.py +80 -0
- pyworkflow/context/step_context.py +295 -0
- pyworkflow/core/registry.py +6 -1
- pyworkflow/core/step.py +141 -0
- pyworkflow/core/workflow.py +56 -0
- pyworkflow/engine/events.py +30 -0
- pyworkflow/engine/replay.py +39 -0
- pyworkflow/primitives/child_workflow.py +1 -1
- pyworkflow/runtime/local.py +1 -1
- pyworkflow/storage/__init__.py +14 -0
- pyworkflow/storage/base.py +35 -0
- pyworkflow/storage/cassandra.py +1747 -0
- pyworkflow/storage/config.py +69 -0
- pyworkflow/storage/dynamodb.py +31 -2
- pyworkflow/storage/file.py +28 -0
- pyworkflow/storage/memory.py +18 -0
- pyworkflow/storage/mysql.py +1159 -0
- pyworkflow/storage/postgres.py +27 -2
- pyworkflow/storage/schemas.py +4 -3
- pyworkflow/storage/sqlite.py +25 -2
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/METADATA +7 -4
- pyworkflow_engine-0.1.10.dist-info/RECORD +91 -0
- pyworkflow_engine-0.1.10.dist-info/top_level.txt +1 -0
- dashboard/backend/app/__init__.py +0 -1
- dashboard/backend/app/config.py +0 -32
- dashboard/backend/app/controllers/__init__.py +0 -6
- dashboard/backend/app/controllers/run_controller.py +0 -86
- dashboard/backend/app/controllers/workflow_controller.py +0 -33
- dashboard/backend/app/dependencies/__init__.py +0 -5
- dashboard/backend/app/dependencies/storage.py +0 -50
- dashboard/backend/app/repositories/__init__.py +0 -6
- dashboard/backend/app/repositories/run_repository.py +0 -80
- dashboard/backend/app/repositories/workflow_repository.py +0 -27
- dashboard/backend/app/rest/__init__.py +0 -8
- dashboard/backend/app/rest/v1/__init__.py +0 -12
- dashboard/backend/app/rest/v1/health.py +0 -33
- dashboard/backend/app/rest/v1/runs.py +0 -133
- dashboard/backend/app/rest/v1/workflows.py +0 -41
- dashboard/backend/app/schemas/__init__.py +0 -23
- dashboard/backend/app/schemas/common.py +0 -16
- dashboard/backend/app/schemas/event.py +0 -24
- dashboard/backend/app/schemas/hook.py +0 -25
- dashboard/backend/app/schemas/run.py +0 -54
- dashboard/backend/app/schemas/step.py +0 -28
- dashboard/backend/app/schemas/workflow.py +0 -31
- dashboard/backend/app/server.py +0 -87
- dashboard/backend/app/services/__init__.py +0 -6
- dashboard/backend/app/services/run_service.py +0 -240
- dashboard/backend/app/services/workflow_service.py +0 -155
- dashboard/backend/main.py +0 -18
- docs/concepts/cancellation.mdx +0 -362
- docs/concepts/continue-as-new.mdx +0 -434
- docs/concepts/events.mdx +0 -266
- docs/concepts/fault-tolerance.mdx +0 -370
- docs/concepts/hooks.mdx +0 -552
- docs/concepts/limitations.mdx +0 -167
- docs/concepts/schedules.mdx +0 -775
- docs/concepts/sleep.mdx +0 -312
- docs/concepts/steps.mdx +0 -301
- docs/concepts/workflows.mdx +0 -255
- docs/guides/cli.mdx +0 -942
- docs/guides/configuration.mdx +0 -560
- docs/introduction.mdx +0 -155
- docs/quickstart.mdx +0 -279
- examples/__init__.py +0 -1
- examples/celery/__init__.py +0 -1
- examples/celery/durable/docker-compose.yml +0 -55
- examples/celery/durable/pyworkflow.config.yaml +0 -12
- examples/celery/durable/workflows/__init__.py +0 -122
- examples/celery/durable/workflows/basic.py +0 -87
- examples/celery/durable/workflows/batch_processing.py +0 -102
- examples/celery/durable/workflows/cancellation.py +0 -273
- examples/celery/durable/workflows/child_workflow_patterns.py +0 -240
- examples/celery/durable/workflows/child_workflows.py +0 -202
- examples/celery/durable/workflows/continue_as_new.py +0 -260
- examples/celery/durable/workflows/fault_tolerance.py +0 -210
- examples/celery/durable/workflows/hooks.py +0 -211
- examples/celery/durable/workflows/idempotency.py +0 -112
- examples/celery/durable/workflows/long_running.py +0 -99
- examples/celery/durable/workflows/retries.py +0 -101
- examples/celery/durable/workflows/schedules.py +0 -209
- examples/celery/transient/01_basic_workflow.py +0 -91
- examples/celery/transient/02_fault_tolerance.py +0 -257
- examples/celery/transient/__init__.py +0 -20
- examples/celery/transient/pyworkflow.config.yaml +0 -25
- examples/local/__init__.py +0 -1
- examples/local/durable/01_basic_workflow.py +0 -94
- examples/local/durable/02_file_storage.py +0 -132
- examples/local/durable/03_retries.py +0 -169
- examples/local/durable/04_long_running.py +0 -119
- examples/local/durable/05_event_log.py +0 -145
- examples/local/durable/06_idempotency.py +0 -148
- examples/local/durable/07_hooks.py +0 -334
- examples/local/durable/08_cancellation.py +0 -233
- examples/local/durable/09_child_workflows.py +0 -198
- examples/local/durable/10_child_workflow_patterns.py +0 -265
- examples/local/durable/11_continue_as_new.py +0 -249
- examples/local/durable/12_schedules.py +0 -198
- examples/local/durable/__init__.py +0 -1
- examples/local/transient/01_quick_tasks.py +0 -87
- examples/local/transient/02_retries.py +0 -130
- examples/local/transient/03_sleep.py +0 -141
- examples/local/transient/__init__.py +0 -1
- pyworkflow_engine-0.1.7.dist-info/RECORD +0 -196
- pyworkflow_engine-0.1.7.dist-info/top_level.txt +0 -5
- tests/examples/__init__.py +0 -0
- tests/integration/__init__.py +0 -0
- tests/integration/test_cancellation.py +0 -330
- tests/integration/test_child_workflows.py +0 -439
- tests/integration/test_continue_as_new.py +0 -428
- tests/integration/test_dynamodb_storage.py +0 -1146
- tests/integration/test_fault_tolerance.py +0 -369
- tests/integration/test_schedule_storage.py +0 -484
- tests/unit/__init__.py +0 -0
- tests/unit/backends/__init__.py +0 -1
- tests/unit/backends/test_dynamodb_storage.py +0 -1554
- tests/unit/backends/test_postgres_storage.py +0 -1281
- tests/unit/backends/test_sqlite_storage.py +0 -1460
- tests/unit/conftest.py +0 -41
- tests/unit/test_cancellation.py +0 -364
- tests/unit/test_child_workflows.py +0 -680
- tests/unit/test_continue_as_new.py +0 -441
- tests/unit/test_event_limits.py +0 -316
- tests/unit/test_executor.py +0 -320
- tests/unit/test_fault_tolerance.py +0 -334
- tests/unit/test_hooks.py +0 -495
- tests/unit/test_registry.py +0 -261
- tests/unit/test_replay.py +0 -420
- tests/unit/test_schedule_schemas.py +0 -285
- tests/unit/test_schedule_utils.py +0 -286
- tests/unit/test_scheduled_workflow.py +0 -274
- tests/unit/test_step.py +0 -353
- tests/unit/test_workflow.py +0 -243
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.7.dist-info → pyworkflow_engine-0.1.10.dist-info}/licenses/LICENSE +0 -0
docs/concepts/schedules.mdx
DELETED
|
@@ -1,775 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
title: 'Schedules'
|
|
3
|
-
description: 'Automatically execute workflows on a recurring basis using cron, intervals, or calendar-based schedules'
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
## What are Schedules?
|
|
7
|
-
|
|
8
|
-
Schedules allow you to automatically trigger workflow executions at specified times or intervals. Instead of manually starting workflows, you can configure them to run:
|
|
9
|
-
|
|
10
|
-
- **On a cron schedule**: Run at specific times (e.g., every day at 9 AM)
|
|
11
|
-
- **At regular intervals**: Run repeatedly with a fixed delay (e.g., every 5 minutes)
|
|
12
|
-
- **On calendar dates**: Run on specific days of the month or week
|
|
13
|
-
|
|
14
|
-
```python
|
|
15
|
-
from pyworkflow import scheduled_workflow, OverlapPolicy
|
|
16
|
-
|
|
17
|
-
@scheduled_workflow(cron="0 9 * * *")
|
|
18
|
-
async def daily_report():
|
|
19
|
-
"""Runs every day at 9 AM"""
|
|
20
|
-
data = await gather_metrics()
|
|
21
|
-
await generate_report(data)
|
|
22
|
-
return {"status": "report_generated"}
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
## Key Features
|
|
26
|
-
|
|
27
|
-
<CardGroup cols={2}>
|
|
28
|
-
<Card title="Multiple Schedule Types" icon="calendar">
|
|
29
|
-
Choose from cron expressions, intervals, or calendar-based scheduling.
|
|
30
|
-
</Card>
|
|
31
|
-
<Card title="Overlap Policies" icon="layer-group">
|
|
32
|
-
Control what happens when a new run is triggered while a previous run is still executing.
|
|
33
|
-
</Card>
|
|
34
|
-
<Card title="Dynamic Management" icon="sliders">
|
|
35
|
-
Create, pause, resume, and delete schedules at runtime without redeploying.
|
|
36
|
-
</Card>
|
|
37
|
-
<Card title="Backfill Support" icon="clock-rotate-left">
|
|
38
|
-
Catch up on missed runs after downtime with backfill capabilities.
|
|
39
|
-
</Card>
|
|
40
|
-
</CardGroup>
|
|
41
|
-
|
|
42
|
-
## Creating Schedules
|
|
43
|
-
|
|
44
|
-
There are two ways to create schedules:
|
|
45
|
-
|
|
46
|
-
### Using the Decorator
|
|
47
|
-
|
|
48
|
-
The `@scheduled_workflow` decorator combines workflow definition with schedule configuration:
|
|
49
|
-
|
|
50
|
-
<Tabs>
|
|
51
|
-
<Tab title="Cron Schedule">
|
|
52
|
-
```python
|
|
53
|
-
from pyworkflow import scheduled_workflow
|
|
54
|
-
|
|
55
|
-
@scheduled_workflow(cron="0 9 * * *") # Every day at 9 AM
|
|
56
|
-
async def daily_cleanup():
|
|
57
|
-
await cleanup_old_records()
|
|
58
|
-
return {"cleaned": True}
|
|
59
|
-
```
|
|
60
|
-
</Tab>
|
|
61
|
-
<Tab title="Interval Schedule">
|
|
62
|
-
```python
|
|
63
|
-
from pyworkflow import scheduled_workflow
|
|
64
|
-
|
|
65
|
-
@scheduled_workflow(interval="5m") # Every 5 minutes
|
|
66
|
-
async def health_check():
|
|
67
|
-
status = await check_system_health()
|
|
68
|
-
return {"healthy": status}
|
|
69
|
-
```
|
|
70
|
-
</Tab>
|
|
71
|
-
<Tab title="With Options">
|
|
72
|
-
```python
|
|
73
|
-
from pyworkflow import scheduled_workflow, OverlapPolicy
|
|
74
|
-
|
|
75
|
-
@scheduled_workflow(
|
|
76
|
-
cron="0 */4 * * *", # Every 4 hours
|
|
77
|
-
timezone="America/New_York",
|
|
78
|
-
overlap_policy=OverlapPolicy.SKIP,
|
|
79
|
-
recover_on_worker_loss=True,
|
|
80
|
-
)
|
|
81
|
-
async def sync_external_data():
|
|
82
|
-
await sync_data()
|
|
83
|
-
return {"synced": True}
|
|
84
|
-
```
|
|
85
|
-
</Tab>
|
|
86
|
-
</Tabs>
|
|
87
|
-
|
|
88
|
-
### Using the API
|
|
89
|
-
|
|
90
|
-
For dynamic schedule creation, use the `create_schedule` function:
|
|
91
|
-
|
|
92
|
-
```python
|
|
93
|
-
from pyworkflow import create_schedule, ScheduleSpec, OverlapPolicy
|
|
94
|
-
|
|
95
|
-
# Create a schedule for an existing workflow
|
|
96
|
-
schedule = await create_schedule(
|
|
97
|
-
workflow_name="daily_report",
|
|
98
|
-
spec=ScheduleSpec(cron="0 9 * * *"),
|
|
99
|
-
overlap_policy=OverlapPolicy.SKIP,
|
|
100
|
-
schedule_id="daily_report_schedule", # Optional custom ID
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
print(f"Created schedule: {schedule.schedule_id}")
|
|
104
|
-
print(f"Next run: {schedule.next_run_time}")
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
## Schedule Types
|
|
108
|
-
|
|
109
|
-
### Cron Expressions
|
|
110
|
-
|
|
111
|
-
Cron expressions provide precise control over when workflows run. The format is:
|
|
112
|
-
|
|
113
|
-
```
|
|
114
|
-
┌───────────── minute (0-59)
|
|
115
|
-
│ ┌───────────── hour (0-23)
|
|
116
|
-
│ │ ┌───────────── day of month (1-31)
|
|
117
|
-
│ │ │ ┌───────────── month (1-12)
|
|
118
|
-
│ │ │ │ ┌───────────── day of week (0-6, Sunday=0)
|
|
119
|
-
│ │ │ │ │
|
|
120
|
-
* * * * *
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
**Common patterns:**
|
|
124
|
-
|
|
125
|
-
| Expression | Description |
|
|
126
|
-
|------------|-------------|
|
|
127
|
-
| `* * * * *` | Every minute |
|
|
128
|
-
| `0 * * * *` | Every hour |
|
|
129
|
-
| `0 9 * * *` | Every day at 9 AM |
|
|
130
|
-
| `0 9 * * 1` | Every Monday at 9 AM |
|
|
131
|
-
| `0 0 1 * *` | First day of every month at midnight |
|
|
132
|
-
| `*/5 * * * *` | Every 5 minutes |
|
|
133
|
-
| `0 */4 * * *` | Every 4 hours |
|
|
134
|
-
| `0 9-17 * * 1-5` | Hourly from 9 AM to 5 PM, Monday to Friday |
|
|
135
|
-
|
|
136
|
-
### Intervals
|
|
137
|
-
|
|
138
|
-
Intervals specify a fixed duration between runs:
|
|
139
|
-
|
|
140
|
-
```python
|
|
141
|
-
@scheduled_workflow(interval="5m") # Every 5 minutes
|
|
142
|
-
@scheduled_workflow(interval="1h") # Every hour
|
|
143
|
-
@scheduled_workflow(interval="30s") # Every 30 seconds
|
|
144
|
-
@scheduled_workflow(interval="1d") # Every day
|
|
145
|
-
```
|
|
146
|
-
|
|
147
|
-
**Supported units:**
|
|
148
|
-
- `s` - seconds
|
|
149
|
-
- `m` - minutes
|
|
150
|
-
- `h` - hours
|
|
151
|
-
- `d` - days
|
|
152
|
-
|
|
153
|
-
<Note>
|
|
154
|
-
When using intervals, the first run happens immediately when the schedule is created. Subsequent runs occur at the specified interval after each run completes.
|
|
155
|
-
</Note>
|
|
156
|
-
|
|
157
|
-
### Calendar-Based Schedules
|
|
158
|
-
|
|
159
|
-
For more complex scheduling needs, use calendar specifications:
|
|
160
|
-
|
|
161
|
-
```python
|
|
162
|
-
from pyworkflow import ScheduleSpec, CalendarSpec
|
|
163
|
-
|
|
164
|
-
# Run on the 1st and 15th of every month at midnight
|
|
165
|
-
spec = ScheduleSpec(
|
|
166
|
-
calendar=[
|
|
167
|
-
CalendarSpec(day_of_month=1, hour=0, minute=0),
|
|
168
|
-
CalendarSpec(day_of_month=15, hour=0, minute=0),
|
|
169
|
-
]
|
|
170
|
-
)
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
**CalendarSpec fields:**
|
|
174
|
-
|
|
175
|
-
| Field | Type | Description |
|
|
176
|
-
|-------|------|-------------|
|
|
177
|
-
| `second` | `int` | Second (0-59), default: 0 |
|
|
178
|
-
| `minute` | `int` | Minute (0-59), default: 0 |
|
|
179
|
-
| `hour` | `int` | Hour (0-23), default: 0 |
|
|
180
|
-
| `day_of_month` | `int \| None` | Day of month (1-31) |
|
|
181
|
-
| `month` | `int \| None` | Month (1-12) |
|
|
182
|
-
| `day_of_week` | `int \| None` | Day of week (0-6, Sunday=0) |
|
|
183
|
-
|
|
184
|
-
## Overlap Policies
|
|
185
|
-
|
|
186
|
-
When a schedule triggers while a previous run is still executing, the overlap policy determines what happens:
|
|
187
|
-
|
|
188
|
-
```python
|
|
189
|
-
from pyworkflow import OverlapPolicy
|
|
190
|
-
|
|
191
|
-
@scheduled_workflow(
|
|
192
|
-
interval="5m",
|
|
193
|
-
overlap_policy=OverlapPolicy.SKIP
|
|
194
|
-
)
|
|
195
|
-
async def my_workflow():
|
|
196
|
-
pass
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
| Policy | Description |
|
|
200
|
-
|--------|-------------|
|
|
201
|
-
| `SKIP` | **Default.** Skip the new run if a previous run is still active. |
|
|
202
|
-
| `BUFFER_ONE` | Queue one run to execute after the current run completes. |
|
|
203
|
-
| `BUFFER_ALL` | Queue all triggered runs (use with caution). |
|
|
204
|
-
| `CANCEL_OTHER` | Cancel the running execution and start a new one. |
|
|
205
|
-
| `ALLOW_ALL` | Allow multiple concurrent executions. |
|
|
206
|
-
|
|
207
|
-
<Warning>
|
|
208
|
-
`BUFFER_ALL` can lead to unbounded queue growth if runs take longer than the schedule interval. Use `SKIP` or `BUFFER_ONE` for most use cases.
|
|
209
|
-
</Warning>
|
|
210
|
-
|
|
211
|
-
### Choosing an Overlap Policy
|
|
212
|
-
|
|
213
|
-
<AccordionGroup>
|
|
214
|
-
<Accordion title="SKIP - Best for idempotent operations">
|
|
215
|
-
Use when it's safe to miss a run. The next scheduled run will catch up.
|
|
216
|
-
|
|
217
|
-
**Example:** Metrics collection, status checks
|
|
218
|
-
|
|
219
|
-
```python
|
|
220
|
-
@scheduled_workflow(interval="1m", overlap_policy=OverlapPolicy.SKIP)
|
|
221
|
-
async def collect_metrics():
|
|
222
|
-
# Missing one collection is fine
|
|
223
|
-
await record_current_metrics()
|
|
224
|
-
```
|
|
225
|
-
</Accordion>
|
|
226
|
-
|
|
227
|
-
<Accordion title="BUFFER_ONE - Ensure at least one catch-up run">
|
|
228
|
-
Use when you need to guarantee at least one run happens after a long-running execution.
|
|
229
|
-
|
|
230
|
-
**Example:** Data synchronization
|
|
231
|
-
|
|
232
|
-
```python
|
|
233
|
-
@scheduled_workflow(interval="5m", overlap_policy=OverlapPolicy.BUFFER_ONE)
|
|
234
|
-
async def sync_data():
|
|
235
|
-
# If sync takes > 5 min, one more will run after
|
|
236
|
-
await sync_all_records()
|
|
237
|
-
```
|
|
238
|
-
</Accordion>
|
|
239
|
-
|
|
240
|
-
<Accordion title="CANCEL_OTHER - Latest data wins">
|
|
241
|
-
Use when only the most recent run matters and older runs should be cancelled.
|
|
242
|
-
|
|
243
|
-
**Example:** Cache refresh
|
|
244
|
-
|
|
245
|
-
```python
|
|
246
|
-
@scheduled_workflow(interval="10m", overlap_policy=OverlapPolicy.CANCEL_OTHER)
|
|
247
|
-
async def refresh_cache():
|
|
248
|
-
# Cancel old refresh, use latest data
|
|
249
|
-
await rebuild_cache()
|
|
250
|
-
```
|
|
251
|
-
</Accordion>
|
|
252
|
-
|
|
253
|
-
<Accordion title="ALLOW_ALL - Parallel processing">
|
|
254
|
-
Use when runs are independent and can execute concurrently.
|
|
255
|
-
|
|
256
|
-
**Example:** Processing independent queues
|
|
257
|
-
|
|
258
|
-
```python
|
|
259
|
-
@scheduled_workflow(interval="1m", overlap_policy=OverlapPolicy.ALLOW_ALL)
|
|
260
|
-
async def process_queue():
|
|
261
|
-
# Multiple workers can process simultaneously
|
|
262
|
-
await process_next_batch()
|
|
263
|
-
```
|
|
264
|
-
</Accordion>
|
|
265
|
-
</AccordionGroup>
|
|
266
|
-
|
|
267
|
-
## Managing Schedules
|
|
268
|
-
|
|
269
|
-
### Pause and Resume
|
|
270
|
-
|
|
271
|
-
Temporarily stop a schedule without deleting it:
|
|
272
|
-
|
|
273
|
-
```python
|
|
274
|
-
from pyworkflow import pause_schedule, resume_schedule
|
|
275
|
-
|
|
276
|
-
# Pause the schedule
|
|
277
|
-
await pause_schedule("daily_report_schedule")
|
|
278
|
-
|
|
279
|
-
# ... later, resume it
|
|
280
|
-
schedule = await resume_schedule("daily_report_schedule")
|
|
281
|
-
print(f"Resumed. Next run: {schedule.next_run_time}")
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
### Update Schedule
|
|
285
|
-
|
|
286
|
-
Modify an existing schedule's configuration:
|
|
287
|
-
|
|
288
|
-
```python
|
|
289
|
-
from pyworkflow import update_schedule, ScheduleSpec, OverlapPolicy
|
|
290
|
-
|
|
291
|
-
# Change the schedule timing
|
|
292
|
-
await update_schedule(
|
|
293
|
-
schedule_id="daily_report_schedule",
|
|
294
|
-
spec=ScheduleSpec(cron="0 10 * * *"), # Change to 10 AM
|
|
295
|
-
)
|
|
296
|
-
|
|
297
|
-
# Change the overlap policy
|
|
298
|
-
await update_schedule(
|
|
299
|
-
schedule_id="daily_report_schedule",
|
|
300
|
-
overlap_policy=OverlapPolicy.BUFFER_ONE,
|
|
301
|
-
)
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
### Delete Schedule
|
|
305
|
-
|
|
306
|
-
Remove a schedule (this is a soft delete: the record is preserved for auditing):
|
|
307
|
-
|
|
308
|
-
```python
|
|
309
|
-
from pyworkflow import delete_schedule
|
|
310
|
-
|
|
311
|
-
await delete_schedule("daily_report_schedule")
|
|
312
|
-
```
|
|
313
|
-
|
|
314
|
-
### List Schedules
|
|
315
|
-
|
|
316
|
-
Query existing schedules:
|
|
317
|
-
|
|
318
|
-
```python
|
|
319
|
-
from pyworkflow import list_schedules, ScheduleStatus
|
|
320
|
-
|
|
321
|
-
# List all active schedules
|
|
322
|
-
schedules = await list_schedules(status=ScheduleStatus.ACTIVE)
|
|
323
|
-
|
|
324
|
-
for s in schedules:
|
|
325
|
-
print(f"{s.schedule_id}: {s.workflow_name}")
|
|
326
|
-
print(f" Next run: {s.next_run_time}")
|
|
327
|
-
print(f" Runs: {s.successful_runs}/{s.total_runs}")
|
|
328
|
-
|
|
329
|
-
# List schedules for a specific workflow
|
|
330
|
-
schedules = await list_schedules(workflow_name="daily_report")
|
|
331
|
-
```
|
|
332
|
-
|
|
333
|
-
## Manual Trigger and Backfill
|
|
334
|
-
|
|
335
|
-
### Manual Trigger
|
|
336
|
-
|
|
337
|
-
Execute a scheduled workflow immediately, outside of its normal schedule:
|
|
338
|
-
|
|
339
|
-
```python
|
|
340
|
-
from pyworkflow import trigger_schedule
|
|
341
|
-
|
|
342
|
-
# Trigger the schedule now (doesn't affect regular schedule)
|
|
343
|
-
await trigger_schedule("daily_report_schedule")
|
|
344
|
-
```
|
|
345
|
-
|
|
346
|
-
This is useful for:
|
|
347
|
-
- Testing the workflow
|
|
348
|
-
- Running on-demand when needed
|
|
349
|
-
- Recovering from issues
|
|
350
|
-
|
|
351
|
-
### Backfill Missed Runs
|
|
352
|
-
|
|
353
|
-
If the scheduler was down and missed some runs, you can backfill them:
|
|
354
|
-
|
|
355
|
-
```python
|
|
356
|
-
from pyworkflow import backfill_schedule
|
|
357
|
-
from datetime import datetime, UTC
|
|
358
|
-
|
|
359
|
-
# Backfill runs that should have occurred during downtime
|
|
360
|
-
run_ids = await backfill_schedule(
|
|
361
|
-
schedule_id="hourly_sync_schedule",
|
|
362
|
-
start_time=datetime(2024, 1, 15, 0, 0, 0, tzinfo=UTC),
|
|
363
|
-
end_time=datetime(2024, 1, 15, 12, 0, 0, tzinfo=UTC),
|
|
364
|
-
)
|
|
365
|
-
|
|
366
|
-
print(f"Created {len(run_ids)} backfill runs")
|
|
367
|
-
```
|
|
368
|
-
|
|
369
|
-
<Warning>
|
|
370
|
-
Backfill creates runs for all scheduled times in the range. For high-frequency schedules, this could create many runs. Consider the `overlap_policy` when backfilling.
|
|
371
|
-
</Warning>
|
|
372
|
-
|
|
373
|
-
## Timezone Support
|
|
374
|
-
|
|
375
|
-
Schedules support timezone-aware execution:
|
|
376
|
-
|
|
377
|
-
```python
|
|
378
|
-
@scheduled_workflow(
|
|
379
|
-
cron="0 9 * * *",
|
|
380
|
-
timezone="America/New_York" # 9 AM Eastern Time
|
|
381
|
-
)
|
|
382
|
-
async def east_coast_report():
|
|
383
|
-
pass
|
|
384
|
-
|
|
385
|
-
@scheduled_workflow(
|
|
386
|
-
cron="0 9 * * *",
|
|
387
|
-
timezone="Europe/London" # 9 AM British Time
|
|
388
|
-
)
|
|
389
|
-
async def uk_report():
|
|
390
|
-
pass
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
<Note>
|
|
394
|
-
All schedule times are stored internally as UTC. The timezone is used to calculate the correct UTC time for each run.
|
|
395
|
-
</Note>
|
|
396
|
-
|
|
397
|
-
## Time Bounds
|
|
398
|
-
|
|
399
|
-
Limit when a schedule is active:
|
|
400
|
-
|
|
401
|
-
```python
|
|
402
|
-
from datetime import datetime, UTC
|
|
403
|
-
|
|
404
|
-
@scheduled_workflow(
|
|
405
|
-
cron="0 9 * * *",
|
|
406
|
-
start_at=datetime(2024, 1, 1, tzinfo=UTC), # Start on Jan 1
|
|
407
|
-
end_at=datetime(2024, 12, 31, tzinfo=UTC), # End on Dec 31
|
|
408
|
-
)
|
|
409
|
-
async def annual_workflow():
|
|
410
|
-
pass
|
|
411
|
-
```
|
|
412
|
-
|
|
413
|
-
## Running the Scheduler
|
|
414
|
-
|
|
415
|
-
PyWorkflow supports two runtimes for schedule execution:
|
|
416
|
-
|
|
417
|
-
<Tabs>
|
|
418
|
-
<Tab title="Local Runtime">
|
|
419
|
-
For development, testing, or single-process deployments, use the local scheduler:
|
|
420
|
-
|
|
421
|
-
```bash
|
|
422
|
-
# Start the local scheduler
|
|
423
|
-
pyworkflow scheduler run
|
|
424
|
-
|
|
425
|
-
# With custom poll interval
|
|
426
|
-
pyworkflow scheduler run --poll-interval 10
|
|
427
|
-
|
|
428
|
-
# With a specific module
|
|
429
|
-
pyworkflow --module myapp.workflows scheduler run
|
|
430
|
-
|
|
431
|
-
# Run for a specific duration (useful for testing)
|
|
432
|
-
pyworkflow scheduler run --duration 60
|
|
433
|
-
```
|
|
434
|
-
|
|
435
|
-
The local scheduler polls storage for due schedules and triggers workflows in-process.
|
|
436
|
-
</Tab>
|
|
437
|
-
<Tab title="Celery Runtime (Distributed)">
|
|
438
|
-
For production and distributed execution, use Celery Beat:
|
|
439
|
-
|
|
440
|
-
```bash
|
|
441
|
-
# Start Celery Beat with PyWorkflow scheduler
|
|
442
|
-
celery -A pyworkflow.celery.app beat \
|
|
443
|
-
--scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler \
|
|
444
|
-
--loglevel INFO
|
|
445
|
-
```
|
|
446
|
-
|
|
447
|
-
Or use the CLI:
|
|
448
|
-
|
|
449
|
-
```bash
|
|
450
|
-
# Start a worker with beat scheduler
|
|
451
|
-
pyworkflow worker run --beat
|
|
452
|
-
```
|
|
453
|
-
|
|
454
|
-
Celery Beat dispatches workflows to distributed workers for parallel execution.
|
|
455
|
-
</Tab>
|
|
456
|
-
</Tabs>
|
|
457
|
-
|
|
458
|
-
Both schedulers:
|
|
459
|
-
1. Poll storage for due schedules (every 5 seconds by default)
|
|
460
|
-
2. Trigger workflow execution for due schedules
|
|
461
|
-
3. Update `next_run_time` after each run
|
|
462
|
-
4. Handle overlap policies automatically
|
|
463
|
-
|
|
464
|
-
<Note>
|
|
465
|
-
The schedule primitives (`trigger_schedule`, `backfill_schedule`, etc.) are runtime-agnostic
|
|
466
|
-
and will use whichever runtime is configured. You can switch between local and Celery
|
|
467
|
-
without changing your schedule management code.
|
|
468
|
-
</Note>
|
|
469
|
-
|
|
470
|
-
## Activating Decorator-Based Schedules
|
|
471
|
-
|
|
472
|
-
Schedules declared with `@scheduled_workflow` must be activated so that their records are created in storage:
|
|
473
|
-
|
|
474
|
-
```python
|
|
475
|
-
from pyworkflow import activate_scheduled_workflows
|
|
476
|
-
|
|
477
|
-
# Activate all @scheduled_workflow decorated functions
|
|
478
|
-
schedule_ids = await activate_scheduled_workflows()
|
|
479
|
-
print(f"Activated {len(schedule_ids)} schedules")
|
|
480
|
-
```
|
|
481
|
-
|
|
482
|
-
Call this during application startup to ensure all decorated workflows have corresponding schedule records.
|
|
483
|
-
|
|
484
|
-
## Complete Examples
|
|
485
|
-
|
|
486
|
-
### Every-Minute Schedule with Celery (Distributed)
|
|
487
|
-
|
|
488
|
-
This example shows a workflow that runs every minute using Celery workers for distributed execution.
|
|
489
|
-
|
|
490
|
-
**1. Define the scheduled workflow (`myapp/workflows.py`):**
|
|
491
|
-
|
|
492
|
-
```python
|
|
493
|
-
from pyworkflow import scheduled_workflow, step, OverlapPolicy
|
|
494
|
-
from datetime import datetime, UTC
|
|
495
|
-
|
|
496
|
-
@scheduled_workflow(
|
|
497
|
-
cron="* * * * *", # Every minute
|
|
498
|
-
overlap_policy=OverlapPolicy.SKIP, # Skip if previous run still active
|
|
499
|
-
)
|
|
500
|
-
async def minute_health_check():
|
|
501
|
-
"""Runs every minute to check system health."""
|
|
502
|
-
result = await check_services()
|
|
503
|
-
await record_metrics(result)
|
|
504
|
-
return {"timestamp": datetime.now(UTC).isoformat(), "status": result}
|
|
505
|
-
|
|
506
|
-
@step()
|
|
507
|
-
async def check_services():
|
|
508
|
-
# Check various services
|
|
509
|
-
return {"api": "healthy", "db": "healthy", "cache": "healthy"}
|
|
510
|
-
|
|
511
|
-
@step()
|
|
512
|
-
async def record_metrics(health_status: dict):
|
|
513
|
-
# Record metrics to monitoring system
|
|
514
|
-
print(f"Health check: {health_status}")
|
|
515
|
-
```
|
|
516
|
-
|
|
517
|
-
**2. Create a startup script (`myapp/main.py`):**
|
|
518
|
-
|
|
519
|
-
```python
|
|
520
|
-
import asyncio
|
|
521
|
-
from pyworkflow import configure, activate_scheduled_workflows
|
|
522
|
-
from pyworkflow.storage.file import FileStorageBackend
|
|
523
|
-
|
|
524
|
-
async def setup_schedules():
|
|
525
|
-
# Configure PyWorkflow with file storage
|
|
526
|
-
storage = FileStorageBackend(base_path="./workflow_data")
|
|
527
|
-
configure(
|
|
528
|
-
storage=storage,
|
|
529
|
-
default_durable=True,
|
|
530
|
-
)
|
|
531
|
-
|
|
532
|
-
# Activate all @scheduled_workflow decorators
|
|
533
|
-
# This creates schedule records in storage
|
|
534
|
-
schedule_ids = await activate_scheduled_workflows(storage=storage)
|
|
535
|
-
print(f"Activated {len(schedule_ids)} schedule(s)")
|
|
536
|
-
|
|
537
|
-
if __name__ == "__main__":
|
|
538
|
-
asyncio.run(setup_schedules())
|
|
539
|
-
```
|
|
540
|
-
|
|
541
|
-
**3. Start the services:**
|
|
542
|
-
|
|
543
|
-
```bash
|
|
544
|
-
# Terminal 1: Run the setup script to activate schedules
|
|
545
|
-
python myapp/main.py
|
|
546
|
-
|
|
547
|
-
# Terminal 2: Start Redis (required for Celery)
|
|
548
|
-
docker run -d -p 6379:6379 redis:7-alpine
|
|
549
|
-
|
|
550
|
-
# Terminal 3: Start Celery worker for workflow execution
|
|
551
|
-
pyworkflow --module myapp.workflows worker run
|
|
552
|
-
|
|
553
|
-
# Terminal 4: Start Celery Beat scheduler
|
|
554
|
-
celery -A pyworkflow.celery.app beat \
|
|
555
|
-
--scheduler pyworkflow.celery.scheduler:PyWorkflowScheduler \
|
|
556
|
-
--loglevel INFO
|
|
557
|
-
```
|
|
558
|
-
|
|
559
|
-
The scheduler will now trigger `minute_health_check` every minute, and the Celery worker will execute it.
|
|
560
|
-
|
|
561
|
-
---
|
|
562
|
-
|
|
563
|
-
### Every-Minute Schedule with Local Runtime
|
|
564
|
-
|
|
565
|
-
For testing or simple use cases, you can run schedules locally without Celery.
|
|
566
|
-
|
|
567
|
-
**1. Define the workflow (`local_schedule.py`):**
|
|
568
|
-
|
|
569
|
-
```python
|
|
570
|
-
import asyncio
|
|
571
|
-
from datetime import datetime, UTC
|
|
572
|
-
|
|
573
|
-
from pyworkflow import (
|
|
574
|
-
configure,
|
|
575
|
-
workflow,
|
|
576
|
-
step,
|
|
577
|
-
create_schedule,
|
|
578
|
-
ScheduleSpec,
|
|
579
|
-
OverlapPolicy,
|
|
580
|
-
start,
|
|
581
|
-
)
|
|
582
|
-
from pyworkflow.storage.file import FileStorageBackend
|
|
583
|
-
from pyworkflow.utils.schedule import calculate_next_run_time
|
|
584
|
-
|
|
585
|
-
# Define a simple workflow
|
|
586
|
-
@workflow()
|
|
587
|
-
async def minute_task():
|
|
588
|
-
"""A task that runs every minute."""
|
|
589
|
-
result = await do_work()
|
|
590
|
-
print(f"[{datetime.now().strftime('%H:%M:%S')}] Task completed: {result}")
|
|
591
|
-
return result
|
|
592
|
-
|
|
593
|
-
@step()
|
|
594
|
-
async def do_work():
|
|
595
|
-
return {"processed_at": datetime.now(UTC).isoformat()}
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
async def run_local_scheduler():
|
|
599
|
-
"""Simple local scheduler loop."""
|
|
600
|
-
# Configure with file storage
|
|
601
|
-
storage = FileStorageBackend(base_path="./workflow_data")
|
|
602
|
-
configure(
|
|
603
|
-
storage=storage,
|
|
604
|
-
default_durable=True,
|
|
605
|
-
default_runtime="local", # Run in-process, no Celery
|
|
606
|
-
)
|
|
607
|
-
|
|
608
|
-
# Create a schedule for our workflow
|
|
609
|
-
spec = ScheduleSpec(cron="* * * * *") # Every minute
|
|
610
|
-
|
|
611
|
-
schedule = await create_schedule(
|
|
612
|
-
workflow_name="minute_task",
|
|
613
|
-
spec=spec,
|
|
614
|
-
overlap_policy=OverlapPolicy.SKIP,
|
|
615
|
-
schedule_id="local_minute_schedule",
|
|
616
|
-
storage=storage,
|
|
617
|
-
)
|
|
618
|
-
print(f"Created schedule: {schedule.schedule_id}")
|
|
619
|
-
print(f"First run at: {schedule.next_run_time}")
|
|
620
|
-
|
|
621
|
-
# Simple scheduler loop
|
|
622
|
-
while True:
|
|
623
|
-
now = datetime.now(UTC)
|
|
624
|
-
|
|
625
|
-
# Check if schedule is due
|
|
626
|
-
due_schedules = await storage.get_due_schedules(now)
|
|
627
|
-
|
|
628
|
-
for sched in due_schedules:
|
|
629
|
-
print(f"\n[{now.strftime('%H:%M:%S')}] Triggering: {sched.workflow_name}")
|
|
630
|
-
|
|
631
|
-
# Start the workflow locally
|
|
632
|
-
run_id = await start(minute_task)
|
|
633
|
-
print(f"Started run: {run_id}")
|
|
634
|
-
|
|
635
|
-
# Update next run time
|
|
636
|
-
sched.next_run_time = calculate_next_run_time(sched.spec, now=now)
|
|
637
|
-
sched.total_runs += 1
|
|
638
|
-
await storage.update_schedule(sched)
|
|
639
|
-
|
|
640
|
-
# Check every 5 seconds
|
|
641
|
-
await asyncio.sleep(5)
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
if __name__ == "__main__":
|
|
645
|
-
print("Starting local scheduler (Ctrl+C to stop)...")
|
|
646
|
-
asyncio.run(run_local_scheduler())
|
|
647
|
-
```
|
|
648
|
-
|
|
649
|
-
**2. Run it:**
|
|
650
|
-
|
|
651
|
-
```bash
|
|
652
|
-
python local_schedule.py
|
|
653
|
-
```
|
|
654
|
-
|
|
655
|
-
**Output:**
|
|
656
|
-
|
|
657
|
-
```
|
|
658
|
-
Starting local scheduler (Ctrl+C to stop)...
|
|
659
|
-
Created schedule: local_minute_schedule
|
|
660
|
-
First run at: 2024-01-15 10:01:00+00:00
|
|
661
|
-
|
|
662
|
-
[10:01:00] Triggering: minute_task
|
|
663
|
-
Started run: run_abc123...
|
|
664
|
-
[10:01:00] Task completed: {'processed_at': '2024-01-15T10:01:00.123456+00:00'}
|
|
665
|
-
|
|
666
|
-
[10:02:00] Triggering: minute_task
|
|
667
|
-
Started run: run_def456...
|
|
668
|
-
[10:02:00] Task completed: {'processed_at': '2024-01-15T10:02:00.234567+00:00'}
|
|
669
|
-
```
|
|
670
|
-
|
|
671
|
-
<Tip>
|
|
672
|
-
The local runtime is great for development and testing. For production, use the Celery-based approach with `pyworkflow worker run --beat` for robust, distributed schedule execution.
|
|
673
|
-
</Tip>
|
|
674
|
-
|
|
675
|
-
## CLI Commands
|
|
676
|
-
|
|
677
|
-
Manage schedules from the command line:
|
|
678
|
-
|
|
679
|
-
```bash
|
|
680
|
-
# List schedules
|
|
681
|
-
pyworkflow schedules list
|
|
682
|
-
pyworkflow schedules list --workflow daily_report --status active
|
|
683
|
-
|
|
684
|
-
# Show schedule details
|
|
685
|
-
pyworkflow schedules show sched_abc123
|
|
686
|
-
|
|
687
|
-
# Create a schedule
|
|
688
|
-
pyworkflow schedules create my_workflow --cron "0 9 * * *"
|
|
689
|
-
pyworkflow schedules create my_workflow --interval 5m --overlap skip
|
|
690
|
-
|
|
691
|
-
# Pause and resume
|
|
692
|
-
pyworkflow schedules pause sched_abc123
|
|
693
|
-
pyworkflow schedules resume sched_abc123
|
|
694
|
-
|
|
695
|
-
# Manual trigger
|
|
696
|
-
pyworkflow schedules trigger sched_abc123
|
|
697
|
-
|
|
698
|
-
# Backfill
|
|
699
|
-
pyworkflow schedules backfill sched_abc123 \
|
|
700
|
-
--start 2024-01-01T00:00:00 \
|
|
701
|
-
--end 2024-01-31T23:59:59
|
|
702
|
-
|
|
703
|
-
# Delete
|
|
704
|
-
pyworkflow schedules delete sched_abc123
|
|
705
|
-
```
|
|
706
|
-
|
|
707
|
-
## Best Practices
|
|
708
|
-
|
|
709
|
-
<AccordionGroup>
|
|
710
|
-
<Accordion title="Use SKIP for most schedules">
|
|
711
|
-
The `SKIP` overlap policy is the safest default. It prevents resource exhaustion and ensures predictable behavior.
|
|
712
|
-
</Accordion>
|
|
713
|
-
|
|
714
|
-
<Accordion title="Set appropriate intervals">
|
|
715
|
-
Don't schedule too frequently. Consider the typical workflow duration when setting intervals to avoid constant overlaps.
|
|
716
|
-
</Accordion>
|
|
717
|
-
|
|
718
|
-
<Accordion title="Handle failures gracefully">
|
|
719
|
-
Scheduled workflows should handle transient failures with retries. Use step-level retries for resilience.
|
|
720
|
-
</Accordion>
|
|
721
|
-
|
|
722
|
-
<Accordion title="Monitor schedule health">
|
|
723
|
-
Track `successful_runs`, `failed_runs`, and `skipped_runs` to identify issues with your schedules.
|
|
724
|
-
</Accordion>
|
|
725
|
-
|
|
726
|
-
<Accordion title="Use idempotent operations">
|
|
727
|
-
Design scheduled workflows to be idempotent. They may run multiple times due to retries or backfills.
|
|
728
|
-
</Accordion>
|
|
729
|
-
|
|
730
|
-
<Accordion title="Consider timezones carefully">
|
|
731
|
-
Be explicit about timezones, especially for schedules that should run at specific local times.
|
|
732
|
-
</Accordion>
|
|
733
|
-
</AccordionGroup>
|
|
734
|
-
|
|
735
|
-
## Schedule Status
|
|
736
|
-
|
|
737
|
-
Schedules can be in one of three states:
|
|
738
|
-
|
|
739
|
-
| Status | Description |
|
|
740
|
-
|--------|-------------|
|
|
741
|
-
| `ACTIVE` | Schedule is running and will trigger at `next_run_time` |
|
|
742
|
-
| `PAUSED` | Schedule is temporarily stopped, no runs will trigger |
|
|
743
|
-
| `DELETED` | Schedule has been soft-deleted |
|
|
744
|
-
|
|
745
|
-
## Monitoring
|
|
746
|
-
|
|
747
|
-
Track schedule execution with built-in statistics:
|
|
748
|
-
|
|
749
|
-
```python
|
|
750
|
-
schedule = await get_schedule("my_schedule")
|
|
751
|
-
|
|
752
|
-
print(f"Total runs: {schedule.total_runs}")
|
|
753
|
-
print(f"Successful: {schedule.successful_runs}")
|
|
754
|
-
print(f"Failed: {schedule.failed_runs}")
|
|
755
|
-
print(f"Skipped: {schedule.skipped_runs}")
|
|
756
|
-
print(f"Last run: {schedule.last_run_at}")
|
|
757
|
-
print(f"Next run: {schedule.next_run_time}")
|
|
758
|
-
```
|
|
759
|
-
|
|
760
|
-
## Next Steps
|
|
761
|
-
|
|
762
|
-
<CardGroup cols={2}>
|
|
763
|
-
<Card title="CLI Reference" icon="terminal" href="/guides/cli">
|
|
764
|
-
Learn the full CLI commands for schedule management.
|
|
765
|
-
</Card>
|
|
766
|
-
<Card title="Workflows" icon="diagram-project" href="/concepts/workflows">
|
|
767
|
-
Understand workflow concepts and patterns.
|
|
768
|
-
</Card>
|
|
769
|
-
<Card title="Fault Tolerance" icon="shield-check" href="/concepts/fault-tolerance">
|
|
770
|
-
Configure auto recovery for scheduled workflows.
|
|
771
|
-
</Card>
|
|
772
|
-
<Card title="Events" icon="timeline" href="/concepts/events">
|
|
773
|
-
Track schedule events in the event log.
|
|
774
|
-
</Card>
|
|
775
|
-
</CardGroup>
|