pyworkflow-engine 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +10 -0
- pyworkflow/celery/singleton.py +368 -0
- pyworkflow/celery/tasks.py +32 -52
- pyworkflow/core/step.py +8 -0
- pyworkflow/core/validation.py +112 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/RECORD +12 -10
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/top_level.txt +0 -0
pyworkflow/__init__.py
CHANGED
pyworkflow/celery/app.py
CHANGED
@@ -194,6 +194,16 @@ def create_celery_app(
         worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] [%(task_name)s(%(task_id)s)] %(message)s",
     )
 
+    # Configure singleton locking for Redis brokers
+    # This enables distributed locking to prevent duplicate task execution
+    is_redis_broker = broker_url.startswith("redis://") or broker_url.startswith("rediss://")
+    if is_redis_broker:
+        app.conf.update(
+            singleton_backend_url=broker_url,
+            singleton_key_prefix="pyworkflow:lock:",
+            singleton_lock_expiry=3600,  # 1 hour TTL (safety net)
+        )
+
     # Note: Logging is configured via Celery signals (worker_init, worker_process_init)
     # to ensure proper initialization AFTER process forking.
     # See on_worker_init() and on_worker_process_init() below.
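create_celery_app only wires these settings up automatically when the broker is Redis. Since SingletonWorkflowTask reads singleton_backend_url from the app config before falling back to the broker URL, a deployment on another broker could still opt in to locking by setting the same keys itself. A minimal sketch, assuming a separate Redis instance is available for locks (the app object and both URLs below are illustrative, not part of the package):

from celery import Celery

# Hypothetical app on a non-Redis broker; the AMQP URL is illustrative.
app = Celery("pyworkflow", broker="amqp://guest@localhost//")

# Point the singleton lock backend at a Redis instance explicitly, mirroring
# the keys create_celery_app sets automatically for Redis brokers.
app.conf.update(
    singleton_backend_url="redis://localhost:6379/1",  # assumed lock store
    singleton_key_prefix="pyworkflow:lock:",
    singleton_lock_expiry=3600,  # same 1 hour TTL safety net as above
)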
pyworkflow/celery/singleton.py
ADDED
@@ -0,0 +1,368 @@
+"""
+Singleton task implementation for PyWorkflow.
+
+Provides Redis-based distributed locking to prevent duplicate task execution.
+Self-contained implementation (no external dependencies beyond redis).
+
+Based on:
+- steinitzu/celery-singleton library concepts
+- FlowHunt's battle-tested refinements for retry-safe lock management
+"""
+
+import inspect
+import json
+from hashlib import md5
+from typing import Any
+from uuid import uuid4
+
+from celery import Task
+from celery.exceptions import WorkerLostError
+from loguru import logger
+
+
+def generate_lock_key(
+    task_name: str,
+    task_args: list[Any] | tuple[Any, ...] | None = None,
+    task_kwargs: dict[str, Any] | None = None,
+    key_prefix: str = "pyworkflow:lock:",
+) -> str:
+    """
+    Generate a unique lock key for a task based on its name and arguments.
+
+    Uses MD5 hash to keep key length reasonable while ensuring uniqueness.
+    """
+    str_args = json.dumps(task_args or [], sort_keys=True, default=str)
+    str_kwargs = json.dumps(task_kwargs or {}, sort_keys=True, default=str)
+    task_hash = md5((task_name + str_args + str_kwargs).encode()).hexdigest()
+    return key_prefix + task_hash
+
+
+class SingletonConfig:
+    """Configuration for singleton task behavior."""
+
+    def __init__(self, app: Any):
+        self.app = app
+
+    @property
+    def backend_url(self) -> str | None:
+        return self.app.conf.get("singleton_backend_url")
+
+    @property
+    def key_prefix(self) -> str:
+        return self.app.conf.get("singleton_key_prefix", "pyworkflow:lock:")
+
+    @property
+    def lock_expiry(self) -> int:
+        return self.app.conf.get("singleton_lock_expiry", 3600)
+
+    @property
+    def raise_on_duplicate(self) -> bool:
+        return self.app.conf.get("singleton_raise_on_duplicate", False)
+
+
+class RedisLockBackend:
+    """Redis backend for distributed locking."""
+
+    def __init__(self, url: str):
+        import redis
+
+        self.redis = redis.from_url(url, decode_responses=True)
+
+    def lock(self, lock_key: str, task_id: str, expiry: int | None = None) -> bool:
+        """Acquire lock atomically. Returns True if acquired."""
+        return bool(self.redis.set(lock_key, task_id, nx=True, ex=expiry))
+
+    def unlock(self, lock_key: str) -> None:
+        """Release the lock."""
+        self.redis.delete(lock_key)
+
+    def get(self, lock_key: str) -> str | None:
+        """Get the task ID holding the lock."""
+        return self.redis.get(lock_key)
+
+
+class DuplicateTaskError(Exception):
+    """Raised when attempting to queue a duplicate singleton task."""
+
+    def __init__(self, message: str, task_id: str):
+        self.task_id = task_id
+        super().__init__(message)
+
+
+class SingletonWorkflowTask(Task):
+    """
+    Base class for singleton workflow tasks with distributed locking.
+
+    Features:
+    - Redis-based lock prevents duplicate execution
+    - Support for unique_on with nested dict/list access (e.g., "data.run_id")
+    - Retry-safe: locks NOT released on failure (prevents duplicate during retries)
+    - Lock released on success or when max retries exceeded
+    - Time-based lock expiry as safety net
+
+    Configuration:
+        unique_on: List of argument names to use for uniqueness (e.g., ["run_id", "step_id"])
+            Supports nested access with dot notation (e.g., ["data.run_id"])
+        raise_on_duplicate: If True, raise DuplicateTaskError instead of returning existing result
+        lock_expiry: Lock TTL in seconds (default: 3600 = 1 hour)
+
+    Example:
+        @celery_app.task(
+            base=SingletonWorkflowTask,
+            unique_on=["run_id", "step_id"],
+        )
+        def my_task(run_id: str, step_id: str, data: dict):
+            ...
+    """
+
+    abstract = True
+
+    # Singleton configuration (can be overridden per-task)
+    unique_on: list[str] | str | None = None
+    raise_on_duplicate: bool | None = None
+    lock_expiry: int | None = None
+
+    # Lock behavior
+    release_lock_on_success: bool = True
+    release_lock_on_failure: bool = False  # Keep lock during retries
+
+    # Celery task settings
+    max_retries: int | None = None
+    acks_on_failure_or_timeout: bool = True
+
+    # Cached instances (class-level, shared across task instances)
+    _singleton_backend: RedisLockBackend | None = None
+    _singleton_config: SingletonConfig | None = None
+
+    @property
+    def singleton_config(self) -> SingletonConfig:
+        if self._singleton_config is None:
+            self._singleton_config = SingletonConfig(self.app)
+        return self._singleton_config
+
+    @property
+    def singleton_backend(self) -> RedisLockBackend | None:
+        if self._singleton_backend is None:
+            url = self.singleton_config.backend_url
+            if not url:
+                # Try broker URL if it's Redis
+                broker = self.app.conf.broker_url or ""
+                if broker.startswith("redis://") or broker.startswith("rediss://"):
+                    url = broker
+            if url:
+                self._singleton_backend = RedisLockBackend(url)
+        return self._singleton_backend
+
+    @property
+    def _lock_expiry(self) -> int:
+        if self.lock_expiry is not None:
+            return self.lock_expiry
+        return self.singleton_config.lock_expiry
+
+    @property
+    def _raise_on_duplicate(self) -> bool:
+        if self.raise_on_duplicate is not None:
+            return self.raise_on_duplicate
+        return self.singleton_config.raise_on_duplicate
+
+    def generate_lock(
+        self,
+        task_name: str,
+        task_args: list[Any] | tuple[Any, ...] | None = None,
+        task_kwargs: dict[str, Any] | None = None,
+    ) -> str:
+        """Generate lock key, supporting nested attribute access via unique_on."""
+        unique_on = self.unique_on
+        task_args = task_args or []
+        task_kwargs = task_kwargs or {}
+
+        if unique_on:
+            if isinstance(unique_on, str):
+                unique_on = [unique_on]
+
+            # Bind arguments to function signature
+            sig = inspect.signature(self.run)
+            bound = sig.bind(*task_args, **task_kwargs).arguments
+
+            unique_args: list[Any] = []
+            for key in unique_on:
+                keys = key.split(".")
+                if keys[0] not in bound:
+                    raise ValueError(f"Key '{keys[0]}' not found in task arguments")
+
+                value = bound[keys[0]]
+                # Navigate nested structure (supports one level of nesting)
+                if len(keys) == 2:
+                    nested_key = keys[1]
+                    if isinstance(value, dict):
+                        if nested_key not in value:
+                            raise ValueError(f"Key '{nested_key}' not found in dict")
+                        unique_args.append(value[nested_key])
+                    elif isinstance(value, (list, tuple)):
+                        unique_args.append(value[int(nested_key)])
+                    elif hasattr(value, nested_key):
+                        unique_args.append(getattr(value, nested_key))
+                    else:
+                        raise ValueError(f"Key '{key}' has unsupported type")
+                elif len(keys) == 1:
+                    unique_args.append(value)
+                else:
+                    raise ValueError(f"Key '{key}' has too many levels (max 2)")
+
+            return generate_lock_key(
+                task_name,
+                unique_args,
+                {},
+                key_prefix=self.singleton_config.key_prefix,
+            )
+        else:
+            return generate_lock_key(
+                task_name,
+                list(task_args),
+                task_kwargs,
+                key_prefix=self.singleton_config.key_prefix,
+            )
+
+    def acquire_lock(self, lock_key: str, task_id: str) -> bool:
+        """Attempt to acquire lock. Returns True if successful."""
+        backend = self.singleton_backend
+        if backend is None:
+            return True  # No Redis = no locking
+        return backend.lock(lock_key, task_id, expiry=self._lock_expiry)
+
+    def release_lock(
+        self,
+        task_args: list[Any] | tuple[Any, ...] | None = None,
+        task_kwargs: dict[str, Any] | None = None,
+    ) -> None:
+        """Release the lock for this task."""
+        backend = self.singleton_backend
+        if backend is None:
+            return
+        lock_key = self.generate_lock(self.name, task_args, task_kwargs)
+        backend.unlock(lock_key)
+
+    def get_existing_task_id(self, lock_key: str) -> str | None:
+        """Get task ID holding the lock, if any."""
+        backend = self.singleton_backend
+        if backend is None:
+            return None
+        return backend.get(lock_key)
+
+    def apply_async(
+        self,
+        args: list[Any] | tuple[Any, ...] | None = None,
+        kwargs: dict[str, Any] | None = None,
+        task_id: str | None = None,
+        **options: Any,
+    ) -> Any:
+        """Override apply_async to implement singleton behavior."""
+        args = args or []
+        kwargs = kwargs or {}
+        task_id = task_id or str(uuid4())
+
+        backend = self.singleton_backend
+        if backend is None:
+            # No Redis = normal behavior
+            return super().apply_async(args, kwargs, task_id=task_id, **options)
+
+        lock_key = self.generate_lock(self.name, args, kwargs)
+
+        # Try to acquire lock and run
+        if self.acquire_lock(lock_key, task_id):
+            try:
+                return super().apply_async(args, kwargs, task_id=task_id, **options)
+            except Exception:
+                # Release lock if apply_async fails
+                backend.unlock(lock_key)
+                raise
+
+        # Lock not acquired - check for existing task
+        existing_task_id = self.get_existing_task_id(lock_key)
+        if existing_task_id:
+            logger.debug(
+                "Singleton: duplicate task blocked",
+                task=self.name,
+                existing_task_id=existing_task_id,
+            )
+            if self._raise_on_duplicate:
+                raise DuplicateTaskError(f"Duplicate of task {existing_task_id}", existing_task_id)
+            return self.AsyncResult(existing_task_id)
+
+        # Race condition: lock disappeared, retry
+        if self.acquire_lock(lock_key, task_id):
+            try:
+                return super().apply_async(args, kwargs, task_id=task_id, **options)
+            except Exception:
+                backend.unlock(lock_key)
+                raise
+
+        # Still can't acquire - return existing or submit anyway
+        existing_task_id = self.get_existing_task_id(lock_key)
+        if existing_task_id:
+            return self.AsyncResult(existing_task_id)
+
+        # Fallback: submit anyway (rare edge case)
+        logger.warning(f"Singleton lock unstable, submitting anyway: {self.name}")
+        return super().apply_async(args, kwargs, task_id=task_id, **options)
+
+    def on_success(
+        self, retval: Any, task_id: str, args: tuple[Any, ...], kwargs: dict[str, Any]
+    ) -> None:
+        """Release lock on successful task completion."""
+        if self.release_lock_on_success:
+            self.release_lock(task_args=args, task_kwargs=kwargs)
+
+    def on_failure(
+        self,
+        exc: Exception,
+        task_id: str,
+        args: tuple[Any, ...],
+        kwargs: dict[str, Any],
+        einfo: Any,
+    ) -> None:
+        """
+        Retry-aware lock management on failure.
+
+        - If task will retry: Keep lock
+        - If max retries exceeded: Release lock
+        """
+        max_retries_exceeded = False
+        if hasattr(self, "request") and self.request:
+            current_retries = getattr(self.request, "retries", 0)
+            max_retries = self.max_retries if self.max_retries is not None else 3
+            max_retries_exceeded = current_retries >= max_retries
+
+        if self.release_lock_on_failure or max_retries_exceeded:
+            self.release_lock(task_args=args, task_kwargs=kwargs)
+            if max_retries_exceeded:
+                logger.warning(
+                    f"Task {self.name} failed after {current_retries} retries. Lock released.",
+                    task_id=task_id,
+                    error=str(exc),
+                )
+
+        # Log appropriately
+        if isinstance(exc, WorkerLostError):
+            logger.warning("Task interrupted due to worker loss", task_id=task_id)
+        else:
+            logger.error(
+                f"Task {self.name} failed: {exc}",
+                task_id=task_id,
+                traceback=einfo.traceback if einfo else None,
+            )
+
+    def on_retry(
+        self,
+        exc: Exception,
+        task_id: str,
+        args: tuple[Any, ...],
+        kwargs: dict[str, Any],
+        einfo: Any,
+    ) -> None:
+        """Lock is retained during retry."""
+        logger.warning(
+            f"Task {self.name} retrying (lock retained)",
+            task_id=task_id,
+            retry_count=self.request.retries,
+        )
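The docstring example above maps directly onto how deduplication behaves at enqueue time: two submissions whose unique_on values hash to the same Redis key result in a single queued task, and the later caller receives the AsyncResult of the task that already holds the lock. A small usage sketch under those assumptions (the task name, its body, and the argument values are illustrative, and a Redis broker or singleton_backend_url must be configured):

from pyworkflow.celery.app import celery_app
from pyworkflow.celery.singleton import SingletonWorkflowTask, generate_lock_key

@celery_app.task(base=SingletonWorkflowTask, unique_on=["run_id", "step_id"])
def sync_step(run_id: str, step_id: str, payload: dict):  # illustrative task
    ...

first = sync_step.apply_async(kwargs={"run_id": "r1", "step_id": "s1", "payload": {}})
second = sync_step.apply_async(kwargs={"run_id": "r1", "step_id": "s1", "payload": {"x": 1}})

# Only run_id and step_id feed the lock key, so the second call does not enqueue
# anything; it returns the AsyncResult of the task already holding the lock.
assert second.id == first.id

# The key itself is the configured prefix plus an MD5 of task name + unique arguments.
print(generate_lock_key("sync_step", ["r1", "s1"], {}))

With singleton_raise_on_duplicate=True in the app config (or raise_on_duplicate set on the task), the second call would raise DuplicateTaskError instead of returning the existing result.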
pyworkflow/celery/tasks.py
CHANGED
@@ -19,12 +19,12 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from pyworkflow.context.step_context import StepContext
 
-from celery import
-from celery.exceptions import MaxRetriesExceededError, Retry, WorkerLostError
+from celery.exceptions import MaxRetriesExceededError, Retry
 from loguru import logger
 
 from pyworkflow.celery.app import celery_app
 from pyworkflow.celery.loop import run_async
+from pyworkflow.celery.singleton import SingletonWorkflowTask
 from pyworkflow.core.exceptions import (
     CancellationError,
     ContinueAsNewSignal,
@@ -33,6 +33,7 @@ from pyworkflow.core.exceptions import (
     SuspensionSignal,
 )
 from pyworkflow.core.registry import WorkflowMetadata, get_workflow
+from pyworkflow.core.validation import validate_step_parameters
 from pyworkflow.core.workflow import execute_workflow_with_context
 from pyworkflow.engine.events import (
     EventType,
@@ -73,58 +74,15 @@ def _calculate_exponential_backoff(
     return delay * jitter
 
 
-class WorkflowTask(Task):
-    """Base task class for workflow execution with custom error handling."""
-
-    # Allow unlimited Celery-level retries - our code controls the actual limit
-    # via the max_retries parameter passed to execute_step_task
-    max_retries = None
-    # Prevent message requeue loops when task fails
-    acks_on_failure_or_timeout = True
-
-    def on_failure(self, exc, task_id, args, kwargs, einfo):
-        """
-        Handle task failure.
-
-        Detects worker loss and handles recovery appropriately:
-        - WorkerLostError: Infrastructure failure, may trigger recovery
-        - Other exceptions: Application failure
-        """
-        is_worker_loss = isinstance(exc, WorkerLostError)
-        if is_worker_loss:
-            logger.warning(
-                f"Task {self.name} interrupted due to worker loss",
-                task_id=task_id,
-                error=str(exc),
-            )
-            # Note: Recovery is handled when the task is requeued and picked up
-            # by another worker. See _handle_workflow_recovery() for logic.
-        else:
-            logger.error(
-                f"Task {self.name} failed: {str(exc)}",
-                task_id=task_id,
-                error=str(exc),
-                traceback=einfo.traceback if einfo else None,
-            )
-
-    def on_retry(self, exc, task_id, args, kwargs, einfo):
-        """Handle task retry."""
-        logger.warning(
-            f"Task {self.name} retrying",
-            task_id=task_id,
-            error=str(exc),
-            retry_count=self.request.retries,
-        )
-
-
 @celery_app.task(
     name="pyworkflow.execute_step",
-    base=
+    base=SingletonWorkflowTask,
     bind=True,
     queue="pyworkflow.steps",
+    unique_on=["run_id", "step_id"],
 )
 def execute_step_task(
-    self:
+    self: SingletonWorkflowTask,
     step_name: str,
     args_json: str,
     kwargs_json: str,
@@ -212,10 +170,28 @@ def execute_step_task(
         )
         raise FatalError(f"Step '{step_name}' not found in registry")
 
+    # Ignore processing step if already completed (idempotency)
+    events = run_async(storage.get_events(run_id))
+    already_completed = any(
+        evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id
+        for evt in events
+    )
+    if already_completed:
+        logger.warning(
+            "Step already completed by another task, skipping execution",
+            run_id=run_id,
+            step_id=step_id,
+            step_name=step_name,
+        )
+        return None
+
     # Deserialize arguments
     args = deserialize_args(args_json)
     kwargs = deserialize_kwargs(kwargs_json)
 
+    # Validate parameters before execution on worker (defense in depth)
+    validate_step_parameters(step_meta.original_func, args, kwargs, step_name)
+
     # Set up step context if provided (read-only mode)
     step_context_token = None
     readonly_token = None
@@ -608,8 +584,9 @@ def _resolve_context_class(class_name: str) -> type["StepContext"] | None:
 
 @celery_app.task(
     name="pyworkflow.start_workflow",
-    base=
+    base=SingletonWorkflowTask,
     queue="pyworkflow.workflows",
+    unique_on=["run_id"],
 )
 def start_workflow_task(
     workflow_name: str,
@@ -678,8 +655,9 @@ def start_workflow_task(
 
 @celery_app.task(
     name="pyworkflow.start_child_workflow",
-    base=
+    base=SingletonWorkflowTask,
     queue="pyworkflow.workflows",
+    unique_on=["child_run_id"],
 )
 def start_child_workflow_task(
     workflow_name: str,
@@ -1719,8 +1697,9 @@ async def _start_workflow_on_worker(
 
 @celery_app.task(
     name="pyworkflow.resume_workflow",
-    base=
+    base=SingletonWorkflowTask,
     queue="pyworkflow.schedules",
+    unique_on=["run_id"],
 )
 def resume_workflow_task(
     run_id: str,
@@ -1766,8 +1745,9 @@ def resume_workflow_task(
 
 @celery_app.task(
     name="pyworkflow.execute_scheduled_workflow",
-    base=
+    base=SingletonWorkflowTask,
    queue="pyworkflow.schedules",
+    # No unique_on - scheduled workflows create new runs each time, no deduplication needed
 )
 def execute_scheduled_workflow_task(
     schedule_id: str,
pyworkflow/core/step.py
CHANGED
@@ -23,6 +23,7 @@ from loguru import logger
 from pyworkflow.context import get_context, has_context
 from pyworkflow.core.exceptions import FatalError, RetryableError
 from pyworkflow.core.registry import register_step
+from pyworkflow.core.validation import validate_step_parameters
 from pyworkflow.engine.events import (
     create_step_completed_event,
     create_step_failed_event,
@@ -118,6 +119,8 @@ def step(
                 f"Step {step_name} in transient mode, executing directly",
                 run_id=ctx.run_id,
             )
+            # Validate parameters before execution
+            validate_step_parameters(func, args, kwargs, step_name)
             return await _execute_with_retries(
                 func, args, kwargs, step_name, max_retries, retry_delay
             )
@@ -172,6 +175,8 @@ def step(
         # When running in a distributed runtime (e.g., Celery), dispatch steps
         # to step workers instead of executing inline.
         if ctx.runtime == "celery":
+            # Validate parameters before dispatching to Celery
+            validate_step_parameters(func, args, kwargs, step_name)
             return await _dispatch_step_to_celery(
                 ctx=ctx,
                 func=func,
@@ -240,6 +245,9 @@ def step(
         # Check for cancellation before executing step
         ctx.check_cancellation()
 
+        # Validate parameters before execution
+        validate_step_parameters(func, args, kwargs, step_name)
+
         try:
             # Execute step function
             result = await func(*args, **kwargs)
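Because validate_step_parameters now runs in all three paths (transient execution, Celery dispatch, and inline execution), a mistyped argument surfaces as a StepValidationError, a FatalError that is not retried, before the step body runs or anything is dispatched. A sketch calling the validator directly against a typed function (the function and its arguments are illustrative):

from pyworkflow.core.validation import StepValidationError, validate_step_parameters

async def charge_customer(customer_id: str, amount_cents: int) -> None:
    ...  # illustrative step body; never called here

# Well-typed arguments pass silently.
validate_step_parameters(charge_customer, ("cust_1", 1999), {}, "charge_customer")

# A wrong type fails fast, before any dispatch or execution would happen.
try:
    validate_step_parameters(charge_customer, ("cust_1", {"oops": True}), {}, "charge_customer")
except StepValidationError as err:
    print(err)  # Step 'charge_customer' parameter validation failed for 'amount_cents': ...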
pyworkflow/core/validation.py
ADDED
@@ -0,0 +1,112 @@
+"""
+Pydantic validation for step parameters.
+
+Validates step function arguments against their type hints using Pydantic's
+TypeAdapter for runtime type checking.
+"""
+
+import inspect
+from collections.abc import Callable
+from typing import Any, get_type_hints
+
+from pydantic import TypeAdapter, ValidationError
+
+from pyworkflow.core.exceptions import FatalError
+
+
+class StepValidationError(FatalError):
+    """
+    Raised when step parameter validation fails.
+
+    This is a FatalError subclass to ensure validation failures
+    immediately fail the workflow without retries.
+    """
+
+    def __init__(
+        self,
+        step_name: str,
+        param_name: str,
+        expected_type: type,
+        received_value: Any,
+        validation_error: ValidationError,
+    ) -> None:
+        self.step_name = step_name
+        self.param_name = param_name
+        self.expected_type = expected_type
+        self.received_value = received_value
+        self.validation_error = validation_error
+
+        # Build clear error message
+        error_details = str(validation_error)
+        message = (
+            f"Step '{step_name}' parameter validation failed for '{param_name}': "
+            f"expected {expected_type}, got {type(received_value).__name__} "
+            f"with value {received_value!r}. Details: {error_details}"
+        )
+        super().__init__(message)
+
+
+def validate_step_parameters(
+    func: Callable,
+    args: tuple,
+    kwargs: dict,
+    step_name: str,
+) -> None:
+    """
+    Validate step parameters against their type hints using Pydantic.
+
+    Only parameters with type annotations are validated. Parameters without
+    type hints are skipped.
+
+    Args:
+        func: The step function (original, unwrapped)
+        args: Positional arguments passed to the step
+        kwargs: Keyword arguments passed to the step
+        step_name: Name of the step for error messages
+
+    Raises:
+        StepValidationError: If any typed parameter fails validation
+    """
+    # Get function signature and type hints
+    sig = inspect.signature(func)
+
+    try:
+        # Try to get type hints, may fail for some edge cases
+        type_hints = get_type_hints(func)
+    except Exception:
+        # If we can't get type hints, skip validation
+        return
+
+    if not type_hints:
+        # No type hints at all, skip validation
+        return
+
+    # Bind arguments to parameters
+    try:
+        bound = sig.bind(*args, **kwargs)
+        bound.apply_defaults()
+    except TypeError:
+        # If binding fails, the function call itself will fail
+        # Let the normal execution handle this
+        return
+
+    # Validate each parameter that has a type hint
+    for param_name, param_value in bound.arguments.items():
+        if param_name not in type_hints:
+            # No type hint for this parameter, skip validation
+            continue
+
+        expected_type = type_hints[param_name]
+
+        try:
+            # Use Pydantic TypeAdapter for validation
+            adapter = TypeAdapter(expected_type)
+            adapter.validate_python(param_value)
+        except ValidationError as e:
+            raise StepValidationError(
+                step_name=step_name,
+                param_name=param_name,
+                expected_type=expected_type,
+                received_value=param_value,
+                validation_error=e,
+            )
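Two details of this module are worth noting. TypeAdapter runs in Pydantic's default lax mode, so values that can be coerced to the annotated type (for example the string "3" for an int parameter) pass validation, while genuinely incompatible values are rejected; and because the validated result is discarded rather than substituted, the step still receives the original argument values. A short sketch of the coercion behavior, using plain Pydantic with no pyworkflow imports:

from pydantic import TypeAdapter, ValidationError

# Lax-mode coercion: a compatible representation validates successfully,
# so validate_step_parameters would accept "3" for an int-annotated parameter.
assert TypeAdapter(int).validate_python("3") == 3

# Incompatible values raise ValidationError, which validate_step_parameters
# wraps in StepValidationError together with the step and parameter names.
try:
    TypeAdapter(int).validate_python("abc")
except ValidationError as err:
    print(err)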
{pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-pyworkflow/__init__.py,sha256=
+pyworkflow/__init__.py,sha256=C7t5zLS32QNzI2BXNdGpWXwzgNyUVchs6L7dcx0p84c,6281
 pyworkflow/config.py,sha256=pKwPrpCwBJiDpB-MIjM0U7GW1TFmQFO341pihL5-vTM,14455
 pyworkflow/discovery.py,sha256=snW3l4nvY3Nc067TGlwtn_qdzTU9ybN7YPr8FbvY8iM,8066
 pyworkflow/aws/__init__.py,sha256=Ak_xHcR9LTRX-CwcS0XecYmzrXZw4EM3V9aKBBDEmIk,1741
@@ -6,10 +6,11 @@ pyworkflow/aws/context.py,sha256=Vjyjip6U1Emg-WA5TlBaxFhcg15rf9mVJiPfT4VywHc,821
 pyworkflow/aws/handler.py,sha256=0SnQuIfQVD99QKMCRFPtrsrV_l1LYKFkzPIRx_2UkSI,5849
 pyworkflow/aws/testing.py,sha256=WrRk9wjbycM-UyHFQWNnA83UE9IrYnhfT38WrbxQT2U,8844
 pyworkflow/celery/__init__.py,sha256=FywVyqnT8AYz9cXkr-wel7_-N7dHFsPNASEPMFESf4Q,1179
-pyworkflow/celery/app.py,sha256=
+pyworkflow/celery/app.py,sha256=UwZauZjVzOxMPX3WmPilRi8Emg5_VbMjHjNn7uz7R14,9670
 pyworkflow/celery/loop.py,sha256=mu8cIfMJYgHAoGCN_DdDoNoXK3QHzHpLmrPCyFDQYIY,3016
 pyworkflow/celery/scheduler.py,sha256=Ms4rqRpdpMiLM8l4y3DK-Divunj9afYuUaGGoNQe7P4,11288
-pyworkflow/celery/
+pyworkflow/celery/singleton.py,sha256=BykAovBVP0XUo2ZbTQzD4fANa6C_lHSqqaWnhfNfQGw,12978
+pyworkflow/celery/tasks.py,sha256=uHpOoHvZd72CYxCG4yhjgyT7j12fOoyf2380pJgMACs,82083
 pyworkflow/cli/__init__.py,sha256=tcbe-fcZmyeEKUy_aEo8bsEF40HsNKOwvyMBZIJZPwc,3844
 pyworkflow/cli/__main__.py,sha256=LxLLS4FEEPXa5rWpLTtKuivn6Xp9pGia-QKGoxt9SS0,148
 pyworkflow/cli/commands/__init__.py,sha256=IXvnTgukALckkO8fTlZhVRq80ojSqpnIIgboAg_-yZU,39
@@ -42,7 +43,8 @@ pyworkflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/core/exceptions.py,sha256=F2nbXyoed7wlIJMeGfpgsIC8ZyWcYN0iKtOnBA7-xnQ,10719
 pyworkflow/core/registry.py,sha256=ZUf2YTpBvWpC9EehRbMF8soXOk9VsjNruoi6lR4O33M,9361
 pyworkflow/core/scheduled.py,sha256=479A7IvjHiMob7ZrZtfE6VqtypG6DLIGMGhh16jLIWM,10522
-pyworkflow/core/step.py,sha256=
+pyworkflow/core/step.py,sha256=9JG9udEDph_6ecfruVdY0qC3ruoC6bjd0F91chg8QZM,23913
+pyworkflow/core/validation.py,sha256=0VaZyQ9YGK8WFy4ZG4Bjt9MYAp0vz6xEOe80kcgaP5g,3362
 pyworkflow/core/workflow.py,sha256=dlcICq1B69-nxUJth_n-H8U9TjP3QZyjvquQXxWHcxs,12076
 pyworkflow/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/engine/events.py,sha256=KFtyIqQjr1B9Frtd5V1Zq0ph1iwg_Ky3uPzmTYZ1Tnk,25827
@@ -84,9 +86,9 @@ pyworkflow/storage/sqlite.py,sha256=oBzJnnOp2uk0-U7hMTQk9QgJq3RBwXPQfrmYpivjdgE,
 pyworkflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/utils/duration.py,sha256=C-itmiSQQlplw7j6XB679hLF9xYGnyCwm7twO88OF8U,3978
 pyworkflow/utils/schedule.py,sha256=dO_MkGFyfwZpb0LDlW6BGyZzlPuQIA6dc6j9nk9lc4Y,10691
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
+pyworkflow_engine-0.1.13.dist-info/licenses/LICENSE,sha256=Y49RCTZ5ayn_yzBcRxnyIFdcMCyuYm150aty_FIznfY,1080
+pyworkflow_engine-0.1.13.dist-info/METADATA,sha256=zqczKyklwOmbUKz9hfYrmhxP_ZteNp49g7RoB8zPaSM,19628
+pyworkflow_engine-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pyworkflow_engine-0.1.13.dist-info/entry_points.txt,sha256=3IGAfuylnS39U0YX0pxnjrj54kB4iT_bNYrmsiDB-dE,51
+pyworkflow_engine-0.1.13.dist-info/top_level.txt,sha256=FLTv9pQmLDBXrQdLOhTMIS3njFibliMsQEfumqmdzBE,11
+pyworkflow_engine-0.1.13.dist-info/RECORD,,
{pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/WHEEL
File without changes
{pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/entry_points.txt
File without changes
{pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/licenses/LICENSE
File without changes
{pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.13.dist-info}/top_level.txt
File without changes