pyworkflow-engine 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +97 -3
- pyworkflow/celery/loop.py +108 -0
- pyworkflow/celery/singleton.py +368 -0
- pyworkflow/celery/tasks.py +553 -111
- pyworkflow/cli/commands/worker.py +13 -16
- pyworkflow/config.py +5 -0
- pyworkflow/context/base.py +4 -0
- pyworkflow/context/local.py +27 -1
- pyworkflow/context/step_context.py +1 -11
- pyworkflow/core/step.py +43 -15
- pyworkflow/core/validation.py +112 -0
- pyworkflow/engine/events.py +44 -30
- pyworkflow/engine/executor.py +21 -1
- pyworkflow/engine/replay.py +0 -39
- pyworkflow/observability/logging.py +43 -1
- pyworkflow/runtime/celery.py +1 -1
- pyworkflow/runtime/local.py +41 -1
- pyworkflow/storage/config.py +81 -2
- pyworkflow/storage/postgres.py +103 -34
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/RECORD +26 -23
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.11.dist-info → pyworkflow_engine-0.1.13.dist-info}/top_level.txt +0 -0
pyworkflow/cli/commands/worker.py
CHANGED

@@ -43,8 +43,8 @@ def worker() -> None:
     "--concurrency",
     "-c",
     type=int,
-    default=
-    help="Number of worker processes (default:
+    default=1,
+    help="Number of worker processes (default: 1)",
 )
 @click.option(
     "--loglevel",
@@ -67,8 +67,8 @@ def worker() -> None:
 @click.option(
     "--pool",
     type=click.Choice(["prefork", "solo", "eventlet", "gevent"], case_sensitive=False),
-    default=
-    help="Worker pool type. Use 'solo' for debugging with breakpoints",
+    default="prefork",
+    help="Worker pool type (default: prefork). Use 'solo' for debugging with breakpoints",
 )
 @click.pass_context
 def run_worker(
@@ -153,15 +153,16 @@ def run_worker(
         os.getenv("PYWORKFLOW_CELERY_RESULT_BACKEND", "redis://localhost:6379/1"),
     )

+    # Worker processes always need logging enabled
+    from loguru import logger as loguru_logger
+
+    loguru_logger.enable("pyworkflow")
+
     print_info("Starting Celery worker...")
     print_info(f"Broker: {broker_url}")
     print_info(f"Queues: {', '.join(queues)}")
-
-
-    print_info(f"Concurrency: {concurrency}")
-
-    if pool:
-        print_info(f"Pool: {pool}")
+    print_info(f"Concurrency: {concurrency}")
+    print_info(f"Pool: {pool}")

     try:
         # Discover workflows using CLI discovery (reads from --module, env var, or YAML config)
@@ -212,11 +213,10 @@ def run_worker(
         "worker",
         f"--loglevel={loglevel.upper()}",
         f"--queues={','.join(queues)}",
+        f"--concurrency={concurrency}",  # Always set (default: 1)
+        f"--pool={pool}",  # Always set (default: prefork)
     ]

-    if concurrency:
-        worker_args.append(f"--concurrency={concurrency}")
-
     if hostname:
         worker_args.append(f"--hostname={hostname}")

@@ -224,9 +224,6 @@
         worker_args.append("--beat")
         worker_args.append("--scheduler=pyworkflow.celery.scheduler:PyWorkflowScheduler")

-    if pool:
-        worker_args.append(f"--pool={pool}")
-
     print_success("Worker starting...")
     print_info("Press Ctrl+C to stop")
     print_info("")

pyworkflow/config.py
CHANGED

@@ -391,6 +391,11 @@ def reset_config() -> None:
     _config = None
     _config_loaded_from_yaml = False

+    # Also clear the storage cache to ensure test isolation
+    from pyworkflow.storage.config import clear_storage_cache
+
+    clear_storage_cache()
+

 def get_storage() -> Optional["StorageBackend"]:
     """

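The cache clearing matters mainly for test suites that reconfigure storage between tests. A minimal sketch of how this is typically wired up (the fixture is hypothetical and not part of the package; only reset_config comes from pyworkflow.config):

import pytest

from pyworkflow.config import reset_config


@pytest.fixture(autouse=True)
def isolated_pyworkflow_state():
    # reset_config() now clears the global config *and* the cached storage
    # backend, so one test's storage configuration cannot leak into the next.
    reset_config()
    yield
    reset_config()
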
pyworkflow/context/base.py
CHANGED

@@ -307,6 +307,10 @@ class WorkflowContext(ABC):
         """Get failure info for a step."""
         return None  # Default: no failure info

+    def is_step_in_progress(self, step_id: str) -> bool:
+        """Check if a step is currently in progress (dispatched but not completed)."""
+        return False  # Default: no in-progress tracking
+
     def should_execute_step(self, step_id: str) -> bool:
         """Check if step should be executed (not already completed)."""
         return True  # Default: always execute

pyworkflow/context/local.py
CHANGED

@@ -104,6 +104,10 @@ class LocalContext(WorkflowContext):
        # Step failure tracking (for handling failures during replay)
        self._step_failures: dict[str, dict[str, Any]] = {}

+        # Steps in progress (dispatched to Celery but not yet completed)
+        # Used to prevent re-dispatch during resume
+        self._steps_in_progress: set[str] = set()
+
        # Replay state if resuming
        if event_log:
            self._is_replaying = True
@@ -116,10 +120,18 @@
            from pyworkflow.serialization.decoder import deserialize

            for event in events:
-                if event.type == EventType.STEP_COMPLETED:
+                if event.type == EventType.STEP_STARTED:
+                    # Track step as in-progress (dispatched but not completed)
+                    step_id = event.data.get("step_id")
+                    if step_id:
+                        self._steps_in_progress.add(step_id)
+
+                elif event.type == EventType.STEP_COMPLETED:
                    step_id = event.data.get("step_id")
                    result = deserialize(event.data.get("result"))
                    self._step_results[step_id] = result
+                    # Step completed - no longer in progress
+                    self._steps_in_progress.discard(step_id)

                elif event.type == EventType.SLEEP_COMPLETED:
                    sleep_id = event.data.get("sleep_id")
@@ -156,6 +168,8 @@
                        "error_type": event.data.get("error_type", "Exception"),
                        "is_retryable": is_retryable,
                    }
+                    # Terminal failure - no longer in progress
+                    self._steps_in_progress.discard(step_id)

                elif event.type == EventType.CANCELLATION_REQUESTED:
                    self._cancellation_requested = True
@@ -335,6 +349,18 @@
            "is_retryable": is_retryable,
        }

+    def is_step_in_progress(self, step_id: str) -> bool:
+        """
+        Check if a step is currently in progress (dispatched but not completed).
+
+        This is used to prevent re-dispatching a step that's already running
+        on a Celery worker during workflow resume.
+
+        Returns:
+            True if step has STEP_STARTED but no STEP_COMPLETED/terminal STEP_FAILED
+        """
+        return step_id in self._steps_in_progress
+
    # =========================================================================
    # Sleep state management (for @step decorator and EventReplayer compatibility)
    # =========================================================================

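Pulled out of the constructor above, the in-progress bookkeeping is a small fold over the event log. The helper below is an illustrative distillation, not code from the package; it assumes events expose .type and .data as in pyworkflow.engine.events and treats only non-retryable STEP_FAILED events as terminal:

from pyworkflow.engine.events import EventType


def steps_in_progress(events) -> set[str]:
    """Step IDs with STEP_STARTED but no STEP_COMPLETED / terminal STEP_FAILED."""
    in_progress: set[str] = set()
    for event in events:
        step_id = event.data.get("step_id")
        if not step_id:
            continue
        if event.type == EventType.STEP_STARTED:
            in_progress.add(step_id)
        elif event.type == EventType.STEP_COMPLETED:
            in_progress.discard(step_id)
        elif event.type == EventType.STEP_FAILED and not event.data.get("is_retryable", False):
            # Only a terminal (non-retryable) failure clears the flag
            in_progress.discard(step_id)
    return in_progress
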
pyworkflow/context/step_context.py
CHANGED

@@ -176,8 +176,7 @@ async def set_step_context(ctx: StepContext) -> None:
     Set the current step context and persist to storage.

     This function can only be called from workflow code, not from within steps.
-    When called, the context is persisted to storage
-    is recorded for deterministic replay.
+    When called, the context is persisted to storage for resumption.

     Args:
         ctx: The StepContext instance to set
@@ -213,15 +212,6 @@ async def set_step_context(ctx: StepContext) -> None:
     if has_context():
         workflow_ctx = get_context()
         if workflow_ctx.is_durable and workflow_ctx.storage is not None:
-            from pyworkflow.engine.events import create_context_updated_event
-
-            # Record CONTEXT_UPDATED event for replay
-            event = create_context_updated_event(
-                run_id=workflow_ctx.run_id,
-                context_data=ctx.to_dict(),
-            )
-            await workflow_ctx.storage.record_event(event)
-
             # Update the WorkflowRun.context field
             await workflow_ctx.storage.update_run_context(workflow_ctx.run_id, ctx.to_dict())

pyworkflow/core/step.py
CHANGED

@@ -23,6 +23,7 @@ from loguru import logger
 from pyworkflow.context import get_context, has_context
 from pyworkflow.core.exceptions import FatalError, RetryableError
 from pyworkflow.core.registry import register_step
+from pyworkflow.core.validation import validate_step_parameters
 from pyworkflow.engine.events import (
     create_step_completed_event,
     create_step_failed_event,
@@ -118,6 +119,8 @@ def step(
                    f"Step {step_name} in transient mode, executing directly",
                    run_id=ctx.run_id,
                )
+                # Validate parameters before execution
+                validate_step_parameters(func, args, kwargs, step_name)
                return await _execute_with_retries(
                    func, args, kwargs, step_name, max_retries, retry_delay
                )
@@ -126,22 +129,24 @@
            # Generate step ID (deterministic based on name + args)
            step_id = _generate_step_id(step_name, args, kwargs)

+            # Check if step has already failed (must check BEFORE cached result check)
+            # A failed step has no cached result, so should_execute_step would return True
+            # and skip this check if it were inside the should_execute_step block
+            if ctx.has_step_failed(step_id):
+                error_info = ctx.get_step_failure(step_id)
+                logger.error(
+                    f"Step {step_name} failed on remote worker",
+                    run_id=ctx.run_id,
+                    step_id=step_id,
+                    error=error_info.get("error") if error_info else "Unknown error",
+                )
+                raise FatalError(
+                    f"Step {step_name} failed: "
+                    f"{error_info.get('error') if error_info else 'Unknown error'}"
+                )
+
            # Check if step has already completed (replay)
            if not ctx.should_execute_step(step_id):
-                # Check if step failed (for distributed step dispatch)
-                if ctx.has_step_failed(step_id):
-                    error_info = ctx.get_step_failure(step_id)
-                    logger.error(
-                        f"Step {step_name} failed on remote worker",
-                        run_id=ctx.run_id,
-                        step_id=step_id,
-                        error=error_info.get("error") if error_info else "Unknown error",
-                    )
-                    raise FatalError(
-                        f"Step {step_name} failed: "
-                        f"{error_info.get('error') if error_info else 'Unknown error'}"
-                    )
-
                logger.debug(
                    f"Step {step_name} already completed, using cached result",
                    run_id=ctx.run_id,
@@ -149,10 +154,29 @@
                )
                return ctx.get_step_result(step_id)

+            # Check if step is already in progress (dispatched to Celery but not completed)
+            # This prevents re-dispatch during resume when step is still running/retrying
+            if ctx.is_step_in_progress(step_id):
+                logger.debug(
+                    f"Step {step_name} already in progress, waiting for completion",
+                    run_id=ctx.run_id,
+                    step_id=step_id,
+                )
+                # Re-suspend and wait for existing task to complete
+                from pyworkflow.core.exceptions import SuspensionSignal
+
+                raise SuspensionSignal(
+                    reason=f"step_dispatch:{step_id}",
+                    step_id=step_id,
+                    step_name=step_name,
+                )
+
            # ========== Distributed Step Dispatch ==========
            # When running in a distributed runtime (e.g., Celery), dispatch steps
            # to step workers instead of executing inline.
            if ctx.runtime == "celery":
+                # Validate parameters before dispatching to Celery
+                validate_step_parameters(func, args, kwargs, step_name)
                return await _dispatch_step_to_celery(
                    ctx=ctx,
                    func=func,
@@ -221,6 +245,9 @@
    # Check for cancellation before executing step
    ctx.check_cancellation()

+    # Validate parameters before execution
+    validate_step_parameters(func, args, kwargs, step_name)
+
    try:
        # Execute step function
        result = await func(*args, **kwargs)
@@ -366,12 +393,13 @@
            )

            # Record final STEP_FAILED event
+            # is_retryable=False since we've exhausted all retries
            failure_event = create_step_failed_event(
                run_id=ctx.run_id,
                step_id=step_id,
                error=str(e),
                error_type=type(e).__name__,
-                is_retryable=
+                is_retryable=False,
                attempt=current_attempt,
            )
            await ctx.storage.record_event(failure_event)  # type: ignore[union-attr]

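Taken together, the wrapper now resolves each step in a fixed order before doing any work. The sketch below is a simplified illustration of that ordering, not the actual wrapper; the method and exception names are taken from the diff above:

from pyworkflow.core.exceptions import FatalError, SuspensionSignal


async def resolve_step(ctx, step_name: str, step_id: str):
    # 1. A terminal failure recorded by a remote worker fails the workflow immediately.
    if ctx.has_step_failed(step_id):
        raise FatalError(f"Step {step_name} failed")
    # 2. A completed step is served from the cached (replayed) result.
    if not ctx.should_execute_step(step_id):
        return ctx.get_step_result(step_id)
    # 3. A step already dispatched to Celery is not re-dispatched; the workflow
    #    re-suspends and waits for the existing task to finish.
    if ctx.is_step_in_progress(step_id):
        raise SuspensionSignal(
            reason=f"step_dispatch:{step_id}", step_id=step_id, step_name=step_name
        )
    # 4. Otherwise validate parameters, then dispatch to Celery or execute inline.
    return None
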
pyworkflow/core/validation.py
ADDED

@@ -0,0 +1,112 @@
+"""
+Pydantic validation for step parameters.
+
+Validates step function arguments against their type hints using Pydantic's
+TypeAdapter for runtime type checking.
+"""
+
+import inspect
+from collections.abc import Callable
+from typing import Any, get_type_hints
+
+from pydantic import TypeAdapter, ValidationError
+
+from pyworkflow.core.exceptions import FatalError
+
+
+class StepValidationError(FatalError):
+    """
+    Raised when step parameter validation fails.
+
+    This is a FatalError subclass to ensure validation failures
+    immediately fail the workflow without retries.
+    """
+
+    def __init__(
+        self,
+        step_name: str,
+        param_name: str,
+        expected_type: type,
+        received_value: Any,
+        validation_error: ValidationError,
+    ) -> None:
+        self.step_name = step_name
+        self.param_name = param_name
+        self.expected_type = expected_type
+        self.received_value = received_value
+        self.validation_error = validation_error
+
+        # Build clear error message
+        error_details = str(validation_error)
+        message = (
+            f"Step '{step_name}' parameter validation failed for '{param_name}': "
+            f"expected {expected_type}, got {type(received_value).__name__} "
+            f"with value {received_value!r}. Details: {error_details}"
+        )
+        super().__init__(message)
+
+
+def validate_step_parameters(
+    func: Callable,
+    args: tuple,
+    kwargs: dict,
+    step_name: str,
+) -> None:
+    """
+    Validate step parameters against their type hints using Pydantic.
+
+    Only parameters with type annotations are validated. Parameters without
+    type hints are skipped.
+
+    Args:
+        func: The step function (original, unwrapped)
+        args: Positional arguments passed to the step
+        kwargs: Keyword arguments passed to the step
+        step_name: Name of the step for error messages
+
+    Raises:
+        StepValidationError: If any typed parameter fails validation
+    """
+    # Get function signature and type hints
+    sig = inspect.signature(func)
+
+    try:
+        # Try to get type hints, may fail for some edge cases
+        type_hints = get_type_hints(func)
+    except Exception:
+        # If we can't get type hints, skip validation
+        return
+
+    if not type_hints:
+        # No type hints at all, skip validation
+        return
+
+    # Bind arguments to parameters
+    try:
+        bound = sig.bind(*args, **kwargs)
+        bound.apply_defaults()
+    except TypeError:
+        # If binding fails, the function call itself will fail
+        # Let the normal execution handle this
+        return
+
+    # Validate each parameter that has a type hint
+    for param_name, param_value in bound.arguments.items():
+        if param_name not in type_hints:
+            # No type hint for this parameter, skip validation
+            continue
+
+        expected_type = type_hints[param_name]
+
+        try:
+            # Use Pydantic TypeAdapter for validation
+            adapter = TypeAdapter(expected_type)
+            adapter.validate_python(param_value)
+        except ValidationError as e:
+            raise StepValidationError(
+                step_name=step_name,
+                param_name=param_name,
+                expected_type=expected_type,
+                received_value=param_value,
+                validation_error=e,
+            )

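A quick illustration of what the validator catches; the step function here is invented for the example, while validate_step_parameters and StepValidationError come from the new module above:

from pyworkflow.core.validation import StepValidationError, validate_step_parameters


async def charge_card(amount: int, currency: str = "usd") -> dict:
    # Hypothetical step body; never executed here, only inspected.
    return {"amount": amount, "currency": currency}


try:
    # "ten" cannot be coerced to int, so Pydantic's TypeAdapter rejects it.
    validate_step_parameters(charge_card, ("ten",), {}, step_name="charge_card")
except StepValidationError as exc:
    print(exc)  # Step 'charge_card' parameter validation failed for 'amount': ...
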
pyworkflow/engine/events.py
CHANGED

@@ -24,6 +24,7 @@ class EventType(Enum):
     WORKFLOW_PAUSED = "workflow.paused"
     WORKFLOW_RESUMED = "workflow.resumed"
     WORKFLOW_CONTINUED_AS_NEW = "workflow.continued_as_new"  # Workflow continued with fresh history
+    WORKFLOW_SUSPENDED = "workflow.suspended"  # Workflow suspended (waiting for step/sleep/hook)

     # Step lifecycle events
     STEP_STARTED = "step.started"
@@ -45,9 +46,6 @@
     # Cancellation events
     CANCELLATION_REQUESTED = "cancellation.requested"

-    # Context events
-    CONTEXT_UPDATED = "context.updated"
-
     # Child workflow events
     CHILD_WORKFLOW_STARTED = "child_workflow.started"
     CHILD_WORKFLOW_COMPLETED = "child_workflow.completed"
@@ -174,6 +172,49 @@ def create_workflow_continued_as_new_event(
     )


+def create_workflow_suspended_event(
+    run_id: str,
+    reason: str,
+    step_id: str | None = None,
+    step_name: str | None = None,
+    sleep_id: str | None = None,
+    hook_id: str | None = None,
+    child_id: str | None = None,
+) -> Event:
+    """
+    Create a workflow suspended event.
+
+    This event is recorded when a workflow suspends execution, typically
+    waiting for a step to complete on a worker, a sleep to elapse, a hook
+    to be received, or a child workflow to complete.
+
+    Args:
+        run_id: The workflow run ID
+        reason: Suspension reason (e.g., "step_dispatch:step_id", "sleep", "hook", "child_workflow")
+        step_id: Step ID if suspended for step execution
+        step_name: Step name if suspended for step execution
+        sleep_id: Sleep ID if suspended for sleep
+        hook_id: Hook ID if suspended for webhook
+        child_id: Child workflow ID if suspended for child
+
+    Returns:
+        Event: The workflow suspended event
+    """
+    return Event(
+        run_id=run_id,
+        type=EventType.WORKFLOW_SUSPENDED,
+        data={
+            "reason": reason,
+            "step_id": step_id,
+            "step_name": step_name,
+            "sleep_id": sleep_id,
+            "hook_id": hook_id,
+            "child_id": child_id,
+            "suspended_at": datetime.now(UTC).isoformat(),
+        },
+    )
+
+
 def create_workflow_interrupted_event(
     run_id: str,
     reason: str,
@@ -483,33 +524,6 @@ def create_step_cancelled_event(
     )


-def create_context_updated_event(
-    run_id: str,
-    context_data: dict[str, Any],
-) -> Event:
-    """
-    Create a context updated event.
-
-    This event is recorded when set_step_context() is called in workflow code.
-    It captures the full context state for deterministic replay.
-
-    Args:
-        run_id: The workflow run ID
-        context_data: The serialized context data (from StepContext.to_dict())
-
-    Returns:
-        Event: The context updated event
-    """
-    return Event(
-        run_id=run_id,
-        type=EventType.CONTEXT_UPDATED,
-        data={
-            "context": context_data,
-            "updated_at": datetime.now(UTC).isoformat(),
-        },
-    )
-
-
 # Child workflow event creation helpers

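For reference, constructing the new event looks like this (the run and step identifiers are invented for the example; the helper and its fields are exactly as defined above):

from pyworkflow.engine.events import EventType, create_workflow_suspended_event

event = create_workflow_suspended_event(
    run_id="run_123",
    reason="step_dispatch:step_abc",
    step_id="step_abc",
    step_name="charge_card",
)
assert event.type == EventType.WORKFLOW_SUSPENDED
assert event.data["reason"] == "step_dispatch:step_abc"
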
pyworkflow/engine/executor.py
CHANGED

@@ -283,9 +283,29 @@ async def _execute_workflow_local(
            return result

        except SuspensionSignal as e:
-            # Workflow suspended (sleep or hook)
+            # Workflow suspended (sleep, hook, or step dispatch)
            await storage.update_run_status(run_id=run_id, status=RunStatus.SUSPENDED)

+            # Record WORKFLOW_SUSPENDED event
+            from pyworkflow.engine.events import create_workflow_suspended_event
+
+            step_id = e.data.get("step_id") if e.data else None
+            step_name = e.data.get("step_name") if e.data else None
+            sleep_id = e.data.get("sleep_id") if e.data else None
+            hook_id = e.data.get("hook_id") if e.data else None
+            child_id = e.data.get("child_id") if e.data else None
+
+            suspended_event = create_workflow_suspended_event(
+                run_id=run_id,
+                reason=e.reason,
+                step_id=step_id,
+                step_name=step_name,
+                sleep_id=sleep_id,
+                hook_id=hook_id,
+                child_id=child_id,
+            )
+            await storage.record_event(suspended_event)
+
            logger.info(
                f"Workflow suspended: {e.reason}",
                run_id=run_id,

pyworkflow/engine/replay.py
CHANGED

@@ -93,9 +93,6 @@ class EventReplayer:
            elif event.type == EventType.CANCELLATION_REQUESTED:
                await self._apply_cancellation_requested(ctx, event)

-            elif event.type == EventType.CONTEXT_UPDATED:
-                await self._apply_context_updated(ctx, event)
-
            # Other event types don't affect replay state
            # (workflow_started, step_started, step_failed, etc. are informational)

@@ -258,42 +255,6 @@
            requested_by=requested_by,
        )

-    async def _apply_context_updated(self, ctx: LocalContext, event: Event) -> None:
-        """
-        Apply context_updated event - restore step context.
-
-        During replay, this restores the step context to its state at the time
-        the event was recorded. This ensures deterministic replay.
-        """
-        from pyworkflow.context.step_context import (
-            _set_step_context_internal,
-            get_step_context_class,
-        )
-
-        context_data = event.data.get("context", {})
-
-        if context_data:
-            # Get the registered context class
-            context_class = get_step_context_class()
-            if context_class is not None:
-                try:
-                    step_ctx = context_class.from_dict(context_data)
-                    _set_step_context_internal(step_ctx)
-                    logger.debug(
-                        "Restored step context from replay",
-                        run_id=ctx.run_id,
-                    )
-                except Exception as e:
-                    logger.warning(
-                        f"Failed to restore step context: {e}",
-                        run_id=ctx.run_id,
-                    )
-            else:
-                logger.debug(
-                    "No context class registered, skipping context restoration",
-                    run_id=ctx.run_id,
-                )
-

# Singleton instance
_replayer = EventReplayer()

pyworkflow/observability/logging.py
CHANGED

@@ -5,13 +5,40 @@ Provides structured logging with context-aware formatting for workflows, steps,
 and events. Integrates with loguru for powerful logging capabilities.
 """

+import logging
 import sys
 from pathlib import Path
+from types import FrameType
 from typing import Any

 from loguru import logger


+class InterceptHandler(logging.Handler):
+    """
+    Intercept standard logging calls and redirect them to loguru.
+
+    This ensures all logs (including from third-party libraries like Celery)
+    go through loguru's unified formatting.
+    """
+
+    def emit(self, record: logging.LogRecord) -> None:
+        # Get corresponding Loguru level if it exists
+        try:
+            level: str | int = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+
+        # Find caller from where originated the logged message
+        frame_or_none: FrameType | None = logging.currentframe()
+        depth = 2
+        while frame_or_none is not None and frame_or_none.f_code.co_filename == logging.__file__:
+            frame_or_none = frame_or_none.f_back
+            depth += 1
+
+        logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
+
+
 def configure_logging(
     level: str = "INFO",
     log_file: str | None = None,
@@ -50,6 +77,18 @@ def configure_logging(
    # Remove default logger
    logger.remove()

+    # Intercept standard library logging and redirect to loguru
+    # This ensures Celery and other libraries' logs go through loguru
+    logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
+
+    # Suppress verbose Celery logs - only show warnings and above
+    # PyWorkflow provides its own task execution logs
+    for logger_name in ("celery", "celery.task", "celery.worker", "kombu", "amqp"):
+        celery_logger = logging.getLogger(logger_name)
+        celery_logger.handlers = [InterceptHandler()]
+        celery_logger.propagate = False
+        celery_logger.setLevel(logging.WARNING)
+
    # Console format
    if json_logs:
        # JSON format for structured logging
@@ -90,13 +129,16 @@
        record["extra"]["_context"] = extra_str
        return True

+    # Add console handler - use stdout for better Celery worker compatibility
+    # enqueue=True makes it process-safe for multiprocessing (Celery workers)
    logger.add(
-        sys.stderr,
+        sys.stdout,
        format=console_format + "{extra[_context]}",
        level=level,
        colorize=not json_logs,
        serialize=json_logs,
        filter=format_with_context,  # type: ignore[arg-type]
+        enqueue=True,  # Process-safe logging for Celery workers
    )

    # Add file handler if requested

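The effect of the stdlib bridge can be seen in isolation with a short sketch; this is a standalone illustration (it repeats the basicConfig call from configure_logging rather than invoking configure_logging itself):

import logging

from loguru import logger

from pyworkflow.observability.logging import InterceptHandler

# Route all stdlib logging records through loguru.
logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)

logging.getLogger("some.library").warning("emitted via the stdlib logging module")
logger.info("emitted via loguru directly")
# Both lines come out with loguru's formatting, so Celery/kombu logs and
# PyWorkflow's own logs share one consistent stream.
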
pyworkflow/runtime/celery.py
CHANGED

@@ -248,7 +248,7 @@ class CeleryRuntime(Runtime):
        )

        # Use the existing schedule function which handles the delay calculation
-        schedule_workflow_resumption(run_id, wake_time)
+        schedule_workflow_resumption(run_id, wake_time, triggered_by="celery_runtime_schedule_wake")

        logger.info(
            f"Workflow wake scheduled: {run_id}",