pyworkflow-engine 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +87 -3
- pyworkflow/celery/loop.py +108 -0
- pyworkflow/celery/tasks.py +537 -75
- pyworkflow/cli/commands/worker.py +13 -16
- pyworkflow/config.py +9 -2
- pyworkflow/context/base.py +4 -0
- pyworkflow/context/local.py +27 -1
- pyworkflow/context/step_context.py +1 -11
- pyworkflow/core/step.py +35 -15
- pyworkflow/engine/events.py +44 -30
- pyworkflow/engine/executor.py +21 -1
- pyworkflow/engine/replay.py +0 -39
- pyworkflow/observability/logging.py +43 -1
- pyworkflow/runtime/celery.py +1 -1
- pyworkflow/runtime/local.py +41 -1
- pyworkflow/storage/config.py +81 -2
- pyworkflow/storage/postgres.py +103 -34
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/RECORD +24 -23
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.10.dist-info → pyworkflow_engine-0.1.12.dist-info}/top_level.txt +0 -0
pyworkflow/__init__.py
CHANGED
pyworkflow/celery/app.py
CHANGED
```diff
@@ -15,10 +15,28 @@ garbage collector and Celery's saferepr module. It does not affect functionality
 import os
 
 from celery import Celery
+from celery.signals import worker_init, worker_process_init, worker_shutdown
 from kombu import Exchange, Queue
 
 from pyworkflow.observability.logging import configure_logging
 
+# Track if logging has been configured in this process
+_logging_configured = False
+
+
+def _configure_worker_logging() -> None:
+    """Configure logging for the current worker process."""
+    global _logging_configured
+    if not _logging_configured:
+        from loguru import logger as loguru_logger
+
+        # Enable pyworkflow logging (may have been disabled by CLI)
+        loguru_logger.enable("pyworkflow")
+
+        log_level = os.getenv("PYWORKFLOW_LOG_LEVEL", "INFO").upper()
+        configure_logging(level=log_level)
+        _logging_configured = True
+
 
 def discover_workflows(modules: list[str] | None = None) -> None:
     """
```
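For readers unfamiliar with the loguru convention this hunk relies on: a library typically silences its own records at import time with `logger.disable(<package>)`, and a host application opts back in with `logger.enable(<package>)`, which is what the worker does for `"pyworkflow"` above. A minimal standalone sketch of that gating (the `mylib` name is illustrative, not pyworkflow code):

```python
# Sketch of loguru's per-package on/off switch; "mylib" is a stand-in name.
from loguru import logger

logger.disable("mylib")  # as a library might do in mylib/__init__.py
logger.enable("mylib")   # as the worker does above for "pyworkflow"
```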
```diff
@@ -118,6 +136,14 @@ def create_celery_app(
         accept_content=["json"],
         timezone="UTC",
         enable_utc=True,
+        # Broker transport options - prevent task redelivery
+        # See: https://github.com/celery/celery/issues/5935
+        broker_transport_options={
+            "visibility_timeout": 3600,  # 1 hour - prevent Redis from re-queueing tasks
+        },
+        result_backend_transport_options={
+            "visibility_timeout": 3600,
+        },
         # Task routing
         task_default_queue="pyworkflow.default",
         task_default_exchange="pyworkflow",
```
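Context on `visibility_timeout`: it applies to broker transports that emulate acknowledgement, notably Redis, where an unacknowledged task is re-delivered once the timeout elapses. Under late acknowledgement that means a task still running when the timeout expires gets handed to another worker and executed twice, so the value has to exceed the longest expected task runtime. A minimal sketch of the setting in isolation (the broker URL is a placeholder):

```python
from celery import Celery

# Placeholder broker URL; any Redis broker behaves the same way.
app = Celery("sketch", broker="redis://localhost:6379/0")

# With late acknowledgement, a task still running when visibility_timeout
# expires is re-delivered to another worker - i.e. executed twice.
app.conf.task_acks_late = True
app.conf.broker_transport_options = {"visibility_timeout": 3600}  # seconds
```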
```diff
@@ -154,7 +180,7 @@ def create_celery_app(
         task_reject_on_worker_lost=True,
         worker_prefetch_multiplier=1,  # Fair task distribution
         # Retry settings
-        task_autoretry_for=(
+        task_autoretry_for=(),
         task_retry_backoff=True,
         task_retry_backoff_max=600,  # 10 minutes max
         task_retry_jitter=True,
```
```diff
@@ -168,8 +194,9 @@ def create_celery_app(
         worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] [%(task_name)s(%(task_id)s)] %(message)s",
     )
 
-    #
-
+    # Note: Logging is configured via Celery signals (worker_init, worker_process_init)
+    # to ensure proper initialization AFTER process forking.
+    # See on_worker_init() and on_worker_process_init() below.
 
     # Auto-discover workflows from environment variable or configured modules
     discover_workflows()
```
```diff
@@ -182,6 +209,63 @@ def create_celery_app(
 celery_app = create_celery_app()
 
 
+# ========== Celery Worker Signals ==========
+# These signals ensure proper initialization in forked worker processes
+
+
+@worker_init.connect
+def on_worker_init(**kwargs):
+    """
+    Called when the main worker process starts (before forking).
+
+    For prefork pool, this runs in the parent process.
+    For solo/threads pool, this is the main initialization point.
+    """
+    _configure_worker_logging()
+
+
+@worker_process_init.connect
+def on_worker_process_init(**kwargs):
+    """
+    Called when a worker child process is initialized (after forking).
+
+    This is critical for prefork pool:
+    - loguru's background thread doesn't survive fork()
+    - We need a persistent event loop for connection pool reuse
+    """
+    _configure_worker_logging()
+
+    # Initialize persistent event loop for this worker
+    from pyworkflow.celery.loop import init_worker_loop
+
+    init_worker_loop()
+
+
+@worker_shutdown.connect
+def on_worker_shutdown(**kwargs):
+    """
+    Called when the worker is shutting down.
+
+    Cleans up:
+    - Storage backend connections (PostgreSQL connection pools, etc.)
+    - The persistent event loop
+    """
+    from loguru import logger
+
+    from pyworkflow.celery.loop import close_worker_loop, run_async
+    from pyworkflow.storage.config import disconnect_all_cached
+
+    try:
+        # Clean up storage connections using the persistent loop
+        run_async(disconnect_all_cached())
+    except Exception as e:
+        # Log but don't fail shutdown
+        logger.warning(f"Error during storage cleanup on shutdown: {e}")
+    finally:
+        # Close the persistent event loop
+        close_worker_loop()
+
+
 def get_celery_app() -> Celery:
     """
     Get the global Celery application instance.
```
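For orientation, the three hooks cover the prefork worker lifecycle end to end: `worker_init` fires once in the parent before forking, `worker_process_init` once in every forked child, and `worker_shutdown` on the way out. Application code can hook the same points; a minimal sketch with hypothetical handler names:

```python
from celery.signals import worker_process_init, worker_shutdown

@worker_process_init.connect
def open_resources(**kwargs):
    # Runs once per forked child - the safe place for sockets, pools, and
    # event loops, none of which should be created before fork().
    ...

@worker_shutdown.connect
def release_resources(**kwargs):
    # Mirror of the setup above, invoked as the worker stops.
    ...
```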
pyworkflow/celery/loop.py
ADDED
```diff
@@ -0,0 +1,108 @@
+"""
+Persistent event loop management for Celery workers.
+
+This module provides a single, persistent event loop per worker process.
+Using a persistent loop allows asyncpg connection pools to be reused across
+tasks, avoiding the overhead of creating/destroying pools for each task.
+
+Usage:
+    from pyworkflow.celery.loop import run_async
+
+    # Instead of: result = asyncio.run(some_coroutine())
+    # Use: result = run_async(some_coroutine())
+"""
+
+import asyncio
+import threading
+from collections.abc import Coroutine
+from typing import Any, TypeVar
+
+T = TypeVar("T")
+
+# Per-worker persistent event loop
+# Created in worker_process_init, closed in worker_shutdown
+_worker_loop: asyncio.AbstractEventLoop | None = None
+_loop_lock = threading.Lock()
+
+
+def init_worker_loop() -> None:
+    """
+    Initialize the persistent event loop for this worker process.
+
+    Called from worker_process_init signal handler.
+    """
+    global _worker_loop
+
+    with _loop_lock:
+        if _worker_loop is None or _worker_loop.is_closed():
+            _worker_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(_worker_loop)
+
+
+def close_worker_loop() -> None:
+    """
+    Close the persistent event loop for this worker process.
+
+    Called from worker_shutdown signal handler.
+    """
+    global _worker_loop
+
+    with _loop_lock:
+        if _worker_loop is not None and not _worker_loop.is_closed():
+            try:
+                # Run any pending cleanup
+                _worker_loop.run_until_complete(_worker_loop.shutdown_asyncgens())
+            except Exception:
+                pass
+            finally:
+                _worker_loop.close()
+        _worker_loop = None
+
+
+def get_worker_loop() -> asyncio.AbstractEventLoop:
+    """
+    Get the persistent event loop for this worker process.
+
+    If no loop exists (e.g., running outside Celery worker), creates one.
+
+    Returns:
+        The worker's event loop
+    """
+    global _worker_loop
+
+    with _loop_lock:
+        if _worker_loop is None or _worker_loop.is_closed():
+            # Not in a Celery worker or loop was closed - create a new one
+            _worker_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(_worker_loop)
+        return _worker_loop
+
+
+def run_async(coro: Coroutine[Any, Any, T]) -> T:
+    """
+    Run a coroutine on the persistent worker event loop.
+
+    This is a drop-in replacement for asyncio.run() that reuses
+    the same event loop across tasks, allowing connection pools
+    to be shared.
+
+    Args:
+        coro: The coroutine to run
+
+    Returns:
+        The result of the coroutine
+
+    Example:
+        # Instead of:
+        result = asyncio.run(storage.get_run(run_id))
+
+        # Use:
+        result = run_async(storage.get_run(run_id))
+    """
+    loop = get_worker_loop()
+    return loop.run_until_complete(coro)
+
+
+def is_loop_running() -> bool:
+    """Check if the worker loop exists and is not closed."""
+    return _worker_loop is not None and not _worker_loop.is_closed()
```