pyworkflow-engine 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +18 -0
- pyworkflow/celery/tasks.py +61 -71
- pyworkflow/storage/base.py +36 -0
- pyworkflow/storage/cassandra.py +34 -0
- pyworkflow/storage/dynamodb.py +34 -0
- pyworkflow/storage/file.py +52 -0
- pyworkflow/storage/memory.py +37 -0
- pyworkflow/storage/migrations/__init__.py +15 -0
- pyworkflow/storage/migrations/base.py +299 -0
- pyworkflow/storage/mysql.py +186 -5
- pyworkflow/storage/postgres.py +197 -6
- pyworkflow/storage/sqlite.py +171 -5
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/RECORD +19 -17
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/top_level.txt +0 -0
pyworkflow/__init__.py
CHANGED
pyworkflow/celery/app.py
CHANGED
@@ -151,6 +151,8 @@ def create_celery_app(
     sentinel_master_name: str | None = None,
     broker_transport_options: dict[str, Any] | None = None,
     result_backend_transport_options: dict[str, Any] | None = None,
+    worker_max_memory_per_child: int | None = None,
+    worker_max_tasks_per_child: int | None = None,
 ) -> Celery:
     """
     Create and configure a Celery application for PyWorkflow.
@@ -162,6 +164,8 @@ def create_celery_app(
         sentinel_master_name: Redis Sentinel master name. Priority: parameter > PYWORKFLOW_CELERY_SENTINEL_MASTER env var > "mymaster"
         broker_transport_options: Additional transport options for the broker (merged with defaults)
         result_backend_transport_options: Additional transport options for the result backend (merged with defaults)
+        worker_max_memory_per_child: Max memory per worker child process (KB). Priority: parameter > PYWORKFLOW_WORKER_MAX_MEMORY env var > None (no limit)
+        worker_max_tasks_per_child: Max tasks per worker child before recycling. Priority: parameter > PYWORKFLOW_WORKER_MAX_TASKS env var > None (no limit)
 
     Returns:
         Configured Celery application
@@ -170,6 +174,8 @@ def create_celery_app(
         PYWORKFLOW_CELERY_BROKER: Celery broker URL (used if broker_url param not provided)
         PYWORKFLOW_CELERY_RESULT_BACKEND: Result backend URL (used if result_backend param not provided)
         PYWORKFLOW_CELERY_SENTINEL_MASTER: Sentinel master name (used if sentinel_master_name param not provided)
+        PYWORKFLOW_WORKER_MAX_MEMORY: Max memory per worker child (KB) (used if worker_max_memory_per_child param not provided)
+        PYWORKFLOW_WORKER_MAX_TASKS: Max tasks per worker child (used if worker_max_tasks_per_child param not provided)
 
     Examples:
         # Default configuration (uses env vars if set, otherwise localhost Redis)
@@ -202,6 +208,14 @@ def create_celery_app(
         or "redis://localhost:6379/1"
     )
 
+    # Worker memory limits (KB) - prevents memory leaks from accumulating
+    # Priority: parameter > env var > None (no limit by default)
+    max_memory_env = os.getenv("PYWORKFLOW_WORKER_MAX_MEMORY")
+    max_memory = worker_max_memory_per_child or (int(max_memory_env) if max_memory_env else None)
+
+    max_tasks_env = os.getenv("PYWORKFLOW_WORKER_MAX_TASKS")
+    max_tasks = worker_max_tasks_per_child or (int(max_tasks_env) if max_tasks_env else None)
+
     # Detect broker and backend types
     is_sentinel_broker = is_sentinel_url(broker_url)
     is_sentinel_backend = is_sentinel_url(result_backend)
@@ -310,6 +324,10 @@ def create_celery_app(
         # Logging
         worker_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
         worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s] [%(task_name)s(%(task_id)s)] %(message)s",
+        # Worker memory management - prevents memory leaks from accumulating
+        # When set, workers are recycled after exceeding these limits
+        worker_max_memory_per_child=max_memory,  # KB, None = no limit
+        worker_max_tasks_per_child=max_tasks,  # None = no limit
     )
 
     # Configure singleton locking for Redis or Sentinel brokers
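
A usage sketch for the new worker-recycling options (the function, parameter names, and env vars are from this diff; the Redis URLs and limit values are illustrative):

    import os

    from pyworkflow.celery.app import create_celery_app

    # Explicit parameters take priority over the environment variables.
    app = create_celery_app(
        broker_url="redis://localhost:6379/0",
        result_backend="redis://localhost:6379/1",
        worker_max_memory_per_child=200_000,  # KB, so roughly 200 MB per child
        worker_max_tasks_per_child=1_000,     # recycle a child after 1000 tasks
    )

    # Equivalent environment-based configuration (picked up when the
    # parameters are omitted); both default to None, i.e. no recycling.
    os.environ["PYWORKFLOW_WORKER_MAX_MEMORY"] = "200000"  # KB
    os.environ["PYWORKFLOW_WORKER_MAX_TASKS"] = "1000"
    app = create_celery_app()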
pyworkflow/celery/tasks.py
CHANGED
@@ -11,7 +11,6 @@ These tasks enable:
 
 import asyncio
 import random
-import traceback
 import uuid
 from collections.abc import Callable
 from datetime import UTC, datetime
@@ -172,10 +171,9 @@ def execute_step_task(
         raise FatalError(f"Step '{step_name}' not found in registry")
 
     # Ignore processing step if already completed (idempotency)
-
-    already_completed = …
-
-        for evt in events
+    # Use has_event() for efficient EXISTS check instead of loading all events
+    already_completed = run_async(
+        storage.has_event(run_id, EventType.STEP_COMPLETED.value, step_id=step_id)
     )
     if already_completed:
         logger.warning(
@@ -398,10 +396,9 @@ async def _record_step_completion_and_resume(
     await storage.connect()
 
     # Idempotency check: skip if step already completed
-
-    already_completed = …
-
-        for evt in events
+    # Use has_event() for efficient EXISTS check instead of loading all events
+    already_completed = await storage.has_event(
+        run_id, EventType.STEP_COMPLETED.value, step_id=step_id
     )
     if already_completed:
         logger.info(
@@ -414,26 +411,23 @@ async def _record_step_completion_and_resume(
 
     # Wait for WORKFLOW_SUSPENDED event before recording STEP_COMPLETED
     # This prevents race conditions where both events get the same sequence number
+    # Use has_event() for memory-efficient polling instead of loading all events
     max_wait_attempts = 50  # 50 * 10ms = 500ms max wait
     wait_interval = 0.01  # 10ms between checks
 
-    for …
-        has_suspended = …
-
-            and evt.data.get("step_id") == step_id
-            for evt in events
+    for _attempt in range(max_wait_attempts):
+        has_suspended = await storage.has_event(
+            run_id, EventType.WORKFLOW_SUSPENDED.value, step_id=step_id
         )
        if has_suspended:
            break

-        # Wait and …
        await asyncio.sleep(wait_interval)
-        events = await storage.get_events(run_id)
 
     # Also check if step was already completed by another task during wait
-    already_completed = …
-
-        for evt in events
+    already_completed = await storage.has_event(
+        run_id, EventType.STEP_COMPLETED.value, step_id=step_id
     )
     if already_completed:
         logger.info(
@@ -506,17 +500,18 @@ async def _record_step_failure_and_resume(
     await storage.connect()
 
     # Idempotency check: skip if step already completed or terminally failed
-
-
-
-
-
-
-
-
-
+    # Use has_event() for efficient EXISTS check instead of loading all events
+    # Note: For STEP_FAILED with is_retryable check, we use has_event for STEP_COMPLETED
+    # and separately check STEP_FAILED (non-retryable failures are rare, so this is still efficient)
+    already_completed = await storage.has_event(
+        run_id, EventType.STEP_COMPLETED.value, step_id=step_id
+    )
+    # For terminal failures, we check separately (is_retryable=false in data)
+    # This is less common, so checking completion first is the fast path
+    already_failed_terminal = await storage.has_event(
+        run_id, EventType.STEP_FAILED.value, step_id=step_id, is_retryable="False"
     )
-    if …
+    if already_completed or already_failed_terminal:
         logger.info(
             "Step already completed/failed by another task, skipping",
             run_id=run_id,
@@ -527,33 +522,28 @@ async def _record_step_failure_and_resume(
 
     # Wait for WORKFLOW_SUSPENDED event before recording STEP_FAILED
     # This prevents race conditions where both events get the same sequence number
+    # Use has_event() for memory-efficient polling instead of loading all events
     max_wait_attempts = 50  # 50 * 10ms = 500ms max wait
     wait_interval = 0.01  # 10ms between checks
 
-    for …
-        has_suspended = …
-
-            and evt.data.get("step_id") == step_id
-            for evt in events
+    for _attempt in range(max_wait_attempts):
+        has_suspended = await storage.has_event(
+            run_id, EventType.WORKFLOW_SUSPENDED.value, step_id=step_id
        )
        if has_suspended:
            break

-        # Wait and …
        await asyncio.sleep(wait_interval)
-        events = await storage.get_events(run_id)
 
     # Also check if step was already handled by another task during wait
-
-
-
-
-
-            and not evt.data.get("is_retryable", True)
-        )
-        for evt in events
+    already_completed = await storage.has_event(
+        run_id, EventType.STEP_COMPLETED.value, step_id=step_id
+    )
+    already_failed_terminal = await storage.has_event(
+        run_id, EventType.STEP_FAILED.value, step_id=step_id, is_retryable="False"
     )
-    if …
+    if already_completed or already_failed_terminal:
         logger.info(
             "Step already completed/failed by another task during wait, skipping",
             run_id=run_id,
@@ -891,13 +881,13 @@ async def _execute_child_workflow_on_worker(
 
     # For step dispatch suspensions, check if step already completed/failed
     if step_id and e.reason.startswith("step_dispatch:"):
-
-
-            evt.type in (EventType.STEP_COMPLETED, EventType.STEP_FAILED)
-            and evt.data.get("step_id") == step_id
-            for evt in events
+        step_completed = await storage.has_event(
+            child_run_id, EventType.STEP_COMPLETED.value, step_id=step_id
         )
-
+        step_failed = await storage.has_event(
+            child_run_id, EventType.STEP_FAILED.value, step_id=step_id
+        )
+        if step_completed or step_failed:
             logger.info(
                 "Child step finished before suspension completed, scheduling resume",
                 child_run_id=child_run_id,
@@ -1144,8 +1134,8 @@ async def _handle_workflow_recovery(
         return False
 
     # Get last event sequence
-
-    last_event_sequence = …
+    latest_event = await storage.get_latest_event(run.run_id)
+    last_event_sequence = latest_event.sequence if latest_event else None
 
     # Record interruption event
     interrupted_event = create_workflow_interrupted_event(
@@ -1287,13 +1277,13 @@ async def _recover_workflow_on_worker(
 
     # For step dispatch suspensions, check if step already completed/failed
     if step_id and e.reason.startswith("step_dispatch:"):
-
-
-            evt.type in (EventType.STEP_COMPLETED, EventType.STEP_FAILED)
-            and evt.data.get("step_id") == step_id
-            for evt in events
+        step_completed = await storage.has_event(
+            run_id, EventType.STEP_COMPLETED.value, step_id=step_id
         )
-
+        step_failed = await storage.has_event(
+            run_id, EventType.STEP_FAILED.value, step_id=step_id
+        )
+        if step_completed or step_failed:
             logger.info(
                 "Step finished before recovery suspension completed, scheduling resume",
                 run_id=run_id,
@@ -1679,13 +1669,13 @@ async def _start_workflow_on_worker(
     # For step dispatch suspensions, check if step already completed/failed (race condition)
     # If so, schedule resume immediately
     if step_id and e.reason.startswith("step_dispatch:"):
-
-
-            evt.type in (EventType.STEP_COMPLETED, EventType.STEP_FAILED)
-            and evt.data.get("step_id") == step_id
-            for evt in events
+        step_completed = await storage.has_event(
+            run_id, EventType.STEP_COMPLETED.value, step_id=step_id
         )
-
+        step_failed = await storage.has_event(
+            run_id, EventType.STEP_FAILED.value, step_id=step_id
+        )
+        if step_completed or step_failed:
             logger.info(
                 "Step finished before suspension completed, scheduling resume",
                 run_id=run_id,
@@ -2269,13 +2259,13 @@ async def _resume_workflow_on_worker(
 
     # For step dispatch suspensions, check if step already completed/failed
     if step_id and e.reason.startswith("step_dispatch:"):
-
-
-
-
-
+        step_completed = await storage.has_event(
+            run_id, EventType.STEP_COMPLETED.value, step_id=step_id
+        )
+        step_failed = await storage.has_event(
+            run_id, EventType.STEP_FAILED.value, step_id=step_id
         )
-        if …
+        if step_completed or step_failed:
             logger.info(
                 "Step finished before resume suspension completed, scheduling resume",
                 run_id=run_id,
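
The recurring change in this file swaps event-log scans for the new existence check. A minimal before/after sketch of the pattern; the "before" is reconstructed from the removed fragments above and is approximate:

    # Before (approximate): load the full event log and scan it in Python.
    events = await storage.get_events(run_id)
    already_completed = any(
        evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id
        for evt in events
    )

    # After: ask the backend directly; SQL backends can answer with a single
    # EXISTS query instead of materializing every event.
    already_completed = await storage.has_event(
        run_id, EventType.STEP_COMPLETED.value, step_id=step_id
    )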
pyworkflow/storage/base.py
CHANGED
@@ -203,6 +203,42 @@ class StorageBackend(ABC):
         """
         pass
 
+    @abstractmethod
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists matching the criteria.
+
+        This is a memory-efficient alternative to get_events() when you only
+        need to check for existence. Uses SQL EXISTS queries in SQL backends
+        for O(1) memory usage instead of loading all events.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for (e.g., "step_completed")
+            **filters: Additional filters to match against event data fields.
+                For example, step_id="abc" will check data->>'step_id' = 'abc'
+
+        Returns:
+            True if a matching event exists, False otherwise
+
+        Example:
+            # Check if step completed
+            exists = await storage.has_event(
+                run_id, "step_completed", step_id="step_123"
+            )
+
+            # Check if workflow suspended for a specific step
+            exists = await storage.has_event(
+                run_id, "workflow_suspended", step_id="step_123"
+            )
+        """
+        pass
+
     @abstractmethod
     async def get_latest_event(
         self,
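
The SQL backends (mysql.py, postgres.py, sqlite.py in the file list above) are not expanded in this view. Purely as an illustration of the EXISTS technique the docstring describes, a PostgreSQL-style implementation might look like the sketch below; the events table schema and the asyncpg pool attribute are assumptions, not taken from the package:

    async def has_event(self, run_id: str, event_type: str, **filters: str) -> bool:
        # Hypothetical schema: events(run_id TEXT, type TEXT, data JSONB).
        conditions = ["run_id = $1", "type = $2"]
        params: list[str] = [run_id, event_type]
        for key, value in filters.items():
            params.append(value)
            # Filter keys arrive as Python identifiers (kwargs), so interpolating
            # them into the statement is safe; values still use bind parameters.
            conditions.append(f"data->>'{key}' = ${len(params)}")
        query = "SELECT EXISTS (SELECT 1 FROM events WHERE " + " AND ".join(conditions) + ")"
        row = await self._pool.fetchrow(query, *params)  # hypothetical connection pool
        return bool(row[0])

Because EXISTS stops at the first matching row, memory use stays constant however long the event log grows, which is the motivation the base-class docstring gives.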
pyworkflow/storage/cassandra.py
CHANGED
@@ -896,6 +896,40 @@ class CassandraStorageBackend(StorageBackend):
 
         return None
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists matching the criteria.
+
+        Loads events of the specified type and filters in Python for efficiency.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        # Load only events of the specific type
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        # Filter in Python
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
pyworkflow/storage/dynamodb.py
CHANGED
@@ -588,6 +588,40 @@ class DynamoDBStorageBackend(StorageBackend):
 
         return self._item_to_event(self._item_to_dict(items[0]))
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists matching the criteria.
+
+        Loads events of the specified type and filters in Python for efficiency.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        # Load only events of the specific type
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        # Filter in Python
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
pyworkflow/storage/file.py
CHANGED
@@ -373,6 +373,58 @@ class FileStorageBackend(StorageBackend):
         events = await self.get_events(run_id, event_types=[event_type] if event_type else None)
         return events[-1] if events else None
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists using file-based iteration with early termination.
+
+        Reads the events file line by line and returns as soon as a match is found,
+        avoiding loading the entire event log into memory.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        events_file = self.events_dir / f"{run_id}.jsonl"
+
+        if not events_file.exists():
+            return False
+
+        def _check() -> bool:
+            with events_file.open("r") as f:
+                for line in f:
+                    if not line.strip():
+                        continue
+
+                    data = json.loads(line)
+
+                    # Check event type
+                    if data["type"] != event_type:
+                        continue
+
+                    # Check all data filters
+                    match = True
+                    event_data = data.get("data", {})
+                    for key, value in filters.items():
+                        if str(event_data.get(key)) != str(value):
+                            match = False
+                            break
+
+                    if match:
+                        return True
+
+            return False
+
+        return await asyncio.to_thread(_check)
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
pyworkflow/storage/memory.py
CHANGED
@@ -250,6 +250,43 @@ class InMemoryStorageBackend(StorageBackend):
         # Return event with highest sequence
         return max(events, key=lambda e: e.sequence or 0)
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists by loading events of the specific type and filtering.
+
+        This approach:
+        1. Uses the event_types filter to load only events of the target type
+        2. Filters in Python on the loaded data
+        3. Significantly reduces memory vs loading ALL events
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        # Load only events of the specific type
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        # Filter in Python
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
pyworkflow/storage/migrations/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""
+Database schema migration framework for PyWorkflow storage backends.
+
+This module provides a migration framework that allows storage backends to
+evolve their schema over time while maintaining backward compatibility with
+existing databases.
+"""
+
+from pyworkflow.storage.migrations.base import (
+    Migration,
+    MigrationRegistry,
+    MigrationRunner,
+)
+
+__all__ = ["Migration", "MigrationRegistry", "MigrationRunner"]