pyworkflow-engine 0.1.20__py3-none-any.whl → 0.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/tasks.py +131 -74
- pyworkflow/core/step.py +20 -0
- pyworkflow/storage/base.py +29 -0
- pyworkflow/storage/cassandra.py +25 -0
- pyworkflow/storage/dynamodb.py +31 -0
- pyworkflow/storage/file.py +28 -0
- pyworkflow/storage/memory.py +14 -0
- pyworkflow/storage/mysql.py +20 -0
- pyworkflow/storage/postgres.py +24 -0
- pyworkflow/storage/sqlite.py +20 -0
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/RECORD +17 -17
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/top_level.txt +0 -0
pyworkflow/__init__.py
CHANGED
pyworkflow/celery/tasks.py
CHANGED

@@ -11,6 +11,7 @@ These tasks enable:
 
 import asyncio
 import random
+import traceback
 import uuid
 from collections.abc import Callable
 from datetime import UTC, datetime
@@ -379,9 +380,9 @@ async def _record_step_completion_and_resume(
 
     Called by execute_step_task after successful step execution.
 
-    …
+    IMPORTANT: This function waits for WORKFLOW_SUSPENDED event before recording
+    STEP_COMPLETED to prevent race conditions where both events get the same
+    sequence number. The workflow must fully suspend before we record completion.
 
     Idempotency: If STEP_COMPLETED already exists for this step_id, skip
     recording and resume scheduling (another task already handled it).
@@ -411,43 +412,67 @@ async def _record_step_completion_and_resume(
         )
         return
 
-    # Record STEP_COMPLETED event
-    completion_event = create_step_completed_event(
-        run_id=run_id,
-        step_id=step_id,
-        result=serialize(result),
-        step_name=step_name,
-    )
-    await storage.record_event(completion_event)
+    # Wait for WORKFLOW_SUSPENDED event before recording STEP_COMPLETED
+    # This prevents race conditions where both events get the same sequence number
+    max_wait_attempts = 50  # 50 * 10ms = 500ms max wait
+    wait_interval = 0.01  # 10ms between checks
 
-    # Check if workflow has suspended
-    events = await storage.get_events(run_id)
+    for attempt in range(max_wait_attempts):
+        has_suspended = any(
+            evt.type == EventType.WORKFLOW_SUSPENDED
+            and evt.data.get("step_id") == step_id
+            for evt in events
+        )
+        if has_suspended:
+            break
 
-    has_suspended = any(
-        evt.type == EventType.WORKFLOW_SUSPENDED for evt in events
-    )
+        # Wait and refresh events
+        await asyncio.sleep(wait_interval)
+        events = await storage.get_events(run_id)
 
-    if has_suspended:
-        # Schedule workflow resumption
-        schedule_workflow_resumption(
-            run_id, datetime.now(UTC), storage_config, triggered_by="step_completed"
-        )
-        logger.info(
-            "Step completed and workflow resumption scheduled",
-            run_id=run_id,
-            step_id=step_id,
-            step_name=step_name,
+        # Also check if step was already completed by another task during wait
+        already_completed = any(
+            evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id
+            for evt in events
         )
+        if already_completed:
+            logger.info(
+                "Step already completed by another task during wait, skipping",
+                run_id=run_id,
+                step_id=step_id,
+                step_name=step_name,
+            )
+            return
     else:
-        # Workflow hasn't suspended yet - don't schedule resume
-        # The suspension handler will check for step completion and schedule resume
-        logger.info(
-            "Step completed but workflow not yet suspended, skipping resume scheduling",
+        # Timeout waiting for suspension - log warning but proceed anyway
+        # This handles edge cases where the workflow completes without suspending
+        logger.warning(
+            "Timeout waiting for WORKFLOW_SUSPENDED event, proceeding with completion",
            run_id=run_id,
            step_id=step_id,
            step_name=step_name,
        )
 
+    # Record STEP_COMPLETED event
+    completion_event = create_step_completed_event(
+        run_id=run_id,
+        step_id=step_id,
+        result=serialize(result),
+        step_name=step_name,
+    )
+    await storage.record_event(completion_event)
+
+    # Schedule workflow resumption
+    schedule_workflow_resumption(
+        run_id, datetime.now(UTC), storage_config, triggered_by="step_completed"
+    )
+    logger.info(
+        "Step completed and workflow resumption scheduled",
+        run_id=run_id,
+        step_id=step_id,
+        step_name=step_name,
+    )
+
 
 async def _record_step_failure_and_resume(
     storage_config: dict[str, Any] | None,
@@ -464,9 +489,9 @@ async def _record_step_failure_and_resume(
     Called by execute_step_task after step failure (when retries are exhausted).
     The workflow will fail when it replays and sees the failure event.
 
-    …
+    IMPORTANT: This function waits for WORKFLOW_SUSPENDED event before recording
+    STEP_FAILED to prevent race conditions where both events get the same
+    sequence number. The workflow must fully suspend before we record failure.
 
     Idempotency: If STEP_COMPLETED or terminal STEP_FAILED already exists
     for this step_id, skip recording and resume scheduling.
@@ -500,6 +525,51 @@ async def _record_step_failure_and_resume(
         )
         return
 
+    # Wait for WORKFLOW_SUSPENDED event before recording STEP_FAILED
+    # This prevents race conditions where both events get the same sequence number
+    max_wait_attempts = 50  # 50 * 10ms = 500ms max wait
+    wait_interval = 0.01  # 10ms between checks
+
+    for attempt in range(max_wait_attempts):
+        has_suspended = any(
+            evt.type == EventType.WORKFLOW_SUSPENDED
+            and evt.data.get("step_id") == step_id
+            for evt in events
+        )
+        if has_suspended:
+            break
+
+        # Wait and refresh events
+        await asyncio.sleep(wait_interval)
+        events = await storage.get_events(run_id)
+
+        # Also check if step was already handled by another task during wait
+        already_handled = any(
+            (evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id)
+            or (
+                evt.type == EventType.STEP_FAILED
+                and evt.data.get("step_id") == step_id
+                and not evt.data.get("is_retryable", True)
+            )
+            for evt in events
+        )
+        if already_handled:
+            logger.info(
+                "Step already completed/failed by another task during wait, skipping",
+                run_id=run_id,
+                step_id=step_id,
+                step_name=step_name,
+            )
+            return
+    else:
+        # Timeout waiting for suspension - log warning but proceed anyway
+        logger.warning(
+            "Timeout waiting for WORKFLOW_SUSPENDED event, proceeding with failure",
+            run_id=run_id,
+            step_id=step_id,
+            step_name=step_name,
+        )
+
     # Record STEP_FAILED event
     failure_event = create_step_failed_event(
         run_id=run_id,
@@ -511,35 +581,17 @@ async def _record_step_failure_and_resume(
     )
     await storage.record_event(failure_event)
 
-    # Check if workflow has suspended
-    events = await storage.get_events(run_id)
-
-    has_suspended = any(
-        evt.type == EventType.WORKFLOW_SUSPENDED for evt in events
-    )
-
-    if has_suspended:
-        # Schedule workflow resumption
-        schedule_workflow_resumption(
-            run_id, datetime.now(UTC), storage_config, triggered_by="step_failed"
-        )
-        logger.info(
-            "Step failed and workflow resumption scheduled",
-            run_id=run_id,
-            step_id=step_id,
-            step_name=step_name,
-            error=error,
-        )
-    else:
-        # Workflow hasn't suspended yet - don't schedule resume
-        # The suspension handler will check for step failure and schedule resume
-        logger.info(
-            "Step failed but workflow not yet suspended, skipping resume scheduling",
-            run_id=run_id,
-            step_id=step_id,
-            step_name=step_name,
-            error=error,
-        )
+    # Schedule workflow resumption
+    schedule_workflow_resumption(
+        run_id, datetime.now(UTC), storage_config, triggered_by="step_failed"
+    )
+    logger.info(
+        "Step failed and workflow resumption scheduled",
+        run_id=run_id,
+        step_id=step_id,
+        step_name=step_name,
+        error=error,
+    )
 
 
 async def _get_workflow_run_safe(
@@ -1154,8 +1206,14 @@ async def _recover_workflow_on_worker(
         recovery_attempt=run.recovery_attempts,
     )
 
-    # Update status to running
-    await storage.update_run_status(run_id=run_id, status=RunStatus.RUNNING)
+    # Atomically claim the run for recovery.
+    # The run may be in INTERRUPTED or RUNNING state after a worker crash.
+    # Try INTERRUPTED -> RUNNING first (most common recovery path).
+    # If the run is already RUNNING, just set it to RUNNING (idempotent).
+    claimed = await storage.try_claim_run(run_id, RunStatus.INTERRUPTED, RunStatus.RUNNING)
+    if not claimed:
+        # May already be RUNNING from a previous partial recovery - update status directly
+        await storage.update_run_status(run_id=run_id, status=RunStatus.RUNNING)
 
     # Load event log for replay
    events = await storage.get_events(run_id)
@@ -2048,14 +2106,14 @@ async def _resume_workflow_on_worker(
         )
         return None
 
-    # …
+    # Atomically claim the run: SUSPENDED -> RUNNING
     # Multiple resume tasks can be scheduled for the same workflow (e.g., race
-    # condition between step completion and suspension handler). Only
-    # …
-    …
-    if run.status != RunStatus.SUSPENDED:
+    # condition between step completion and suspension handler). Only one
+    # succeeds; duplicates see the claim fail and return.
+    claimed = await storage.try_claim_run(run_id, RunStatus.SUSPENDED, RunStatus.RUNNING)
+    if not claimed:
         logger.info(
-            "…",
+            "Workflow status is not SUSPENDED (already claimed) - skipping duplicate resume",
             run_id=run_id,
            workflow_name=run.workflow_name,
        )
@@ -2075,6 +2133,8 @@ async def _resume_workflow_on_worker(
             workflow_name=run.workflow_name,
             triggered_by_hook_id=triggered_by_hook_id,
         )
+        # Revert status back to SUSPENDED since we won't actually resume
+        await storage.update_run_status(run_id=run_id, status=RunStatus.SUSPENDED)
         return None
 
     # Check for cancellation flag
@@ -2084,7 +2144,7 @@ async def _resume_workflow_on_worker(
         f"Resuming workflow execution on worker: {run.workflow_name}",
         run_id=run_id,
         workflow_name=run.workflow_name,
-        current_status=…
+        current_status="running",
         cancellation_requested=cancellation_requested,
     )
 
@@ -2103,9 +2163,6 @@ async def _resume_workflow_on_worker(
     args = deserialize_args(run.input_args)
     kwargs = deserialize_kwargs(run.input_kwargs)
 
-    # Update status to running
-    await storage.update_run_status(run_id=run_id, status=RunStatus.RUNNING)
-
     # Execute workflow with event replay
     try:
        result = await execute_workflow_with_context(
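Taken together, the tasks.py changes replace immediate recording with a bounded wait: the recorder polls the event log until the workflow's own WORKFLOW_SUSPENDED event appears (or the 500 ms budget runs out), so the two events cannot be handed the same sequence number. A minimal sketch of that polling pattern — the fetch_events and predicate names here are hypothetical stand-ins, not pyworkflow's actual API:

    import asyncio

    async def wait_until(predicate, fetch_events, max_attempts=50, interval=0.01):
        """Poll an event log until predicate(events) holds; False on timeout."""
        events = await fetch_events()
        for _ in range(max_attempts):
            if predicate(events):
                return True
            await asyncio.sleep(interval)  # yield, then refresh the event log
            events = await fetch_events()
        return False  # caller logs a warning and proceeds anyway

    async def demo():
        log: list[str] = []

        async def fetch():
            return list(log)

        waiter = asyncio.create_task(
            wait_until(lambda evs: "WORKFLOW_SUSPENDED" in evs, fetch)
        )
        await asyncio.sleep(0.03)
        log.append("WORKFLOW_SUSPENDED")  # the workflow finishes suspending
        print(await waiter)  # True: now safe to record the completion event

    asyncio.run(demo())

Note the for/else shape in the real change serves the same purpose as the boolean return here: the else branch only runs when the wait budget is exhausted without a break.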
pyworkflow/core/step.py
CHANGED

@@ -595,6 +595,7 @@ async def _dispatch_step_to_celery(
     """
     from pyworkflow.celery.tasks import execute_step_task
     from pyworkflow.core.exceptions import SuspensionSignal
+    from pyworkflow.engine.events import EventType
 
     logger.info(
         f"Dispatching step to Celery worker: {step_name}",
@@ -602,6 +603,25 @@ async def _dispatch_step_to_celery(
         step_id=step_id,
     )
 
+    # Defense-in-depth: check if STEP_STARTED was already recorded for this step.
+    # This guards against duplicate dispatch when two resume tasks race and both
+    # replay past the same step. If already started, re-suspend to wait.
+    events = await ctx.storage.get_events(ctx.run_id)
+    already_started = any(
+        evt.type == EventType.STEP_STARTED and evt.data.get("step_id") == step_id for evt in events
+    )
+    if already_started:
+        logger.info(
+            f"Step {step_name} already has STEP_STARTED event, re-suspending",
+            run_id=ctx.run_id,
+            step_id=step_id,
+        )
+        raise SuspensionSignal(
+            reason=f"step_dispatch:{step_id}",
+            step_id=step_id,
+            step_name=step_name,
+        )
+
     # Validate event limits before recording step event
     await ctx.validate_event_limits()
 
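This guard decides from the persisted event log rather than from worker-local state, so it also holds when two racing resume tasks replay the same workflow. A toy illustration of that event-sourced dispatch-once decision — the Event class below is a stand-in, not pyworkflow's event type:

    from dataclasses import dataclass, field

    @dataclass
    class Event:
        type: str
        data: dict = field(default_factory=dict)

    def should_dispatch(events: list[Event], step_id: str) -> bool:
        """Dispatch only if no STEP_STARTED has been recorded for this step."""
        return not any(
            e.type == "STEP_STARTED" and e.data.get("step_id") == step_id
            for e in events
        )

    log = [Event("STEP_STARTED", {"step_id": "step-1"})]
    assert should_dispatch(log, "step-2")      # unseen step: dispatch it
    assert not should_dispatch(log, "step-1")  # already started: re-suspend instead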
pyworkflow/storage/base.py
CHANGED

@@ -358,6 +358,35 @@ class StorageBackend(ABC):
         """
         pass
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """
+        Atomically transition run status if the current status matches.
+
+        This is a compare-and-swap operation: the status is only updated
+        if the current status equals `from_status`. Returns True if the
+        transition was applied, False if the current status did not match
+        (meaning another task already claimed this run).
+
+        Args:
+            run_id: Workflow run identifier
+            from_status: Expected current status
+            to_status: New status to set
+
+        Returns:
+            True if the transition succeeded, False otherwise
+        """
+        # Default implementation using get_run + update_run_status.
+        # Backends should override with truly atomic implementations.
+        run = await self.get_run(run_id)
+        if not run or run.status != from_status:
+            return False
+        await self.update_run_status(run_id, to_status)
+        return True
+
     # Cancellation Flag Operations
 
     @abstractmethod
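The contract the race fix leans on: when several tasks call try_claim_run concurrently with the same from_status, at most one receives True. A toy demonstration of those compare-and-swap semantics — an asyncio.Lock simulates the atomicity a real backend provides, and the names are illustrative, not pyworkflow's API:

    import asyncio

    class FakeRun:
        def __init__(self) -> None:
            self.status = "SUSPENDED"

    async def try_claim_run(lock, run, from_status, to_status):
        async with lock:  # the backend's atomicity guarantee, simulated
            if run.status != from_status:
                return False
            run.status = to_status
            return True

    async def main() -> None:
        run, lock = FakeRun(), asyncio.Lock()
        results = await asyncio.gather(
            *[try_claim_run(lock, run, "SUSPENDED", "RUNNING") for _ in range(5)]
        )
        assert results.count(True) == 1  # exactly one resume task wins the claim

    asyncio.run(main())

The Lock only stands in for a backend guarantee; the default base-class implementation (get_run followed by update_run_status) has no such guarantee, which is why every shipped backend overrides it below.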
pyworkflow/storage/cassandra.py
CHANGED

@@ -1209,6 +1209,31 @@ class CassandraStorageBackend(StorageBackend):
         # Apply offset and limit
         return hooks[offset : offset + limit]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using lightweight transaction (IF)."""
+        session = self._ensure_connected()
+
+        result = session.execute(
+            SimpleStatement(
+                """
+                UPDATE workflow_runs
+                SET status = %s, updated_at = %s
+                WHERE run_id = %s
+                IF status = %s
+                """,
+                consistency_level=ConsistencyLevel.SERIAL,
+            ),
+            (to_status.value, datetime.now(UTC), run_id, from_status.value),
+        )
+
+        # Cassandra LWT returns [applied] column
+        row = result.one()
+        return row is not None and row[0] is True
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/dynamodb.py
CHANGED

@@ -889,6 +889,37 @@ class DynamoDBStorageBackend(StorageBackend):
 
         return [self._item_to_hook(self._item_to_dict(item)) for item in items]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using conditional update."""
+        async with self._get_client() as client:
+            try:
+                now = datetime.now(UTC).isoformat()
+                await client.update_item(
+                    TableName=self.table_name,
+                    Key={
+                        "PK": {"S": f"RUN#{run_id}"},
+                        "SK": {"S": "#METADATA"},
+                    },
+                    UpdateExpression="SET #status = :new_status, updated_at = :now, GSI1SK = :gsi1sk",
+                    ConditionExpression="#status = :expected_status",
+                    ExpressionAttributeNames={"#status": "status"},
+                    ExpressionAttributeValues={
+                        ":new_status": {"S": to_status.value},
+                        ":expected_status": {"S": from_status.value},
+                        ":now": {"S": now},
+                        ":gsi1sk": {"S": f"{to_status.value}#{now}"},
+                    },
+                )
+                return True
+            except ClientError as e:
+                if e.response["Error"]["Code"] == "ConditionalCheckFailedException":
+                    return False
+                raise
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/file.py
CHANGED

@@ -592,6 +592,34 @@ class FileStorageBackend(StorageBackend):
         hook_data_list = await asyncio.to_thread(_list)
         return [Hook.from_dict(data) for data in hook_data_list]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using file lock."""
+        run_file = self.runs_dir / f"{run_id}.json"
+
+        if not run_file.exists():
+            return False
+
+        lock_file = self.locks_dir / f"{run_id}.lock"
+        lock = FileLock(str(lock_file))
+
+        def _try_claim() -> bool:
+            with lock:
+                if not run_file.exists():
+                    return False
+                data = json.loads(run_file.read_text())
+                if data.get("status") != from_status.value:
+                    return False
+                data["status"] = to_status.value
+                data["updated_at"] = datetime.now(UTC).isoformat()
+                run_file.write_text(json.dumps(data, indent=2))
+                return True
+
+        return await asyncio.to_thread(_try_claim)
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/memory.py
CHANGED

@@ -366,6 +366,20 @@ class InMemoryStorageBackend(StorageBackend):
         # Apply pagination
         return hooks[offset : offset + limit]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using lock-protected check-and-set."""
+        with self._lock:
+            run = self._runs.get(run_id)
+            if not run or run.status != from_status:
+                return False
+            run.status = to_status
+            run.updated_at = datetime.now(UTC)
+            return True
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/mysql.py
CHANGED

@@ -767,6 +767,26 @@ class MySQLStorageBackend(StorageBackend):
 
         return [self._row_to_hook(row) for row in rows]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using conditional UPDATE."""
+        pool = self._ensure_connected()
+
+        async with pool.acquire() as conn, conn.cursor() as cur:
+            await cur.execute(
+                """
+                UPDATE workflow_runs
+                SET status = %s, updated_at = %s
+                WHERE run_id = %s AND status = %s
+                """,
+                (to_status.value, datetime.now(UTC), run_id, from_status.value),
+            )
+
+            return cur.rowcount > 0
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/postgres.py
CHANGED

@@ -862,6 +862,30 @@ class PostgresStorageBackend(StorageBackend):
 
         return [self._row_to_hook(row) for row in rows]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using conditional UPDATE."""
+        pool = await self._get_pool()
+
+        async with pool.acquire() as conn:
+            result = await conn.execute(
+                """
+                UPDATE workflow_runs
+                SET status = $1, updated_at = $2
+                WHERE run_id = $3 AND status = $4
+                """,
+                to_status.value,
+                datetime.now(UTC),
+                run_id,
+                from_status.value,
+            )
+
+            # asyncpg returns 'UPDATE N' where N is rows affected
+            return result == "UPDATE 1"
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
pyworkflow/storage/sqlite.py
CHANGED

@@ -750,6 +750,26 @@ class SQLiteStorageBackend(StorageBackend):
 
         return [self._row_to_hook(row) for row in rows]
 
+    # Atomic Status Transition
+
+    async def try_claim_run(
+        self, run_id: str, from_status: RunStatus, to_status: RunStatus
+    ) -> bool:
+        """Atomically transition run status using conditional UPDATE."""
+        db = self._ensure_connected()
+
+        cursor = await db.execute(
+            """
+            UPDATE workflow_runs
+            SET status = ?, updated_at = ?
+            WHERE run_id = ? AND status = ?
+            """,
+            (to_status.value, datetime.now(UTC).isoformat(), run_id, from_status.value),
+        )
+        await db.commit()
+
+        return cursor.rowcount > 0
+
     # Cancellation Flag Operations
 
     async def set_cancellation_flag(self, run_id: str) -> None:
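The three SQL backends above all express the claim as a conditional UPDATE and read the affected-row count. A runnable sketch of the same idea against a throwaway table, using only the standard-library sqlite3 module (schema and names are illustrative, not pyworkflow's):

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE workflow_runs (run_id TEXT PRIMARY KEY, status TEXT)")
    con.execute("INSERT INTO workflow_runs VALUES ('r1', 'SUSPENDED')")

    def try_claim(run_id: str, from_status: str, to_status: str) -> bool:
        # The WHERE clause carries the compare; rowcount reports whether it swapped.
        cur = con.execute(
            "UPDATE workflow_runs SET status = ? WHERE run_id = ? AND status = ?",
            (to_status, run_id, from_status),
        )
        con.commit()
        return cur.rowcount > 0

    print(try_claim("r1", "SUSPENDED", "RUNNING"))  # True: this caller won the claim
    print(try_claim("r1", "SUSPENDED", "RUNNING"))  # False: status already changed

asyncpg reports the count as an "UPDATE N" status string while aiomysql and aiosqlite expose cursor.rowcount; both reduce to the same boolean.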
{pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-pyworkflow/__init__.py,sha256=…
+pyworkflow/__init__.py,sha256=HigyYErtUxyfrsBOPEUvegnsLnqnwfrBm87EEzfYtOI,6281
 pyworkflow/config.py,sha256=pKwPrpCwBJiDpB-MIjM0U7GW1TFmQFO341pihL5-vTM,14455
 pyworkflow/discovery.py,sha256=snW3l4nvY3Nc067TGlwtn_qdzTU9ybN7YPr8FbvY8iM,8066
 pyworkflow/aws/__init__.py,sha256=Ak_xHcR9LTRX-CwcS0XecYmzrXZw4EM3V9aKBBDEmIk,1741
@@ -10,7 +10,7 @@ pyworkflow/celery/app.py,sha256=QXpPXVVuwJv3ToylT0pyz9SgmwjC9hW-9WaIO4wH5OQ,1434
 pyworkflow/celery/loop.py,sha256=mu8cIfMJYgHAoGCN_DdDoNoXK3QHzHpLmrPCyFDQYIY,3016
 pyworkflow/celery/scheduler.py,sha256=Ms4rqRpdpMiLM8l4y3DK-Divunj9afYuUaGGoNQe7P4,11288
 pyworkflow/celery/singleton.py,sha256=9gdVHzqFjShZ9OJOJlJNABUg9oqnl6ITGROtomcOtsg,16070
-pyworkflow/celery/tasks.py,sha256=…
+pyworkflow/celery/tasks.py,sha256=0doKC7JxGlU4QFShohQn9NRgiOybfLrBm6j81C8m3dc,88482
 pyworkflow/cli/__init__.py,sha256=tcbe-fcZmyeEKUy_aEo8bsEF40HsNKOwvyMBZIJZPwc,3844
 pyworkflow/cli/__main__.py,sha256=LxLLS4FEEPXa5rWpLTtKuivn6Xp9pGia-QKGoxt9SS0,148
 pyworkflow/cli/commands/__init__.py,sha256=IXvnTgukALckkO8fTlZhVRq80ojSqpnIIgboAg_-yZU,39
@@ -43,7 +43,7 @@ pyworkflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/core/exceptions.py,sha256=rL0SlRWymoLItOdbcbfMnAysq5F0IjmAls2CzzI6AWE,10725
 pyworkflow/core/registry.py,sha256=ZUf2YTpBvWpC9EehRbMF8soXOk9VsjNruoi6lR4O33M,9361
 pyworkflow/core/scheduled.py,sha256=479A7IvjHiMob7ZrZtfE6VqtypG6DLIGMGhh16jLIWM,10522
-pyworkflow/core/step.py,sha256=…
+pyworkflow/core/step.py,sha256=tq2dc_gflAy4FVhanfPT9oHP8Mzxx1szaNc6m4pmAj0,24745
 pyworkflow/core/validation.py,sha256=0VaZyQ9YGK8WFy4ZG4Bjt9MYAp0vz6xEOe80kcgaP5g,3362
 pyworkflow/core/workflow.py,sha256=tBRMRYBmQZoCWjVEXDT4W-4VZLG6q-iRFqC-_qZW-Uc,12309
 pyworkflow/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,22 +73,22 @@ pyworkflow/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 pyworkflow/serialization/decoder.py,sha256=F7Ofuw1Yzo82iSFFXiK2yoW_v2YRbLMpX3CQbKjm0Ls,3860
 pyworkflow/serialization/encoder.py,sha256=ZBwAxe5Bb4MCfFJePHw7ArJlIbBieSwUgsysGCI2iPU,4108
 pyworkflow/storage/__init__.py,sha256=LhVjLNZdo4Mi5dEC75hjSPnbQr9jBoIsTOrC8vzTGOM,1924
-pyworkflow/storage/base.py,sha256=…
-pyworkflow/storage/cassandra.py,sha256=…
+pyworkflow/storage/base.py,sha256=4rPBrmRpxkZFNjmxOl7e5B8hVJ6kRM3XPCH_qgA1hFI,17404
+pyworkflow/storage/cassandra.py,sha256=wevLXyx8WHya1QafnY5YUurvveyM2F2EEilD_G6RM_U,62560
 pyworkflow/storage/config.py,sha256=45UMPxRoqgK4ZwE7HIK9ctxE_eoK3eAE_1tRhn3Psd4,12410
-pyworkflow/storage/dynamodb.py,sha256=…
-pyworkflow/storage/file.py,sha256=…
-pyworkflow/storage/memory.py,sha256=…
-pyworkflow/storage/mysql.py,sha256=…
-pyworkflow/storage/postgres.py,sha256=…
+pyworkflow/storage/dynamodb.py,sha256=_X2Ijj3lEaOYfuYSdf6y4uhZBXHMovZOUS9XFuCz3Uk,55523
+pyworkflow/storage/file.py,sha256=7CiEQ6lhRXAyUr_tEG_G4OxqllcHaGu0353v_9de-DU,31095
+pyworkflow/storage/memory.py,sha256=YiA3n0BX7UOcgOkXxCtm7U3GTC5SlzUCrHIANSwoI7c,21040
+pyworkflow/storage/mysql.py,sha256=bQyyu1-SqjTaGcV4woh3sr2bz_TrftCvoTXuVOQZq1A,44432
+pyworkflow/storage/postgres.py,sha256=ZbPyT3-s7jCDLpTZe4CJYCtU1VQgldIcq4wl1hjFRAU,46113
 pyworkflow/storage/schemas.py,sha256=o1ntTYNgQQ5YVuXtPCShtENEsndVjdrXclWrkCgkitg,18002
-pyworkflow/storage/sqlite.py,sha256=…
+pyworkflow/storage/sqlite.py,sha256=h48FOGtsHHglIEe4hBkf_vR41pwumDDATGsz7-u3wQA,40987
 pyworkflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/utils/duration.py,sha256=C-itmiSQQlplw7j6XB679hLF9xYGnyCwm7twO88OF8U,3978
 pyworkflow/utils/schedule.py,sha256=dO_MkGFyfwZpb0LDlW6BGyZzlPuQIA6dc6j9nk9lc4Y,10691
-pyworkflow_engine-0.1.20.dist-info/licenses/LICENSE,sha256=…
-pyworkflow_engine-0.1.20.dist-info/METADATA,sha256=…
-pyworkflow_engine-0.1.20.dist-info/WHEEL,sha256=…
-pyworkflow_engine-0.1.20.dist-info/entry_points.txt,sha256=…
-pyworkflow_engine-0.1.20.dist-info/top_level.txt,sha256=…
-pyworkflow_engine-0.1.20.dist-info/RECORD,,
+pyworkflow_engine-0.1.22.dist-info/licenses/LICENSE,sha256=Y49RCTZ5ayn_yzBcRxnyIFdcMCyuYm150aty_FIznfY,1080
+pyworkflow_engine-0.1.22.dist-info/METADATA,sha256=vulOQGFZqTd-TJ3fjgTY1JQBB942bJr97aw7bHs3jTA,19628
+pyworkflow_engine-0.1.22.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+pyworkflow_engine-0.1.22.dist-info/entry_points.txt,sha256=3IGAfuylnS39U0YX0pxnjrj54kB4iT_bNYrmsiDB-dE,51
+pyworkflow_engine-0.1.22.dist-info/top_level.txt,sha256=FLTv9pQmLDBXrQdLOhTMIS3njFibliMsQEfumqmdzBE,11
+pyworkflow_engine-0.1.22.dist-info/RECORD,,
{pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/WHEEL
File without changes

{pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/entry_points.txt
File without changes

{pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/licenses/LICENSE
File without changes

{pyworkflow_engine-0.1.20.dist-info → pyworkflow_engine-0.1.22.dist-info}/top_level.txt
File without changes