pyworkflow-engine 0.1.21__py3-none-any.whl → 0.1.22__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/tasks.py +114 -62
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/RECORD +8 -8
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.22.dist-info}/top_level.txt +0 -0
pyworkflow/__init__.py
CHANGED
pyworkflow/celery/tasks.py
CHANGED
|
@@ -11,6 +11,7 @@ These tasks enable:
|
|
|
11
11
|
|
|
12
12
|
import asyncio
|
|
13
13
|
import random
|
|
14
|
+
import traceback
|
|
14
15
|
import uuid
|
|
15
16
|
from collections.abc import Callable
|
|
16
17
|
from datetime import UTC, datetime
|
|
@@ -379,9 +380,9 @@ async def _record_step_completion_and_resume(
|
|
|
379
380
|
|
|
380
381
|
Called by execute_step_task after successful step execution.
|
|
381
382
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
383
|
+
IMPORTANT: This function waits for WORKFLOW_SUSPENDED event before recording
|
|
384
|
+
STEP_COMPLETED to prevent race conditions where both events get the same
|
|
385
|
+
sequence number. The workflow must fully suspend before we record completion.
|
|
385
386
|
|
|
386
387
|
Idempotency: If STEP_COMPLETED already exists for this step_id, skip
|
|
387
388
|
recording and resume scheduling (another task already handled it).
|
|
@@ -411,43 +412,67 @@ async def _record_step_completion_and_resume(
|
|
|
411
412
|
)
|
|
412
413
|
return
|
|
413
414
|
|
|
414
|
-
#
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
result=serialize(result),
|
|
419
|
-
step_name=step_name,
|
|
420
|
-
)
|
|
421
|
-
await storage.record_event(completion_event)
|
|
415
|
+
# Wait for WORKFLOW_SUSPENDED event before recording STEP_COMPLETED
|
|
416
|
+
# This prevents race conditions where both events get the same sequence number
|
|
417
|
+
max_wait_attempts = 50 # 50 * 10ms = 500ms max wait
|
|
418
|
+
wait_interval = 0.01 # 10ms between checks
|
|
422
419
|
|
|
423
|
-
|
|
424
|
-
|
|
420
|
+
for attempt in range(max_wait_attempts):
|
|
421
|
+
has_suspended = any(
|
|
422
|
+
evt.type == EventType.WORKFLOW_SUSPENDED
|
|
423
|
+
and evt.data.get("step_id") == step_id
|
|
424
|
+
for evt in events
|
|
425
|
+
)
|
|
426
|
+
if has_suspended:
|
|
427
|
+
break
|
|
425
428
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
+
# Wait and refresh events
|
|
430
|
+
await asyncio.sleep(wait_interval)
|
|
431
|
+
events = await storage.get_events(run_id)
|
|
429
432
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
)
|
|
435
|
-
logger.info(
|
|
436
|
-
"Step completed and workflow resumption scheduled",
|
|
437
|
-
run_id=run_id,
|
|
438
|
-
step_id=step_id,
|
|
439
|
-
step_name=step_name,
|
|
433
|
+
# Also check if step was already completed by another task during wait
|
|
434
|
+
already_completed = any(
|
|
435
|
+
evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id
|
|
436
|
+
for evt in events
|
|
440
437
|
)
|
|
438
|
+
if already_completed:
|
|
439
|
+
logger.info(
|
|
440
|
+
"Step already completed by another task during wait, skipping",
|
|
441
|
+
run_id=run_id,
|
|
442
|
+
step_id=step_id,
|
|
443
|
+
step_name=step_name,
|
|
444
|
+
)
|
|
445
|
+
return
|
|
441
446
|
else:
|
|
442
|
-
#
|
|
443
|
-
#
|
|
444
|
-
logger.
|
|
445
|
-
"
|
|
447
|
+
# Timeout waiting for suspension - log warning but proceed anyway
|
|
448
|
+
# This handles edge cases where the workflow completes without suspending
|
|
449
|
+
logger.warning(
|
|
450
|
+
"Timeout waiting for WORKFLOW_SUSPENDED event, proceeding with completion",
|
|
446
451
|
run_id=run_id,
|
|
447
452
|
step_id=step_id,
|
|
448
453
|
step_name=step_name,
|
|
449
454
|
)
|
|
450
455
|
|
|
456
|
+
# Record STEP_COMPLETED event
|
|
457
|
+
completion_event = create_step_completed_event(
|
|
458
|
+
run_id=run_id,
|
|
459
|
+
step_id=step_id,
|
|
460
|
+
result=serialize(result),
|
|
461
|
+
step_name=step_name,
|
|
462
|
+
)
|
|
463
|
+
await storage.record_event(completion_event)
|
|
464
|
+
|
|
465
|
+
# Schedule workflow resumption
|
|
466
|
+
schedule_workflow_resumption(
|
|
467
|
+
run_id, datetime.now(UTC), storage_config, triggered_by="step_completed"
|
|
468
|
+
)
|
|
469
|
+
logger.info(
|
|
470
|
+
"Step completed and workflow resumption scheduled",
|
|
471
|
+
run_id=run_id,
|
|
472
|
+
step_id=step_id,
|
|
473
|
+
step_name=step_name,
|
|
474
|
+
)
|
|
475
|
+
|
|
451
476
|
|
|
452
477
|
async def _record_step_failure_and_resume(
|
|
453
478
|
storage_config: dict[str, Any] | None,
|
|
@@ -464,9 +489,9 @@ async def _record_step_failure_and_resume(
|
|
|
464
489
|
Called by execute_step_task after step failure (when retries are exhausted).
|
|
465
490
|
The workflow will fail when it replays and sees the failure event.
|
|
466
491
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
492
|
+
IMPORTANT: This function waits for WORKFLOW_SUSPENDED event before recording
|
|
493
|
+
STEP_FAILED to prevent race conditions where both events get the same
|
|
494
|
+
sequence number. The workflow must fully suspend before we record failure.
|
|
470
495
|
|
|
471
496
|
Idempotency: If STEP_COMPLETED or terminal STEP_FAILED already exists
|
|
472
497
|
for this step_id, skip recording and resume scheduling.
|
|
@@ -500,6 +525,51 @@ async def _record_step_failure_and_resume(
|
|
|
500
525
|
)
|
|
501
526
|
return
|
|
502
527
|
|
|
528
|
+
# Wait for WORKFLOW_SUSPENDED event before recording STEP_FAILED
|
|
529
|
+
# This prevents race conditions where both events get the same sequence number
|
|
530
|
+
max_wait_attempts = 50 # 50 * 10ms = 500ms max wait
|
|
531
|
+
wait_interval = 0.01 # 10ms between checks
|
|
532
|
+
|
|
533
|
+
for attempt in range(max_wait_attempts):
|
|
534
|
+
has_suspended = any(
|
|
535
|
+
evt.type == EventType.WORKFLOW_SUSPENDED
|
|
536
|
+
and evt.data.get("step_id") == step_id
|
|
537
|
+
for evt in events
|
|
538
|
+
)
|
|
539
|
+
if has_suspended:
|
|
540
|
+
break
|
|
541
|
+
|
|
542
|
+
# Wait and refresh events
|
|
543
|
+
await asyncio.sleep(wait_interval)
|
|
544
|
+
events = await storage.get_events(run_id)
|
|
545
|
+
|
|
546
|
+
# Also check if step was already handled by another task during wait
|
|
547
|
+
already_handled = any(
|
|
548
|
+
(evt.type == EventType.STEP_COMPLETED and evt.data.get("step_id") == step_id)
|
|
549
|
+
or (
|
|
550
|
+
evt.type == EventType.STEP_FAILED
|
|
551
|
+
and evt.data.get("step_id") == step_id
|
|
552
|
+
and not evt.data.get("is_retryable", True)
|
|
553
|
+
)
|
|
554
|
+
for evt in events
|
|
555
|
+
)
|
|
556
|
+
if already_handled:
|
|
557
|
+
logger.info(
|
|
558
|
+
"Step already completed/failed by another task during wait, skipping",
|
|
559
|
+
run_id=run_id,
|
|
560
|
+
step_id=step_id,
|
|
561
|
+
step_name=step_name,
|
|
562
|
+
)
|
|
563
|
+
return
|
|
564
|
+
else:
|
|
565
|
+
# Timeout waiting for suspension - log warning but proceed anyway
|
|
566
|
+
logger.warning(
|
|
567
|
+
"Timeout waiting for WORKFLOW_SUSPENDED event, proceeding with failure",
|
|
568
|
+
run_id=run_id,
|
|
569
|
+
step_id=step_id,
|
|
570
|
+
step_name=step_name,
|
|
571
|
+
)
|
|
572
|
+
|
|
503
573
|
# Record STEP_FAILED event
|
|
504
574
|
failure_event = create_step_failed_event(
|
|
505
575
|
run_id=run_id,
|
|
@@ -511,35 +581,17 @@ async def _record_step_failure_and_resume(
|
|
|
511
581
|
)
|
|
512
582
|
await storage.record_event(failure_event)
|
|
513
583
|
|
|
514
|
-
#
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
)
|
|
526
|
-
logger.info(
|
|
527
|
-
"Step failed and workflow resumption scheduled",
|
|
528
|
-
run_id=run_id,
|
|
529
|
-
step_id=step_id,
|
|
530
|
-
step_name=step_name,
|
|
531
|
-
error=error,
|
|
532
|
-
)
|
|
533
|
-
else:
|
|
534
|
-
# Workflow hasn't suspended yet - don't schedule resume
|
|
535
|
-
# The suspension handler will check for step failure and schedule resume
|
|
536
|
-
logger.info(
|
|
537
|
-
"Step failed but workflow not yet suspended, skipping resume scheduling",
|
|
538
|
-
run_id=run_id,
|
|
539
|
-
step_id=step_id,
|
|
540
|
-
step_name=step_name,
|
|
541
|
-
error=error,
|
|
542
|
-
)
|
|
584
|
+
# Schedule workflow resumption
|
|
585
|
+
schedule_workflow_resumption(
|
|
586
|
+
run_id, datetime.now(UTC), storage_config, triggered_by="step_failed"
|
|
587
|
+
)
|
|
588
|
+
logger.info(
|
|
589
|
+
"Step failed and workflow resumption scheduled",
|
|
590
|
+
run_id=run_id,
|
|
591
|
+
step_id=step_id,
|
|
592
|
+
step_name=step_name,
|
|
593
|
+
error=error,
|
|
594
|
+
)
|
|
543
595
|
|
|
544
596
|
|
|
545
597
|
async def _get_workflow_run_safe(
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
pyworkflow/__init__.py,sha256=
|
|
1
|
+
pyworkflow/__init__.py,sha256=HigyYErtUxyfrsBOPEUvegnsLnqnwfrBm87EEzfYtOI,6281
|
|
2
2
|
pyworkflow/config.py,sha256=pKwPrpCwBJiDpB-MIjM0U7GW1TFmQFO341pihL5-vTM,14455
|
|
3
3
|
pyworkflow/discovery.py,sha256=snW3l4nvY3Nc067TGlwtn_qdzTU9ybN7YPr8FbvY8iM,8066
|
|
4
4
|
pyworkflow/aws/__init__.py,sha256=Ak_xHcR9LTRX-CwcS0XecYmzrXZw4EM3V9aKBBDEmIk,1741
|
|
@@ -10,7 +10,7 @@ pyworkflow/celery/app.py,sha256=QXpPXVVuwJv3ToylT0pyz9SgmwjC9hW-9WaIO4wH5OQ,1434
|
|
|
10
10
|
pyworkflow/celery/loop.py,sha256=mu8cIfMJYgHAoGCN_DdDoNoXK3QHzHpLmrPCyFDQYIY,3016
|
|
11
11
|
pyworkflow/celery/scheduler.py,sha256=Ms4rqRpdpMiLM8l4y3DK-Divunj9afYuUaGGoNQe7P4,11288
|
|
12
12
|
pyworkflow/celery/singleton.py,sha256=9gdVHzqFjShZ9OJOJlJNABUg9oqnl6ITGROtomcOtsg,16070
|
|
13
|
-
pyworkflow/celery/tasks.py,sha256=
|
|
13
|
+
pyworkflow/celery/tasks.py,sha256=0doKC7JxGlU4QFShohQn9NRgiOybfLrBm6j81C8m3dc,88482
|
|
14
14
|
pyworkflow/cli/__init__.py,sha256=tcbe-fcZmyeEKUy_aEo8bsEF40HsNKOwvyMBZIJZPwc,3844
|
|
15
15
|
pyworkflow/cli/__main__.py,sha256=LxLLS4FEEPXa5rWpLTtKuivn6Xp9pGia-QKGoxt9SS0,148
|
|
16
16
|
pyworkflow/cli/commands/__init__.py,sha256=IXvnTgukALckkO8fTlZhVRq80ojSqpnIIgboAg_-yZU,39
|
|
@@ -86,9 +86,9 @@ pyworkflow/storage/sqlite.py,sha256=h48FOGtsHHglIEe4hBkf_vR41pwumDDATGsz7-u3wQA,
|
|
|
86
86
|
pyworkflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
87
|
pyworkflow/utils/duration.py,sha256=C-itmiSQQlplw7j6XB679hLF9xYGnyCwm7twO88OF8U,3978
|
|
88
88
|
pyworkflow/utils/schedule.py,sha256=dO_MkGFyfwZpb0LDlW6BGyZzlPuQIA6dc6j9nk9lc4Y,10691
|
|
89
|
-
pyworkflow_engine-0.1.
|
|
90
|
-
pyworkflow_engine-0.1.
|
|
91
|
-
pyworkflow_engine-0.1.
|
|
92
|
-
pyworkflow_engine-0.1.
|
|
93
|
-
pyworkflow_engine-0.1.
|
|
94
|
-
pyworkflow_engine-0.1.
|
|
89
|
+
pyworkflow_engine-0.1.22.dist-info/licenses/LICENSE,sha256=Y49RCTZ5ayn_yzBcRxnyIFdcMCyuYm150aty_FIznfY,1080
|
|
90
|
+
pyworkflow_engine-0.1.22.dist-info/METADATA,sha256=vulOQGFZqTd-TJ3fjgTY1JQBB942bJr97aw7bHs3jTA,19628
|
|
91
|
+
pyworkflow_engine-0.1.22.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
92
|
+
pyworkflow_engine-0.1.22.dist-info/entry_points.txt,sha256=3IGAfuylnS39U0YX0pxnjrj54kB4iT_bNYrmsiDB-dE,51
|
|
93
|
+
pyworkflow_engine-0.1.22.dist-info/top_level.txt,sha256=FLTv9pQmLDBXrQdLOhTMIS3njFibliMsQEfumqmdzBE,11
|
|
94
|
+
pyworkflow_engine-0.1.22.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|