edda-framework 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edda/__init__.py +39 -5
- edda/app.py +383 -223
- edda/channels.py +992 -0
- edda/compensation.py +22 -22
- edda/context.py +77 -51
- edda/integrations/opentelemetry/hooks.py +7 -2
- edda/locking.py +130 -67
- edda/replay.py +312 -82
- edda/storage/models.py +165 -24
- edda/storage/protocol.py +575 -122
- edda/storage/sqlalchemy_storage.py +2073 -319
- edda/viewer_ui/app.py +558 -127
- edda/viewer_ui/components.py +81 -68
- edda/viewer_ui/data_service.py +61 -25
- edda/viewer_ui/theme.py +200 -0
- edda/workflow.py +43 -0
- {edda_framework-0.6.0.dist-info → edda_framework-0.8.0.dist-info}/METADATA +167 -9
- {edda_framework-0.6.0.dist-info → edda_framework-0.8.0.dist-info}/RECORD +21 -20
- {edda_framework-0.6.0.dist-info → edda_framework-0.8.0.dist-info}/WHEEL +1 -1
- edda/events.py +0 -505
- {edda_framework-0.6.0.dist-info → edda_framework-0.8.0.dist-info}/entry_points.txt +0 -0
- {edda_framework-0.6.0.dist-info → edda_framework-0.8.0.dist-info}/licenses/LICENSE +0 -0
edda/storage/protocol.py
CHANGED
|
@@ -19,7 +19,7 @@ class StorageProtocol(Protocol):
|
|
|
19
19
|
|
|
20
20
|
This protocol defines all the methods that a storage backend must implement
|
|
21
21
|
to work with the Edda framework. It supports workflow instances, execution
|
|
22
|
-
history, compensations,
|
|
22
|
+
history, compensations, message subscriptions, outbox events, and distributed locking.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
async def initialize(self) -> None:
|
|
@@ -177,6 +177,7 @@ class StorageProtocol(Protocol):
|
|
|
177
177
|
owner_service: str,
|
|
178
178
|
input_data: dict[str, Any],
|
|
179
179
|
lock_timeout_seconds: int | None = None,
|
|
180
|
+
continued_from: str | None = None,
|
|
180
181
|
) -> None:
|
|
181
182
|
"""
|
|
182
183
|
Create a new workflow instance.
|
|
@@ -188,6 +189,7 @@ class StorageProtocol(Protocol):
|
|
|
188
189
|
owner_service: Service that owns this workflow (e.g., "order-service")
|
|
189
190
|
input_data: Input parameters for the workflow (serializable dict)
|
|
190
191
|
lock_timeout_seconds: Lock timeout for this workflow (None = use global default 300s)
|
|
192
|
+
continued_from: Optional instance ID this workflow continues from (for recur pattern)
|
|
191
193
|
"""
|
|
192
194
|
...
|
|
193
195
|
|
|
@@ -238,20 +240,34 @@ class StorageProtocol(Protocol):
|
|
|
238
240
|
async def list_instances(
|
|
239
241
|
self,
|
|
240
242
|
limit: int = 50,
|
|
243
|
+
page_token: str | None = None,
|
|
241
244
|
status_filter: str | None = None,
|
|
242
|
-
|
|
245
|
+
workflow_name_filter: str | None = None,
|
|
246
|
+
instance_id_filter: str | None = None,
|
|
247
|
+
started_after: datetime | None = None,
|
|
248
|
+
started_before: datetime | None = None,
|
|
249
|
+
) -> dict[str, Any]:
|
|
243
250
|
"""
|
|
244
|
-
List workflow instances with
|
|
251
|
+
List workflow instances with cursor-based pagination and filtering.
|
|
245
252
|
|
|
246
253
|
This method JOINs workflow_instances with workflow_definitions to
|
|
247
254
|
return instances along with their source code.
|
|
248
255
|
|
|
249
256
|
Args:
|
|
250
|
-
limit: Maximum number of instances to return
|
|
257
|
+
limit: Maximum number of instances to return per page
|
|
258
|
+
page_token: Cursor for pagination (format: "ISO_DATETIME||INSTANCE_ID")
|
|
251
259
|
status_filter: Optional status filter (e.g., "running", "completed", "failed")
|
|
260
|
+
workflow_name_filter: Optional workflow name filter (partial match, case-insensitive)
|
|
261
|
+
instance_id_filter: Optional instance ID filter (partial match, case-insensitive)
|
|
262
|
+
started_after: Filter instances started at or after this datetime (inclusive)
|
|
263
|
+
started_before: Filter instances started at or before this datetime (inclusive)
|
|
252
264
|
|
|
253
265
|
Returns:
|
|
254
|
-
|
|
266
|
+
Dictionary containing:
|
|
267
|
+
- instances: List of workflow instances, ordered by started_at DESC
|
|
268
|
+
- next_page_token: Cursor for the next page, or None if no more pages
|
|
269
|
+
- has_more: Boolean indicating if there are more pages
|
|
270
|
+
|
|
255
271
|
Each instance contains: instance_id, workflow_name, source_hash,
|
|
256
272
|
owner_service, status, current_activity_id, started_at, updated_at,
|
|
257
273
|
input_data, source_code, output_data, locked_by, locked_at
|
|
@@ -325,6 +341,47 @@ class StorageProtocol(Protocol):
|
|
|
325
341
|
"""
|
|
326
342
|
...
|
|
327
343
|
|
|
344
|
+
# -------------------------------------------------------------------------
|
|
345
|
+
# System-level Locking Methods (for background task coordination)
|
|
346
|
+
# -------------------------------------------------------------------------
|
|
347
|
+
|
|
348
|
+
async def try_acquire_system_lock(
|
|
349
|
+
self,
|
|
350
|
+
lock_name: str,
|
|
351
|
+
worker_id: str,
|
|
352
|
+
timeout_seconds: int = 60,
|
|
353
|
+
) -> bool:
|
|
354
|
+
"""
|
|
355
|
+
Try to acquire a system-level lock for coordinating background tasks.
|
|
356
|
+
|
|
357
|
+
System locks are used to coordinate operational tasks (cleanup, auto-resume)
|
|
358
|
+
across multiple pods, ensuring only one pod executes these tasks at a time.
|
|
359
|
+
|
|
360
|
+
Unlike workflow locks (which lock existing instances), system locks create
|
|
361
|
+
lock records on-demand.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
lock_name: Unique name for this lock (e.g., "cleanup_stale_locks")
|
|
365
|
+
worker_id: Unique identifier of the worker acquiring the lock
|
|
366
|
+
timeout_seconds: Lock timeout in seconds (default: 60)
|
|
367
|
+
|
|
368
|
+
Returns:
|
|
369
|
+
True if lock was acquired, False if already locked by another worker
|
|
370
|
+
"""
|
|
371
|
+
...
|
|
372
|
+
|
|
373
|
+
async def release_system_lock(self, lock_name: str, worker_id: str) -> None:
|
|
374
|
+
"""
|
|
375
|
+
Release a system-level lock.
|
|
376
|
+
|
|
377
|
+
Only the worker that holds the lock can release it.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
lock_name: Name of the lock to release
|
|
381
|
+
worker_id: Unique identifier of the worker releasing the lock
|
|
382
|
+
"""
|
|
383
|
+
...
|
|
384
|
+
|
|
328
385
|
# -------------------------------------------------------------------------
|
|
329
386
|
# History Methods (for Deterministic Replay)
|
|
330
387
|
# -------------------------------------------------------------------------
|
|
@@ -363,6 +420,38 @@ class StorageProtocol(Protocol):
|
|
|
363
420
|
"""
|
|
364
421
|
...
|
|
365
422
|
|
|
423
|
+
async def archive_history(self, instance_id: str) -> int:
|
|
424
|
+
"""
|
|
425
|
+
Archive workflow history for the recur pattern.
|
|
426
|
+
|
|
427
|
+
Moves all history entries from workflow_history to workflow_history_archive.
|
|
428
|
+
This is called when a workflow uses recur() to restart with fresh history.
|
|
429
|
+
|
|
430
|
+
Args:
|
|
431
|
+
instance_id: Workflow instance whose history should be archived
|
|
432
|
+
|
|
433
|
+
Returns:
|
|
434
|
+
Number of history entries archived
|
|
435
|
+
"""
|
|
436
|
+
...
|
|
437
|
+
|
|
438
|
+
async def find_first_cancellation_event(self, instance_id: str) -> dict[str, Any] | None:
|
|
439
|
+
"""
|
|
440
|
+
Find the first cancellation event in workflow history.
|
|
441
|
+
|
|
442
|
+
This is an optimized query that uses LIMIT 1 to avoid loading
|
|
443
|
+
all history events when checking for cancellation status.
|
|
444
|
+
|
|
445
|
+
Args:
|
|
446
|
+
instance_id: Workflow instance ID
|
|
447
|
+
|
|
448
|
+
Returns:
|
|
449
|
+
The first cancellation event if found, None otherwise.
|
|
450
|
+
A cancellation event is any event where event_type is
|
|
451
|
+
'WorkflowCancelled' or contains 'cancel' (case-insensitive).
|
|
452
|
+
"""
|
|
453
|
+
...
|
|
454
|
+
|
|
366
455
|
# -------------------------------------------------------------------------
|
|
367
456
|
# Compensation Methods (for Saga Pattern)
|
|
368
457
|
# -------------------------------------------------------------------------
|
|
@@ -412,125 +501,9 @@ class StorageProtocol(Protocol):
|
|
|
412
501
|
...
|
|
413
502
|
|
|
414
503
|
# -------------------------------------------------------------------------
|
|
415
|
-
#
|
|
504
|
+
# Timer Subscription Methods (for wait_timer)
|
|
416
505
|
# -------------------------------------------------------------------------
|
|
417
506
|
|
|
418
|
-
async def add_event_subscription(
|
|
419
|
-
self,
|
|
420
|
-
instance_id: str,
|
|
421
|
-
event_type: str,
|
|
422
|
-
timeout_at: datetime | None = None,
|
|
423
|
-
) -> None:
|
|
424
|
-
"""
|
|
425
|
-
Register an event wait subscription.
|
|
426
|
-
|
|
427
|
-
When a workflow calls wait_event(), a subscription is created
|
|
428
|
-
in the database so that incoming events can be routed to the
|
|
429
|
-
waiting workflow.
|
|
430
|
-
|
|
431
|
-
Note: filter_expr is not needed because subscriptions are uniquely
|
|
432
|
-
identified by instance_id. Events are delivered to specific workflow
|
|
433
|
-
instances, not filtered across multiple instances.
|
|
434
|
-
|
|
435
|
-
Args:
|
|
436
|
-
instance_id: Workflow instance
|
|
437
|
-
event_type: CloudEvent type to wait for (e.g., "payment.completed")
|
|
438
|
-
timeout_at: Optional timeout timestamp
|
|
439
|
-
"""
|
|
440
|
-
...
|
|
441
|
-
|
|
442
|
-
async def find_waiting_instances(self, event_type: str) -> list[dict[str, Any]]:
|
|
443
|
-
"""
|
|
444
|
-
Find workflow instances waiting for a specific event type.
|
|
445
|
-
|
|
446
|
-
Called when an event arrives to find which workflows are waiting for it.
|
|
447
|
-
|
|
448
|
-
Args:
|
|
449
|
-
event_type: CloudEvent type
|
|
450
|
-
|
|
451
|
-
Returns:
|
|
452
|
-
List of waiting instances with subscription info.
|
|
453
|
-
Each item contains: instance_id, event_type, timeout_at
|
|
454
|
-
"""
|
|
455
|
-
...
|
|
456
|
-
|
|
457
|
-
async def remove_event_subscription(
|
|
458
|
-
self,
|
|
459
|
-
instance_id: str,
|
|
460
|
-
event_type: str,
|
|
461
|
-
) -> None:
|
|
462
|
-
"""
|
|
463
|
-
Remove event subscription after the event is received.
|
|
464
|
-
|
|
465
|
-
Args:
|
|
466
|
-
instance_id: Workflow instance
|
|
467
|
-
event_type: CloudEvent type
|
|
468
|
-
"""
|
|
469
|
-
...
|
|
470
|
-
|
|
471
|
-
async def cleanup_expired_subscriptions(self) -> int:
|
|
472
|
-
"""
|
|
473
|
-
Clean up event subscriptions that have timed out.
|
|
474
|
-
|
|
475
|
-
Returns:
|
|
476
|
-
Number of subscriptions cleaned up
|
|
477
|
-
"""
|
|
478
|
-
...
|
|
479
|
-
|
|
480
|
-
async def find_expired_event_subscriptions(
|
|
481
|
-
self,
|
|
482
|
-
) -> list[dict[str, Any]]:
|
|
483
|
-
"""
|
|
484
|
-
Find event subscriptions that have timed out.
|
|
485
|
-
|
|
486
|
-
Returns:
|
|
487
|
-
List of dictionaries containing:
|
|
488
|
-
- instance_id: Workflow instance ID
|
|
489
|
-
- event_type: Event type that was being waited for
|
|
490
|
-
- timeout_at: Timeout timestamp (ISO 8601 string)
|
|
491
|
-
- created_at: Subscription creation timestamp (ISO 8601 string)
|
|
492
|
-
|
|
493
|
-
Note:
|
|
494
|
-
This method does NOT delete the subscriptions - it only finds them.
|
|
495
|
-
Use cleanup_expired_subscriptions() to delete them after processing.
|
|
496
|
-
"""
|
|
497
|
-
...
|
|
498
|
-
|
|
499
|
-
async def register_event_subscription_and_release_lock(
|
|
500
|
-
self,
|
|
501
|
-
instance_id: str,
|
|
502
|
-
worker_id: str,
|
|
503
|
-
event_type: str,
|
|
504
|
-
timeout_at: datetime | None = None,
|
|
505
|
-
activity_id: str | None = None,
|
|
506
|
-
) -> None:
|
|
507
|
-
"""
|
|
508
|
-
Atomically register event subscription and release workflow lock.
|
|
509
|
-
|
|
510
|
-
This method performs the following operations in a SINGLE database transaction:
|
|
511
|
-
1. Register event subscription (INSERT into workflow_event_subscriptions)
|
|
512
|
-
2. Update current activity (UPDATE workflow_instances.current_activity_id)
|
|
513
|
-
3. Release lock (UPDATE workflow_instances set locked_by=NULL)
|
|
514
|
-
|
|
515
|
-
This ensures that when a workflow calls wait_event(), the subscription is
|
|
516
|
-
registered and the lock is released atomically, preventing race conditions
|
|
517
|
-
in distributed environments (distributed coroutines pattern).
|
|
518
|
-
|
|
519
|
-
Note: filter_expr is not needed because subscriptions are uniquely identified
|
|
520
|
-
by instance_id. Events are delivered to specific workflow instances.
|
|
521
|
-
|
|
522
|
-
Args:
|
|
523
|
-
instance_id: Workflow instance ID
|
|
524
|
-
worker_id: Worker ID that currently holds the lock
|
|
525
|
-
event_type: CloudEvent type to wait for
|
|
526
|
-
timeout_at: Optional timeout timestamp
|
|
527
|
-
activity_id: Current activity ID to record
|
|
528
|
-
|
|
529
|
-
Raises:
|
|
530
|
-
RuntimeError: If the worker doesn't hold the lock (sanity check)
|
|
531
|
-
"""
|
|
532
|
-
...
|
|
533
|
-
|
|
534
507
|
async def register_timer_subscription_and_release_lock(
|
|
535
508
|
self,
|
|
536
509
|
instance_id: str,
|
|
@@ -735,3 +708,483 @@ class StorageProtocol(Protocol):
|
|
|
735
708
|
or if instance not found
|
|
736
709
|
"""
|
|
737
710
|
...
|
|
711
|
+
|
|
712
|
+
# -------------------------------------------------------------------------
|
|
713
|
+
# Message Subscription Methods (for wait_message)
|
|
714
|
+
# -------------------------------------------------------------------------
|
|
715
|
+
|
|
716
|
+
async def register_message_subscription_and_release_lock(
|
|
717
|
+
self,
|
|
718
|
+
instance_id: str,
|
|
719
|
+
worker_id: str,
|
|
720
|
+
channel: str,
|
|
721
|
+
timeout_at: datetime | None = None,
|
|
722
|
+
activity_id: str | None = None,
|
|
723
|
+
) -> None:
|
|
724
|
+
"""
|
|
725
|
+
Atomically register message subscription and release workflow lock.
|
|
726
|
+
|
|
727
|
+
This method performs the following operations in a SINGLE database transaction:
|
|
728
|
+
1. Register message subscription (INSERT into workflow_message_subscriptions)
|
|
729
|
+
2. Update current activity (UPDATE workflow_instances.current_activity_id)
|
|
730
|
+
3. Update status to 'waiting_for_event'
|
|
731
|
+
4. Release lock (UPDATE workflow_instances set locked_by=NULL)
|
|
732
|
+
|
|
733
|
+
This ensures that when a workflow calls wait_message(), the subscription is
|
|
734
|
+
registered and the lock is released atomically, preventing race conditions
|
|
735
|
+
in distributed environments (distributed coroutines pattern).
|
|
736
|
+
|
|
737
|
+
Args:
|
|
738
|
+
instance_id: Workflow instance ID
|
|
739
|
+
worker_id: Worker ID that currently holds the lock
|
|
740
|
+
channel: Channel name to wait on
|
|
741
|
+
timeout_at: Optional timeout timestamp
|
|
742
|
+
activity_id: Current activity ID to record
|
|
743
|
+
|
|
744
|
+
Raises:
|
|
745
|
+
RuntimeError: If the worker doesn't hold the lock (sanity check)
|
|
746
|
+
"""
|
|
747
|
+
...
|
|
748
|
+
|
|
749
|
+
async def find_waiting_instances_by_channel(
|
|
750
|
+
self,
|
|
751
|
+
channel: str,
|
|
752
|
+
) -> list[dict[str, Any]]:
|
|
753
|
+
"""
|
|
754
|
+
Find workflow instances waiting on a specific channel.
|
|
755
|
+
|
|
756
|
+
Called when a message arrives to find which workflows are waiting for it.
|
|
757
|
+
|
|
758
|
+
Args:
|
|
759
|
+
channel: Channel name
|
|
760
|
+
|
|
761
|
+
Returns:
|
|
762
|
+
List of waiting instances with subscription info.
|
|
763
|
+
Each item contains: instance_id, channel, activity_id, timeout_at
|
|
764
|
+
"""
|
|
765
|
+
...
|
|
766
|
+
|
|
767
|
+
async def remove_message_subscription(
|
|
768
|
+
self,
|
|
769
|
+
instance_id: str,
|
|
770
|
+
channel: str,
|
|
771
|
+
) -> None:
|
|
772
|
+
"""
|
|
773
|
+
Remove message subscription after the message is received.
|
|
774
|
+
|
|
775
|
+
Args:
|
|
776
|
+
instance_id: Workflow instance
|
|
777
|
+
channel: Channel name
|
|
778
|
+
"""
|
|
779
|
+
...
|
|
780
|
+
|
|
781
|
+
async def deliver_message(
|
|
782
|
+
self,
|
|
783
|
+
instance_id: str,
|
|
784
|
+
channel: str,
|
|
785
|
+
data: dict[str, Any] | bytes,
|
|
786
|
+
metadata: dict[str, Any],
|
|
787
|
+
worker_id: str | None = None,
|
|
788
|
+
) -> dict[str, Any] | None:
|
|
789
|
+
"""
|
|
790
|
+
Deliver a message to a workflow instance waiting on a channel.
|
|
791
|
+
|
|
792
|
+
Uses Lock-First pattern to prevent race conditions in distributed environments:
|
|
793
|
+
1. Checks if instance is waiting on the channel
|
|
794
|
+
2. Acquires lock (Lock-First pattern) - if worker_id provided
|
|
795
|
+
3. Records message to history
|
|
796
|
+
4. Removes subscription
|
|
797
|
+
5. Updates status to 'running'
|
|
798
|
+
6. Releases lock
|
|
799
|
+
|
|
800
|
+
The workflow will be resumed by the caller or background task.
|
|
801
|
+
|
|
802
|
+
Args:
|
|
803
|
+
instance_id: Target workflow instance ID
|
|
804
|
+
channel: Channel name
|
|
805
|
+
data: Message payload (dict or bytes)
|
|
806
|
+
metadata: Message metadata
|
|
807
|
+
worker_id: Worker ID for locking. If None, skip locking (unsafe for distributed).
|
|
808
|
+
|
|
809
|
+
Returns:
|
|
810
|
+
Dict with delivery info if successful:
|
|
811
|
+
{"instance_id": str, "workflow_name": str, "activity_id": str}
|
|
812
|
+
None if message was not delivered (no subscription or lock failed)
|
|
813
|
+
"""
|
|
814
|
+
...
|
|
815
|
+
|
|
816
|
+
async def find_expired_message_subscriptions(self) -> list[dict[str, Any]]:
|
|
817
|
+
"""
|
|
818
|
+
Find message subscriptions that have timed out.
|
|
819
|
+
|
|
820
|
+
Returns:
|
|
821
|
+
List of expired subscriptions with instance_id, channel, activity_id,
|
|
822
|
+
timeout_at, created_at
|
|
823
|
+
"""
|
|
824
|
+
...
|
|
825
|
+
|
|
826
|
+
# -------------------------------------------------------------------------
|
|
827
|
+
# Group Membership Methods (Erlang pg style)
|
|
828
|
+
# -------------------------------------------------------------------------
|
|
829
|
+
|
|
830
|
+
async def join_group(self, instance_id: str, group_name: str) -> None:
|
|
831
|
+
"""
|
|
832
|
+
Add a workflow instance to a group.
|
|
833
|
+
|
|
834
|
+
Groups provide loose coupling for message broadcasting.
|
|
835
|
+
Senders don't need to know receiver instance IDs.
|
|
836
|
+
|
|
837
|
+
Args:
|
|
838
|
+
instance_id: Workflow instance to add
|
|
839
|
+
group_name: Group name (e.g., "order_notifications")
|
|
840
|
+
"""
|
|
841
|
+
...
|
|
842
|
+
|
|
843
|
+
async def leave_group(self, instance_id: str, group_name: str) -> None:
|
|
844
|
+
"""
|
|
845
|
+
Remove a workflow instance from a group.
|
|
846
|
+
|
|
847
|
+
Args:
|
|
848
|
+
instance_id: Workflow instance to remove
|
|
849
|
+
group_name: Group name
|
|
850
|
+
"""
|
|
851
|
+
...
|
|
852
|
+
|
|
853
|
+
async def get_group_members(self, group_name: str) -> list[str]:
|
|
854
|
+
"""
|
|
855
|
+
Get all instance IDs in a group.
|
|
856
|
+
|
|
857
|
+
Args:
|
|
858
|
+
group_name: Group name
|
|
859
|
+
|
|
860
|
+
Returns:
|
|
861
|
+
List of instance IDs that are members of the group
|
|
862
|
+
"""
|
|
863
|
+
...
|
|
864
|
+
|
|
865
|
+
async def leave_all_groups(self, instance_id: str) -> None:
|
|
866
|
+
"""
|
|
867
|
+
Remove a workflow instance from all groups.
|
|
868
|
+
|
|
869
|
+
Called when a workflow completes or fails.
|
|
870
|
+
|
|
871
|
+
Args:
|
|
872
|
+
instance_id: Workflow instance to remove from all groups
|
|
873
|
+
"""
|
|
874
|
+
...
|
|
875
|
+
|
|
876
|
+
# -------------------------------------------------------------------------
|
|
877
|
+
# Workflow Resumption Methods
|
|
878
|
+
# -------------------------------------------------------------------------
|
|
879
|
+
|
|
880
|
+
async def find_resumable_workflows(self) -> list[dict[str, Any]]:
|
|
881
|
+
"""
|
|
882
|
+
Find workflows that are ready to be resumed.
|
|
883
|
+
|
|
884
|
+
Returns workflows with status='running' that don't have an active lock.
|
|
885
|
+
These are typically workflows that:
|
|
886
|
+
- Had a message delivered (deliver_message sets status='running')
|
|
887
|
+
- Had their lock released after message delivery
|
|
888
|
+
- Haven't been picked up by auto_resume yet
|
|
889
|
+
|
|
890
|
+
This allows immediate resumption after message delivery rather than
|
|
891
|
+
waiting for the stale lock cleanup cycle (60+ seconds).
|
|
892
|
+
|
|
893
|
+
Returns:
|
|
894
|
+
List of resumable workflows.
|
|
895
|
+
Each item contains: instance_id, workflow_name
|
|
896
|
+
"""
|
|
897
|
+
...
|
|
898
|
+
|
|
899
|
+
# -------------------------------------------------------------------------
|
|
900
|
+
# Subscription Cleanup Methods (for recur())
|
|
901
|
+
# -------------------------------------------------------------------------
|
|
902
|
+
|
|
903
|
+
async def cleanup_instance_subscriptions(self, instance_id: str) -> None:
|
|
904
|
+
"""
|
|
905
|
+
Remove all subscriptions for a workflow instance.
|
|
906
|
+
|
|
907
|
+
Called during recur() to clean up timer/message subscriptions
|
|
908
|
+
before archiving the history. This prevents:
|
|
909
|
+
- Message delivery to archived instances
|
|
910
|
+
- Timer expiration for non-existent workflows
|
|
911
|
+
|
|
912
|
+
Removes entries from:
|
|
913
|
+
- workflow_timer_subscriptions
|
|
914
|
+
- workflow_message_subscriptions
|
|
915
|
+
- channel_subscriptions (new)
|
|
916
|
+
- channel_message_claims (new)
|
|
917
|
+
|
|
918
|
+
Args:
|
|
919
|
+
instance_id: Workflow instance ID to clean up
|
|
920
|
+
"""
|
|
921
|
+
...
|
|
922
|
+
|
|
923
|
+
# -------------------------------------------------------------------------
|
|
924
|
+
# Channel-based Message Queue Methods
|
|
925
|
+
# -------------------------------------------------------------------------
|
|
926
|
+
|
|
927
|
+
async def publish_to_channel(
|
|
928
|
+
self,
|
|
929
|
+
channel: str,
|
|
930
|
+
data: dict[str, Any] | bytes,
|
|
931
|
+
metadata: dict[str, Any] | None = None,
|
|
932
|
+
) -> str:
|
|
933
|
+
"""
|
|
934
|
+
Publish a message to a channel.
|
|
935
|
+
|
|
936
|
+
Messages are persisted to the channel_messages table and will be
|
|
937
|
+
available for subscribers to receive. This implements the "mailbox"
|
|
938
|
+
pattern where messages are queued even before receive() is called.
|
|
939
|
+
|
|
940
|
+
Args:
|
|
941
|
+
channel: Channel name (e.g., "orders", "payment.completed")
|
|
942
|
+
data: Message payload (dict or bytes)
|
|
943
|
+
metadata: Optional message metadata
|
|
944
|
+
|
|
945
|
+
Returns:
|
|
946
|
+
Generated message_id (UUID)
|
|
947
|
+
"""
|
|
948
|
+
...
|
|
949
|
+
|
|
950
|
+
async def subscribe_to_channel(
|
|
951
|
+
self,
|
|
952
|
+
instance_id: str,
|
|
953
|
+
channel: str,
|
|
954
|
+
mode: str,
|
|
955
|
+
) -> None:
|
|
956
|
+
"""
|
|
957
|
+
Subscribe a workflow instance to a channel.
|
|
958
|
+
|
|
959
|
+
Args:
|
|
960
|
+
instance_id: Workflow instance ID
|
|
961
|
+
channel: Channel name
|
|
962
|
+
mode: Subscription mode ('broadcast' or 'competing')
|
|
963
|
+
- broadcast: All subscribers receive all messages
|
|
964
|
+
- competing: Each message is received by only one subscriber
|
|
965
|
+
|
|
966
|
+
Raises:
|
|
967
|
+
ValueError: If mode is not 'broadcast' or 'competing'
|
|
968
|
+
"""
|
|
969
|
+
...
|
|
970
|
+
|
|
971
|
+
async def unsubscribe_from_channel(
|
|
972
|
+
self,
|
|
973
|
+
instance_id: str,
|
|
974
|
+
channel: str,
|
|
975
|
+
) -> None:
|
|
976
|
+
"""
|
|
977
|
+
Unsubscribe a workflow instance from a channel.
|
|
978
|
+
|
|
979
|
+
Args:
|
|
980
|
+
instance_id: Workflow instance ID
|
|
981
|
+
channel: Channel name
|
|
982
|
+
"""
|
|
983
|
+
...
|
|
984
|
+
|
|
985
|
+
async def get_channel_subscription(
|
|
986
|
+
self,
|
|
987
|
+
instance_id: str,
|
|
988
|
+
channel: str,
|
|
989
|
+
) -> dict[str, Any] | None:
|
|
990
|
+
"""
|
|
991
|
+
Get the subscription info for a workflow instance on a channel.
|
|
992
|
+
|
|
993
|
+
Args:
|
|
994
|
+
instance_id: Workflow instance ID
|
|
995
|
+
channel: Channel name
|
|
996
|
+
|
|
997
|
+
Returns:
|
|
998
|
+
Subscription info dict with: mode, activity_id, cursor_message_id
|
|
999
|
+
or None if not subscribed
|
|
1000
|
+
"""
|
|
1001
|
+
...
|
|
1002
|
+
|
|
1003
|
+
async def register_channel_receive_and_release_lock(
|
|
1004
|
+
self,
|
|
1005
|
+
instance_id: str,
|
|
1006
|
+
worker_id: str,
|
|
1007
|
+
channel: str,
|
|
1008
|
+
activity_id: str | None = None,
|
|
1009
|
+
timeout_seconds: int | None = None,
|
|
1010
|
+
) -> None:
|
|
1011
|
+
"""
|
|
1012
|
+
Atomically register that workflow is waiting for channel message and release lock.
|
|
1013
|
+
|
|
1014
|
+
This method performs the following operations in a SINGLE database transaction:
|
|
1015
|
+
1. Update channel_subscriptions to set activity_id and timeout_at (waiting state)
|
|
1016
|
+
2. Update current activity (UPDATE workflow_instances.current_activity_id)
|
|
1017
|
+
3. Update status to 'waiting_for_message'
|
|
1018
|
+
4. Release lock (UPDATE workflow_instances set locked_by=NULL)
|
|
1019
|
+
|
|
1020
|
+
Args:
|
|
1021
|
+
instance_id: Workflow instance ID
|
|
1022
|
+
worker_id: Worker ID that currently holds the lock
|
|
1023
|
+
channel: Channel name being waited on
|
|
1024
|
+
activity_id: Current activity ID to record
|
|
1025
|
+
timeout_seconds: Optional timeout in seconds for the message wait
|
|
1026
|
+
|
|
1027
|
+
Raises:
|
|
1028
|
+
RuntimeError: If the worker doesn't hold the lock
|
|
1029
|
+
ValueError: If workflow is not subscribed to the channel
|
|
1030
|
+
"""
|
|
1031
|
+
...
|
|
1032
|
+
|
|
1033
|
+
async def get_pending_channel_messages(
|
|
1034
|
+
self,
|
|
1035
|
+
instance_id: str,
|
|
1036
|
+
channel: str,
|
|
1037
|
+
) -> list[dict[str, Any]]:
|
|
1038
|
+
"""
|
|
1039
|
+
Get pending messages for a subscriber on a channel.
|
|
1040
|
+
|
|
1041
|
+
For broadcast mode:
|
|
1042
|
+
Returns messages with id > cursor_message_id (messages not yet seen)
|
|
1043
|
+
|
|
1044
|
+
For competing mode:
|
|
1045
|
+
Returns unclaimed messages (not in channel_message_claims)
|
|
1046
|
+
|
|
1047
|
+
Args:
|
|
1048
|
+
instance_id: Workflow instance ID
|
|
1049
|
+
channel: Channel name
|
|
1050
|
+
|
|
1051
|
+
Returns:
|
|
1052
|
+
List of pending messages, ordered by published_at ASC.
|
|
1053
|
+
Each message contains: id, message_id, channel, data, metadata, published_at
|
|
1054
|
+
"""
|
|
1055
|
+
...
|
|
1056
|
+
|
|
1057
|
+
async def claim_channel_message(
|
|
1058
|
+
self,
|
|
1059
|
+
message_id: str,
|
|
1060
|
+
instance_id: str,
|
|
1061
|
+
) -> bool:
|
|
1062
|
+
"""
|
|
1063
|
+
Claim a message for competing consumption.
|
|
1064
|
+
|
|
1065
|
+
Uses SELECT FOR UPDATE SKIP LOCKED pattern to ensure only one
|
|
1066
|
+
subscriber claims each message.
|
|
1067
|
+
|
|
1068
|
+
Args:
|
|
1069
|
+
message_id: Message ID to claim
|
|
1070
|
+
instance_id: Workflow instance claiming the message
|
|
1071
|
+
|
|
1072
|
+
Returns:
|
|
1073
|
+
True if claim succeeded, False if already claimed by another instance
|
|
1074
|
+
"""
|
|
1075
|
+
...
|
|
1076
|
+
|
|
1077
|
+
async def delete_channel_message(self, message_id: str) -> None:
|
|
1078
|
+
"""
|
|
1079
|
+
Delete a message from the channel queue.
|
|
1080
|
+
|
|
1081
|
+
Called after successful message processing in competing mode.
|
|
1082
|
+
|
|
1083
|
+
Args:
|
|
1084
|
+
message_id: Message ID to delete
|
|
1085
|
+
"""
|
|
1086
|
+
...
|
|
1087
|
+
|
|
1088
|
+
async def update_delivery_cursor(
|
|
1089
|
+
self,
|
|
1090
|
+
channel: str,
|
|
1091
|
+
instance_id: str,
|
|
1092
|
+
message_id: int,
|
|
1093
|
+
) -> None:
|
|
1094
|
+
"""
|
|
1095
|
+
Update the delivery cursor for broadcast mode.
|
|
1096
|
+
|
|
1097
|
+
Records the last message ID delivered to a subscriber, so the same
|
|
1098
|
+
messages are not delivered again.
|
|
1099
|
+
|
|
1100
|
+
Args:
|
|
1101
|
+
channel: Channel name
|
|
1102
|
+
instance_id: Subscriber instance ID
|
|
1103
|
+
message_id: Last delivered message's internal ID (channel_messages.id)
|
|
1104
|
+
"""
|
|
1105
|
+
...
|
|
1106
|
+
|
|
1107
|
+
async def get_channel_subscribers_waiting(
|
|
1108
|
+
self,
|
|
1109
|
+
channel: str,
|
|
1110
|
+
) -> list[dict[str, Any]]:
|
|
1111
|
+
"""
|
|
1112
|
+
Get channel subscribers that are waiting (activity_id is set).
|
|
1113
|
+
|
|
1114
|
+
Called when a message is published to find subscribers to wake up.
|
|
1115
|
+
|
|
1116
|
+
Args:
|
|
1117
|
+
channel: Channel name
|
|
1118
|
+
|
|
1119
|
+
Returns:
|
|
1120
|
+
List of waiting subscribers.
|
|
1121
|
+
Each item contains: instance_id, channel, mode, activity_id
|
|
1122
|
+
"""
|
|
1123
|
+
...
|
|
1124
|
+
|
|
1125
|
+
async def clear_channel_waiting_state(
|
|
1126
|
+
self,
|
|
1127
|
+
instance_id: str,
|
|
1128
|
+
channel: str,
|
|
1129
|
+
) -> None:
|
|
1130
|
+
"""
|
|
1131
|
+
Clear the waiting state for a channel subscription.
|
|
1132
|
+
|
|
1133
|
+
Called after a message is delivered to a waiting subscriber.
|
|
1134
|
+
|
|
1135
|
+
Args:
|
|
1136
|
+
instance_id: Workflow instance ID
|
|
1137
|
+
channel: Channel name
|
|
1138
|
+
"""
|
|
1139
|
+
...
|
|
1140
|
+
|
|
1141
|
+
async def deliver_channel_message(
|
|
1142
|
+
self,
|
|
1143
|
+
instance_id: str,
|
|
1144
|
+
channel: str,
|
|
1145
|
+
message_id: str,
|
|
1146
|
+
data: dict[str, Any] | bytes,
|
|
1147
|
+
metadata: dict[str, Any],
|
|
1148
|
+
worker_id: str,
|
|
1149
|
+
) -> dict[str, Any] | None:
|
|
1150
|
+
"""
|
|
1151
|
+
Deliver a channel message to a waiting workflow.
|
|
1152
|
+
|
|
1153
|
+
Uses Lock-First pattern:
|
|
1154
|
+
1. Acquire lock on the workflow instance
|
|
1155
|
+
2. Record message to history
|
|
1156
|
+
3. Clear waiting state / update cursor / claim message
|
|
1157
|
+
4. Update status to 'running'
|
|
1158
|
+
5. Release lock
|
|
1159
|
+
|
|
1160
|
+
Args:
|
|
1161
|
+
instance_id: Target workflow instance ID
|
|
1162
|
+
channel: Channel name
|
|
1163
|
+
message_id: Message ID being delivered
|
|
1164
|
+
data: Message payload
|
|
1165
|
+
metadata: Message metadata
|
|
1166
|
+
worker_id: Worker ID for locking
|
|
1167
|
+
|
|
1168
|
+
Returns:
|
|
1169
|
+
Dict with delivery info if successful:
|
|
1170
|
+
{"instance_id": str, "workflow_name": str, "activity_id": str}
|
|
1171
|
+
None if delivery failed (lock conflict, etc.)
|
|
1172
|
+
"""
|
|
1173
|
+
...
|
|
1174
|
+
|
|
1175
|
+
async def cleanup_old_channel_messages(self, older_than_days: int = 7) -> int:
|
|
1176
|
+
"""
|
|
1177
|
+
Clean up old messages from channel queues.
|
|
1178
|
+
|
|
1179
|
+
For broadcast mode: Delete messages where all current subscribers have
|
|
1180
|
+
received them (cursor is past the message).
|
|
1181
|
+
|
|
1182
|
+
For all modes: Delete messages older than the retention period.
|
|
1183
|
+
|
|
1184
|
+
Args:
|
|
1185
|
+
older_than_days: Message retention period in days
|
|
1186
|
+
|
|
1187
|
+
Returns:
|
|
1188
|
+
Number of messages deleted
|
|
1189
|
+
"""
|
|
1190
|
+
...
|