edda-framework 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
edda/storage/protocol.py CHANGED
@@ -19,7 +19,7 @@ class StorageProtocol(Protocol):
19
19
 
20
20
  This protocol defines all the methods that a storage backend must implement
21
21
  to work with the Edda framework. It supports workflow instances, execution
22
- history, compensations, event subscriptions, outbox events, and distributed locking.
22
+ history, compensations, message subscriptions, outbox events, and distributed locking.
23
23
  """
24
24
 
25
25
  async def initialize(self) -> None:
@@ -177,6 +177,7 @@ class StorageProtocol(Protocol):
177
177
  owner_service: str,
178
178
  input_data: dict[str, Any],
179
179
  lock_timeout_seconds: int | None = None,
180
+ continued_from: str | None = None,
180
181
  ) -> None:
181
182
  """
182
183
  Create a new workflow instance.
@@ -188,6 +189,7 @@ class StorageProtocol(Protocol):
188
189
  owner_service: Service that owns this workflow (e.g., "order-service")
189
190
  input_data: Input parameters for the workflow (serializable dict)
190
191
  lock_timeout_seconds: Lock timeout for this workflow (None = use global default 300s)
192
+ continued_from: Optional instance ID this workflow continues from (for recur pattern)
191
193
  """
192
194
  ...
193
195
 
@@ -339,6 +341,47 @@ class StorageProtocol(Protocol):
339
341
  """
340
342
  ...
341
343
 
344
+ # -------------------------------------------------------------------------
345
+ # System-level Locking Methods (for background task coordination)
346
+ # -------------------------------------------------------------------------
347
+
348
+ async def try_acquire_system_lock(
349
+ self,
350
+ lock_name: str,
351
+ worker_id: str,
352
+ timeout_seconds: int = 60,
353
+ ) -> bool:
354
+ """
355
+ Try to acquire a system-level lock for coordinating background tasks.
356
+
357
+ System locks are used to coordinate operational tasks (cleanup, auto-resume)
358
+ across multiple pods, ensuring only one pod executes these tasks at a time.
359
+
360
+ Unlike workflow locks (which lock existing instances), system locks create
361
+ lock records on-demand.
362
+
363
+ Args:
364
+ lock_name: Unique name for this lock (e.g., "cleanup_stale_locks")
365
+ worker_id: Unique identifier of the worker acquiring the lock
366
+ timeout_seconds: Lock timeout in seconds (default: 60)
367
+
368
+ Returns:
369
+ True if lock was acquired, False if already locked by another worker
370
+ """
371
+ ...
372
+
373
+ async def release_system_lock(self, lock_name: str, worker_id: str) -> None:
374
+ """
375
+ Release a system-level lock.
376
+
377
+ Only the worker that holds the lock can release it.
378
+
379
+ Args:
380
+ lock_name: Name of the lock to release
381
+ worker_id: Unique identifier of the worker releasing the lock
382
+ """
383
+ ...
384
+
342
385
  # -------------------------------------------------------------------------
343
386
  # History Methods (for Deterministic Replay)
344
387
  # -------------------------------------------------------------------------
@@ -377,6 +420,38 @@ class StorageProtocol(Protocol):
377
420
  """
378
421
  ...
379
422
 
423
+ async def archive_history(self, instance_id: str) -> int:
424
+ """
425
+ Archive workflow history for the recur pattern.
426
+
427
+ Moves all history entries from workflow_history to workflow_history_archive.
428
+ This is called when a workflow uses recur() to restart with fresh history.
429
+
430
+ Args:
431
+ instance_id: Workflow instance whose history should be archived
432
+
433
+ Returns:
434
+ Number of history entries archived
435
+ """
436
+ ...
437
+
438
+ async def find_first_cancellation_event(self, instance_id: str) -> dict[str, Any] | None:
439
+ """
440
+ Find the first cancellation event in workflow history.
441
+
442
+ This is an optimized query that uses LIMIT 1 to avoid loading
443
+ all history events when checking for cancellation status.
444
+
445
+ Args:
446
+ instance_id: Workflow instance ID
447
+
448
+ Returns:
449
+ The first cancellation event if found, None otherwise.
450
+ A cancellation event is any event where event_type is
451
+ 'WorkflowCancelled' or contains 'cancel' (case-insensitive).
452
+ """
453
+ ...
454
+
380
455
  # -------------------------------------------------------------------------
381
456
  # Compensation Methods (for Saga Pattern)
382
457
  # -------------------------------------------------------------------------
@@ -426,125 +501,9 @@ class StorageProtocol(Protocol):
426
501
  ...
427
502
 
428
503
  # -------------------------------------------------------------------------
429
- # Event Subscription Methods (for wait_event)
504
+ # Timer Subscription Methods (for wait_timer)
430
505
  # -------------------------------------------------------------------------
431
506
 
432
- async def add_event_subscription(
433
- self,
434
- instance_id: str,
435
- event_type: str,
436
- timeout_at: datetime | None = None,
437
- ) -> None:
438
- """
439
- Register an event wait subscription.
440
-
441
- When a workflow calls wait_event(), a subscription is created
442
- in the database so that incoming events can be routed to the
443
- waiting workflow.
444
-
445
- Note: filter_expr is not needed because subscriptions are uniquely
446
- identified by instance_id. Events are delivered to specific workflow
447
- instances, not filtered across multiple instances.
448
-
449
- Args:
450
- instance_id: Workflow instance
451
- event_type: CloudEvent type to wait for (e.g., "payment.completed")
452
- timeout_at: Optional timeout timestamp
453
- """
454
- ...
455
-
456
- async def find_waiting_instances(self, event_type: str) -> list[dict[str, Any]]:
457
- """
458
- Find workflow instances waiting for a specific event type.
459
-
460
- Called when an event arrives to find which workflows are waiting for it.
461
-
462
- Args:
463
- event_type: CloudEvent type
464
-
465
- Returns:
466
- List of waiting instances with subscription info.
467
- Each item contains: instance_id, event_type, timeout_at
468
- """
469
- ...
470
-
471
- async def remove_event_subscription(
472
- self,
473
- instance_id: str,
474
- event_type: str,
475
- ) -> None:
476
- """
477
- Remove event subscription after the event is received.
478
-
479
- Args:
480
- instance_id: Workflow instance
481
- event_type: CloudEvent type
482
- """
483
- ...
484
-
485
- async def cleanup_expired_subscriptions(self) -> int:
486
- """
487
- Clean up event subscriptions that have timed out.
488
-
489
- Returns:
490
- Number of subscriptions cleaned up
491
- """
492
- ...
493
-
494
- async def find_expired_event_subscriptions(
495
- self,
496
- ) -> list[dict[str, Any]]:
497
- """
498
- Find event subscriptions that have timed out.
499
-
500
- Returns:
501
- List of dictionaries containing:
502
- - instance_id: Workflow instance ID
503
- - event_type: Event type that was being waited for
504
- - timeout_at: Timeout timestamp (ISO 8601 string)
505
- - created_at: Subscription creation timestamp (ISO 8601 string)
506
-
507
- Note:
508
- This method does NOT delete the subscriptions - it only finds them.
509
- Use cleanup_expired_subscriptions() to delete them after processing.
510
- """
511
- ...
512
-
513
- async def register_event_subscription_and_release_lock(
514
- self,
515
- instance_id: str,
516
- worker_id: str,
517
- event_type: str,
518
- timeout_at: datetime | None = None,
519
- activity_id: str | None = None,
520
- ) -> None:
521
- """
522
- Atomically register event subscription and release workflow lock.
523
-
524
- This method performs the following operations in a SINGLE database transaction:
525
- 1. Register event subscription (INSERT into workflow_event_subscriptions)
526
- 2. Update current activity (UPDATE workflow_instances.current_activity_id)
527
- 3. Release lock (UPDATE workflow_instances set locked_by=NULL)
528
-
529
- This ensures that when a workflow calls wait_event(), the subscription is
530
- registered and the lock is released atomically, preventing race conditions
531
- in distributed environments (distributed coroutines pattern).
532
-
533
- Note: filter_expr is not needed because subscriptions are uniquely identified
534
- by instance_id. Events are delivered to specific workflow instances.
535
-
536
- Args:
537
- instance_id: Workflow instance ID
538
- worker_id: Worker ID that currently holds the lock
539
- event_type: CloudEvent type to wait for
540
- timeout_at: Optional timeout timestamp
541
- activity_id: Current activity ID to record
542
-
543
- Raises:
544
- RuntimeError: If the worker doesn't hold the lock (sanity check)
545
- """
546
- ...
547
-
548
507
  async def register_timer_subscription_and_release_lock(
549
508
  self,
550
509
  instance_id: str,
@@ -749,3 +708,483 @@ class StorageProtocol(Protocol):
749
708
  or if instance not found
750
709
  """
751
710
  ...
711
+
712
+ # -------------------------------------------------------------------------
713
+ # Message Subscription Methods (for wait_message)
714
+ # -------------------------------------------------------------------------
715
+
716
+ async def register_message_subscription_and_release_lock(
717
+ self,
718
+ instance_id: str,
719
+ worker_id: str,
720
+ channel: str,
721
+ timeout_at: datetime | None = None,
722
+ activity_id: str | None = None,
723
+ ) -> None:
724
+ """
725
+ Atomically register message subscription and release workflow lock.
726
+
727
+ This method performs the following operations in a SINGLE database transaction:
728
+ 1. Register message subscription (INSERT into workflow_message_subscriptions)
729
+ 2. Update current activity (UPDATE workflow_instances.current_activity_id)
730
+ 3. Update status to 'waiting_for_event'
731
+ 4. Release lock (UPDATE workflow_instances set locked_by=NULL)
732
+
733
+ This ensures that when a workflow calls wait_message(), the subscription is
734
+ registered and the lock is released atomically, preventing race conditions
735
+ in distributed environments (distributed coroutines pattern).
736
+
737
+ Args:
738
+ instance_id: Workflow instance ID
739
+ worker_id: Worker ID that currently holds the lock
740
+ channel: Channel name to wait on
741
+ timeout_at: Optional timeout timestamp
742
+ activity_id: Current activity ID to record
743
+
744
+ Raises:
745
+ RuntimeError: If the worker doesn't hold the lock (sanity check)
746
+ """
747
+ ...
748
+
749
+ async def find_waiting_instances_by_channel(
750
+ self,
751
+ channel: str,
752
+ ) -> list[dict[str, Any]]:
753
+ """
754
+ Find workflow instances waiting on a specific channel.
755
+
756
+ Called when a message arrives to find which workflows are waiting for it.
757
+
758
+ Args:
759
+ channel: Channel name
760
+
761
+ Returns:
762
+ List of waiting instances with subscription info.
763
+ Each item contains: instance_id, channel, activity_id, timeout_at
764
+ """
765
+ ...
766
+
767
+ async def remove_message_subscription(
768
+ self,
769
+ instance_id: str,
770
+ channel: str,
771
+ ) -> None:
772
+ """
773
+ Remove message subscription after the message is received.
774
+
775
+ Args:
776
+ instance_id: Workflow instance
777
+ channel: Channel name
778
+ """
779
+ ...
780
+
781
+ async def deliver_message(
782
+ self,
783
+ instance_id: str,
784
+ channel: str,
785
+ data: dict[str, Any] | bytes,
786
+ metadata: dict[str, Any],
787
+ worker_id: str | None = None,
788
+ ) -> dict[str, Any] | None:
789
+ """
790
+ Deliver a message to a workflow instance waiting on a channel.
791
+
792
+ Uses Lock-First pattern to prevent race conditions in distributed environments:
793
+ 1. Checks if instance is waiting on the channel
794
+ 2. Acquires lock (Lock-First pattern) - if worker_id provided
795
+ 3. Records message to history
796
+ 4. Removes subscription
797
+ 5. Updates status to 'running'
798
+ 6. Releases lock
799
+
800
+ The workflow will be resumed by the caller or background task.
801
+
802
+ Args:
803
+ instance_id: Target workflow instance ID
804
+ channel: Channel name
805
+ data: Message payload (dict or bytes)
806
+ metadata: Message metadata
807
+ worker_id: Worker ID for locking. If None, skip locking (unsafe for distributed).
808
+
809
+ Returns:
810
+ Dict with delivery info if successful:
811
+ {"instance_id": str, "workflow_name": str, "activity_id": str}
812
+ None if message was not delivered (no subscription or lock failed)
813
+ """
814
+ ...
815
+
816
+ async def find_expired_message_subscriptions(self) -> list[dict[str, Any]]:
817
+ """
818
+ Find message subscriptions that have timed out.
819
+
820
+ Returns:
821
+ List of expired subscriptions with instance_id, channel, activity_id,
822
+ timeout_at, created_at
823
+ """
824
+ ...
825
+
826
+ # -------------------------------------------------------------------------
827
+ # Group Membership Methods (Erlang pg style)
828
+ # -------------------------------------------------------------------------
829
+
830
+ async def join_group(self, instance_id: str, group_name: str) -> None:
831
+ """
832
+ Add a workflow instance to a group.
833
+
834
+ Groups provide loose coupling for message broadcasting.
835
+ Senders don't need to know receiver instance IDs.
836
+
837
+ Args:
838
+ instance_id: Workflow instance to add
839
+ group_name: Group name (e.g., "order_notifications")
840
+ """
841
+ ...
842
+
843
+ async def leave_group(self, instance_id: str, group_name: str) -> None:
844
+ """
845
+ Remove a workflow instance from a group.
846
+
847
+ Args:
848
+ instance_id: Workflow instance to remove
849
+ group_name: Group name
850
+ """
851
+ ...
852
+
853
+ async def get_group_members(self, group_name: str) -> list[str]:
854
+ """
855
+ Get all instance IDs in a group.
856
+
857
+ Args:
858
+ group_name: Group name
859
+
860
+ Returns:
861
+ List of instance IDs that are members of the group
862
+ """
863
+ ...
864
+
865
+ async def leave_all_groups(self, instance_id: str) -> None:
866
+ """
867
+ Remove a workflow instance from all groups.
868
+
869
+ Called when a workflow completes or fails.
870
+
871
+ Args:
872
+ instance_id: Workflow instance to remove from all groups
873
+ """
874
+ ...
875
+
876
+ # -------------------------------------------------------------------------
877
+ # Workflow Resumption Methods
878
+ # -------------------------------------------------------------------------
879
+
880
+ async def find_resumable_workflows(self) -> list[dict[str, Any]]:
881
+ """
882
+ Find workflows that are ready to be resumed.
883
+
884
+ Returns workflows with status='running' that don't have an active lock.
885
+ These are typically workflows that:
886
+ - Had a message delivered (deliver_message sets status='running')
887
+ - Had their lock released after message delivery
888
+ - Haven't been picked up by auto_resume yet
889
+
890
+ This allows immediate resumption after message delivery rather than
891
+ waiting for the stale lock cleanup cycle (60+ seconds).
892
+
893
+ Returns:
894
+ List of resumable workflows.
895
+ Each item contains: instance_id, workflow_name
896
+ """
897
+ ...
898
+
899
+ # -------------------------------------------------------------------------
900
+ # Subscription Cleanup Methods (for recur())
901
+ # -------------------------------------------------------------------------
902
+
903
+ async def cleanup_instance_subscriptions(self, instance_id: str) -> None:
904
+ """
905
+ Remove all subscriptions for a workflow instance.
906
+
907
+ Called during recur() to clean up timer/message subscriptions
908
+ before archiving the history. This prevents:
909
+ - Message delivery to archived instances
910
+ - Timer expiration for non-existent workflows
911
+
912
+ Removes entries from:
913
+ - workflow_timer_subscriptions
914
+ - workflow_message_subscriptions
915
+ - channel_subscriptions (new)
916
+ - channel_message_claims (new)
917
+
918
+ Args:
919
+ instance_id: Workflow instance ID to clean up
920
+ """
921
+ ...
922
+
923
+ # -------------------------------------------------------------------------
924
+ # Channel-based Message Queue Methods
925
+ # -------------------------------------------------------------------------
926
+
927
+ async def publish_to_channel(
928
+ self,
929
+ channel: str,
930
+ data: dict[str, Any] | bytes,
931
+ metadata: dict[str, Any] | None = None,
932
+ ) -> str:
933
+ """
934
+ Publish a message to a channel.
935
+
936
+ Messages are persisted to the channel_messages table and will be
937
+ available for subscribers to receive. This implements the "mailbox"
938
+ pattern where messages are queued even before receive() is called.
939
+
940
+ Args:
941
+ channel: Channel name (e.g., "orders", "payment.completed")
942
+ data: Message payload (dict or bytes)
943
+ metadata: Optional message metadata
944
+
945
+ Returns:
946
+ Generated message_id (UUID)
947
+ """
948
+ ...
949
+
950
+ async def subscribe_to_channel(
951
+ self,
952
+ instance_id: str,
953
+ channel: str,
954
+ mode: str,
955
+ ) -> None:
956
+ """
957
+ Subscribe a workflow instance to a channel.
958
+
959
+ Args:
960
+ instance_id: Workflow instance ID
961
+ channel: Channel name
962
+ mode: Subscription mode ('broadcast' or 'competing')
963
+ - broadcast: All subscribers receive all messages
964
+ - competing: Each message is received by only one subscriber
965
+
966
+ Raises:
967
+ ValueError: If mode is not 'broadcast' or 'competing'
968
+ """
969
+ ...
970
+
971
+ async def unsubscribe_from_channel(
972
+ self,
973
+ instance_id: str,
974
+ channel: str,
975
+ ) -> None:
976
+ """
977
+ Unsubscribe a workflow instance from a channel.
978
+
979
+ Args:
980
+ instance_id: Workflow instance ID
981
+ channel: Channel name
982
+ """
983
+ ...
984
+
985
+ async def get_channel_subscription(
986
+ self,
987
+ instance_id: str,
988
+ channel: str,
989
+ ) -> dict[str, Any] | None:
990
+ """
991
+ Get the subscription info for a workflow instance on a channel.
992
+
993
+ Args:
994
+ instance_id: Workflow instance ID
995
+ channel: Channel name
996
+
997
+ Returns:
998
+ Subscription info dict with: mode, activity_id, cursor_message_id
999
+ or None if not subscribed
1000
+ """
1001
+ ...
1002
+
1003
+ async def register_channel_receive_and_release_lock(
1004
+ self,
1005
+ instance_id: str,
1006
+ worker_id: str,
1007
+ channel: str,
1008
+ activity_id: str | None = None,
1009
+ timeout_seconds: int | None = None,
1010
+ ) -> None:
1011
+ """
1012
+ Atomically register that workflow is waiting for channel message and release lock.
1013
+
1014
+ This method performs the following operations in a SINGLE database transaction:
1015
+ 1. Update channel_subscriptions to set activity_id and timeout_at (waiting state)
1016
+ 2. Update current activity (UPDATE workflow_instances.current_activity_id)
1017
+ 3. Update status to 'waiting_for_message'
1018
+ 4. Release lock (UPDATE workflow_instances set locked_by=NULL)
1019
+
1020
+ Args:
1021
+ instance_id: Workflow instance ID
1022
+ worker_id: Worker ID that currently holds the lock
1023
+ channel: Channel name being waited on
1024
+ activity_id: Current activity ID to record
1025
+ timeout_seconds: Optional timeout in seconds for the message wait
1026
+
1027
+ Raises:
1028
+ RuntimeError: If the worker doesn't hold the lock
1029
+ ValueError: If workflow is not subscribed to the channel
1030
+ """
1031
+ ...
1032
+
1033
+ async def get_pending_channel_messages(
1034
+ self,
1035
+ instance_id: str,
1036
+ channel: str,
1037
+ ) -> list[dict[str, Any]]:
1038
+ """
1039
+ Get pending messages for a subscriber on a channel.
1040
+
1041
+ For broadcast mode:
1042
+ Returns messages with id > cursor_message_id (messages not yet seen)
1043
+
1044
+ For competing mode:
1045
+ Returns unclaimed messages (not in channel_message_claims)
1046
+
1047
+ Args:
1048
+ instance_id: Workflow instance ID
1049
+ channel: Channel name
1050
+
1051
+ Returns:
1052
+ List of pending messages, ordered by published_at ASC.
1053
+ Each message contains: id, message_id, channel, data, metadata, published_at
1054
+ """
1055
+ ...
1056
+
1057
+ async def claim_channel_message(
1058
+ self,
1059
+ message_id: str,
1060
+ instance_id: str,
1061
+ ) -> bool:
1062
+ """
1063
+ Claim a message for competing consumption.
1064
+
1065
+ Uses SELECT FOR UPDATE SKIP LOCKED pattern to ensure only one
1066
+ subscriber claims each message.
1067
+
1068
+ Args:
1069
+ message_id: Message ID to claim
1070
+ instance_id: Workflow instance claiming the message
1071
+
1072
+ Returns:
1073
+ True if claim succeeded, False if already claimed by another instance
1074
+ """
1075
+ ...
1076
+
1077
+ async def delete_channel_message(self, message_id: str) -> None:
1078
+ """
1079
+ Delete a message from the channel queue.
1080
+
1081
+ Called after successful message processing in competing mode.
1082
+
1083
+ Args:
1084
+ message_id: Message ID to delete
1085
+ """
1086
+ ...
1087
+
1088
+ async def update_delivery_cursor(
1089
+ self,
1090
+ channel: str,
1091
+ instance_id: str,
1092
+ message_id: int,
1093
+ ) -> None:
1094
+ """
1095
+ Update the delivery cursor for broadcast mode.
1096
+
1097
+ Records the last message ID delivered to a subscriber, so the same
1098
+ messages are not delivered again.
1099
+
1100
+ Args:
1101
+ channel: Channel name
1102
+ instance_id: Subscriber instance ID
1103
+ message_id: Last delivered message's internal ID (channel_messages.id)
1104
+ """
1105
+ ...
1106
+
1107
+ async def get_channel_subscribers_waiting(
1108
+ self,
1109
+ channel: str,
1110
+ ) -> list[dict[str, Any]]:
1111
+ """
1112
+ Get channel subscribers that are waiting (activity_id is set).
1113
+
1114
+ Called when a message is published to find subscribers to wake up.
1115
+
1116
+ Args:
1117
+ channel: Channel name
1118
+
1119
+ Returns:
1120
+ List of waiting subscribers.
1121
+ Each item contains: instance_id, channel, mode, activity_id
1122
+ """
1123
+ ...
1124
+
1125
+ async def clear_channel_waiting_state(
1126
+ self,
1127
+ instance_id: str,
1128
+ channel: str,
1129
+ ) -> None:
1130
+ """
1131
+ Clear the waiting state for a channel subscription.
1132
+
1133
+ Called after a message is delivered to a waiting subscriber.
1134
+
1135
+ Args:
1136
+ instance_id: Workflow instance ID
1137
+ channel: Channel name
1138
+ """
1139
+ ...
1140
+
1141
+ async def deliver_channel_message(
1142
+ self,
1143
+ instance_id: str,
1144
+ channel: str,
1145
+ message_id: str,
1146
+ data: dict[str, Any] | bytes,
1147
+ metadata: dict[str, Any],
1148
+ worker_id: str,
1149
+ ) -> dict[str, Any] | None:
1150
+ """
1151
+ Deliver a channel message to a waiting workflow.
1152
+
1153
+ Uses Lock-First pattern:
1154
+ 1. Acquire lock on the workflow instance
1155
+ 2. Record message to history
1156
+ 3. Clear waiting state / update cursor / claim message
1157
+ 4. Update status to 'running'
1158
+ 5. Release lock
1159
+
1160
+ Args:
1161
+ instance_id: Target workflow instance ID
1162
+ channel: Channel name
1163
+ message_id: Message ID being delivered
1164
+ data: Message payload
1165
+ metadata: Message metadata
1166
+ worker_id: Worker ID for locking
1167
+
1168
+ Returns:
1169
+ Dict with delivery info if successful:
1170
+ {"instance_id": str, "workflow_name": str, "activity_id": str}
1171
+ None if delivery failed (lock conflict, etc.)
1172
+ """
1173
+ ...
1174
+
1175
+ async def cleanup_old_channel_messages(self, older_than_days: int = 7) -> int:
1176
+ """
1177
+ Clean up old messages from channel queues.
1178
+
1179
+ For broadcast mode: Delete messages where all current subscribers have
1180
+ received them (cursor is past the message).
1181
+
1182
+ For all modes: Delete messages older than the retention period.
1183
+
1184
+ Args:
1185
+ older_than_days: Message retention period in days
1186
+
1187
+ Returns:
1188
+ Number of messages deleted
1189
+ """
1190
+ ...