edda-framework 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edda/storage/protocol.py CHANGED
@@ -5,6 +5,7 @@ This module defines the StorageProtocol using Python's structural typing (Protoc
5
5
  Any storage implementation that conforms to this protocol can be used with Edda.
6
6
  """
7
7
 
8
+ from collections.abc import Awaitable, Callable
8
9
  from datetime import datetime
9
10
  from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
10
11
 
@@ -19,7 +20,7 @@ class StorageProtocol(Protocol):
19
20
 
20
21
  This protocol defines all the methods that a storage backend must implement
21
22
  to work with the Edda framework. It supports workflow instances, execution
22
- history, compensations, event subscriptions, outbox events, and distributed locking.
23
+ history, compensations, message subscriptions, outbox events, and distributed locking.
23
24
  """
24
25
 
25
26
  async def initialize(self) -> None:
@@ -104,6 +105,21 @@ class StorageProtocol(Protocol):
104
105
  """
105
106
  ...
106
107
 
108
+ def register_post_commit_callback(self, callback: Callable[[], Awaitable[None]]) -> None:
109
+ """
110
+ Register a callback to be executed after the current transaction commits.
111
+
112
+ The callback will be executed after the top-level transaction commits successfully.
113
+ If the transaction is rolled back, the callback will NOT be executed.
114
+
115
+ Args:
116
+ callback: An async function to call after commit.
117
+
118
+ Raises:
119
+ RuntimeError: If not in a transaction.
120
+ """
121
+ ...
122
+
107
123
  # -------------------------------------------------------------------------
108
124
  # Workflow Definition Methods
109
125
  # -------------------------------------------------------------------------
@@ -177,6 +193,7 @@ class StorageProtocol(Protocol):
177
193
  owner_service: str,
178
194
  input_data: dict[str, Any],
179
195
  lock_timeout_seconds: int | None = None,
196
+ continued_from: str | None = None,
180
197
  ) -> None:
181
198
  """
182
199
  Create a new workflow instance.
@@ -188,6 +205,7 @@ class StorageProtocol(Protocol):
188
205
  owner_service: Service that owns this workflow (e.g., "order-service")
189
206
  input_data: Input parameters for the workflow (serializable dict)
190
207
  lock_timeout_seconds: Lock timeout for this workflow (None = use global default 300s)
208
+ continued_from: Optional instance ID this workflow continues from (for recur pattern)
191
209
  """
192
210
  ...
193
211
 
@@ -339,6 +357,47 @@ class StorageProtocol(Protocol):
339
357
  """
340
358
  ...
341
359
 
360
+ # -------------------------------------------------------------------------
361
+ # System-level Locking Methods (for background task coordination)
362
+ # -------------------------------------------------------------------------
363
+
364
+ async def try_acquire_system_lock(
365
+ self,
366
+ lock_name: str,
367
+ worker_id: str,
368
+ timeout_seconds: int = 60,
369
+ ) -> bool:
370
+ """
371
+ Try to acquire a system-level lock for coordinating background tasks.
372
+
373
+ System locks are used to coordinate operational tasks (cleanup, auto-resume)
374
+ across multiple pods, ensuring only one pod executes these tasks at a time.
375
+
376
+ Unlike workflow locks (which lock existing instances), system locks create
377
+ lock records on-demand.
378
+
379
+ Args:
380
+ lock_name: Unique name for this lock (e.g., "cleanup_stale_locks")
381
+ worker_id: Unique identifier of the worker acquiring the lock
382
+ timeout_seconds: Lock timeout in seconds (default: 60)
383
+
384
+ Returns:
385
+ True if lock was acquired, False if already locked by another worker
386
+ """
387
+ ...
388
+
389
+ async def release_system_lock(self, lock_name: str, worker_id: str) -> None:
390
+ """
391
+ Release a system-level lock.
392
+
393
+ Only the worker that holds the lock can release it.
394
+
395
+ Args:
396
+ lock_name: Name of the lock to release
397
+ worker_id: Unique identifier of the worker releasing the lock
398
+ """
399
+ ...
400
+
342
401
  # -------------------------------------------------------------------------
343
402
  # History Methods (for Deterministic Replay)
344
403
  # -------------------------------------------------------------------------
@@ -377,6 +436,38 @@ class StorageProtocol(Protocol):
377
436
  """
378
437
  ...
379
438
 
439
+ async def archive_history(self, instance_id: str) -> int:
440
+ """
441
+ Archive workflow history for the recur pattern.
442
+
443
+ Moves all history entries from workflow_history to workflow_history_archive.
444
+ This is called when a workflow uses recur() to restart with fresh history.
445
+
446
+ Args:
447
+ instance_id: Workflow instance whose history should be archived
448
+
449
+ Returns:
450
+ Number of history entries archived
451
+ """
452
+ ...
453
+
454
+ async def find_first_cancellation_event(self, instance_id: str) -> dict[str, Any] | None:
455
+ """
456
+ Find the first cancellation event in workflow history.
457
+
458
+ This is an optimized query that uses LIMIT 1 to avoid loading
459
+ all history events when checking for cancellation status.
460
+
461
+ Args:
462
+ instance_id: Workflow instance ID
463
+
464
+ Returns:
465
+ The first cancellation event if found, None otherwise.
466
+ A cancellation event is any event where event_type is
467
+ 'WorkflowCancelled' or contains 'cancel' (case-insensitive).
468
+ """
469
+ ...
470
+
380
471
  # -------------------------------------------------------------------------
381
472
  # Compensation Methods (for Saga Pattern)
382
473
  # -------------------------------------------------------------------------
@@ -426,125 +517,9 @@ class StorageProtocol(Protocol):
426
517
  ...
427
518
 
428
519
  # -------------------------------------------------------------------------
429
- # Event Subscription Methods (for wait_event)
520
+ # Timer Subscription Methods (for wait_timer)
430
521
  # -------------------------------------------------------------------------
431
522
 
432
- async def add_event_subscription(
433
- self,
434
- instance_id: str,
435
- event_type: str,
436
- timeout_at: datetime | None = None,
437
- ) -> None:
438
- """
439
- Register an event wait subscription.
440
-
441
- When a workflow calls wait_event(), a subscription is created
442
- in the database so that incoming events can be routed to the
443
- waiting workflow.
444
-
445
- Note: filter_expr is not needed because subscriptions are uniquely
446
- identified by instance_id. Events are delivered to specific workflow
447
- instances, not filtered across multiple instances.
448
-
449
- Args:
450
- instance_id: Workflow instance
451
- event_type: CloudEvent type to wait for (e.g., "payment.completed")
452
- timeout_at: Optional timeout timestamp
453
- """
454
- ...
455
-
456
- async def find_waiting_instances(self, event_type: str) -> list[dict[str, Any]]:
457
- """
458
- Find workflow instances waiting for a specific event type.
459
-
460
- Called when an event arrives to find which workflows are waiting for it.
461
-
462
- Args:
463
- event_type: CloudEvent type
464
-
465
- Returns:
466
- List of waiting instances with subscription info.
467
- Each item contains: instance_id, event_type, timeout_at
468
- """
469
- ...
470
-
471
- async def remove_event_subscription(
472
- self,
473
- instance_id: str,
474
- event_type: str,
475
- ) -> None:
476
- """
477
- Remove event subscription after the event is received.
478
-
479
- Args:
480
- instance_id: Workflow instance
481
- event_type: CloudEvent type
482
- """
483
- ...
484
-
485
- async def cleanup_expired_subscriptions(self) -> int:
486
- """
487
- Clean up event subscriptions that have timed out.
488
-
489
- Returns:
490
- Number of subscriptions cleaned up
491
- """
492
- ...
493
-
494
- async def find_expired_event_subscriptions(
495
- self,
496
- ) -> list[dict[str, Any]]:
497
- """
498
- Find event subscriptions that have timed out.
499
-
500
- Returns:
501
- List of dictionaries containing:
502
- - instance_id: Workflow instance ID
503
- - event_type: Event type that was being waited for
504
- - timeout_at: Timeout timestamp (ISO 8601 string)
505
- - created_at: Subscription creation timestamp (ISO 8601 string)
506
-
507
- Note:
508
- This method does NOT delete the subscriptions - it only finds them.
509
- Use cleanup_expired_subscriptions() to delete them after processing.
510
- """
511
- ...
512
-
513
- async def register_event_subscription_and_release_lock(
514
- self,
515
- instance_id: str,
516
- worker_id: str,
517
- event_type: str,
518
- timeout_at: datetime | None = None,
519
- activity_id: str | None = None,
520
- ) -> None:
521
- """
522
- Atomically register event subscription and release workflow lock.
523
-
524
- This method performs the following operations in a SINGLE database transaction:
525
- 1. Register event subscription (INSERT into workflow_event_subscriptions)
526
- 2. Update current activity (UPDATE workflow_instances.current_activity_id)
527
- 3. Release lock (UPDATE workflow_instances set locked_by=NULL)
528
-
529
- This ensures that when a workflow calls wait_event(), the subscription is
530
- registered and the lock is released atomically, preventing race conditions
531
- in distributed environments (distributed coroutines pattern).
532
-
533
- Note: filter_expr is not needed because subscriptions are uniquely identified
534
- by instance_id. Events are delivered to specific workflow instances.
535
-
536
- Args:
537
- instance_id: Workflow instance ID
538
- worker_id: Worker ID that currently holds the lock
539
- event_type: CloudEvent type to wait for
540
- timeout_at: Optional timeout timestamp
541
- activity_id: Current activity ID to record
542
-
543
- Raises:
544
- RuntimeError: If the worker doesn't hold the lock (sanity check)
545
- """
546
- ...
547
-
548
523
  async def register_timer_subscription_and_release_lock(
549
524
  self,
550
525
  instance_id: str,
@@ -749,3 +724,449 @@ class StorageProtocol(Protocol):
749
724
  or if instance not found
750
725
  """
751
726
  ...
727
+
728
+ # -------------------------------------------------------------------------
729
+ # Message Subscription Methods (for wait_message)
730
+ # -------------------------------------------------------------------------
731
+
732
+ async def find_waiting_instances_by_channel(
733
+ self,
734
+ channel: str,
735
+ ) -> list[dict[str, Any]]:
736
+ """
737
+ Find workflow instances waiting on a specific channel.
738
+
739
+ Called when a message arrives to find which workflows are waiting for it.
740
+
741
+ Args:
742
+ channel: Channel name
743
+
744
+ Returns:
745
+ List of waiting instances with subscription info.
746
+ Each item contains: instance_id, channel, activity_id, timeout_at
747
+ """
748
+ ...
749
+
750
+ async def remove_message_subscription(
751
+ self,
752
+ instance_id: str,
753
+ channel: str,
754
+ ) -> None:
755
+ """
756
+ Remove message subscription after the message is received.
757
+
758
+ Args:
759
+ instance_id: Workflow instance
760
+ channel: Channel name
761
+ """
762
+ ...
763
+
764
+ async def deliver_message(
765
+ self,
766
+ instance_id: str,
767
+ channel: str,
768
+ data: dict[str, Any] | bytes,
769
+ metadata: dict[str, Any],
770
+ worker_id: str | None = None,
771
+ ) -> dict[str, Any] | None:
772
+ """
773
+ Deliver a message to a workflow instance waiting on a channel.
774
+
775
+ Uses Lock-First pattern to prevent race conditions in distributed environments:
776
+ 1. Checks if instance is waiting on the channel
777
+ 2. Acquires lock (Lock-First pattern) - if worker_id provided
778
+ 3. Records message to history
779
+ 4. Removes subscription
780
+ 5. Updates status to 'running'
781
+ 6. Releases lock
782
+
783
+ The workflow will be resumed by the caller or background task.
784
+
785
+ Args:
786
+ instance_id: Target workflow instance ID
787
+ channel: Channel name
788
+ data: Message payload (dict or bytes)
789
+ metadata: Message metadata
790
+ worker_id: Worker ID for locking. If None, locking is skipped (unsafe in distributed deployments).
791
+
792
+ Returns:
793
+ Dict with delivery info if successful:
794
+ {"instance_id": str, "workflow_name": str, "activity_id": str}
795
+ None if the message was not delivered (no subscription, or the lock could not be acquired)
796
+ """
797
+ ...
798
+
799
+ async def find_expired_message_subscriptions(self) -> list[dict[str, Any]]:
800
+ """
801
+ Find message subscriptions that have timed out.
802
+
803
+ Returns:
804
+ List of expired subscriptions with instance_id, channel, activity_id,
805
+ timeout_at, created_at
806
+ """
807
+ ...
808
+
809
+ # -------------------------------------------------------------------------
810
+ # Group Membership Methods (Erlang pg style)
811
+ # -------------------------------------------------------------------------
812
+
813
+ async def join_group(self, instance_id: str, group_name: str) -> None:
814
+ """
815
+ Add a workflow instance to a group.
816
+
817
+ Groups provide loose coupling for message broadcasting.
818
+ Senders don't need to know receiver instance IDs.
819
+
820
+ Args:
821
+ instance_id: Workflow instance to add
822
+ group_name: Group name (e.g., "order_notifications")
823
+ """
824
+ ...
825
+
826
+ async def leave_group(self, instance_id: str, group_name: str) -> None:
827
+ """
828
+ Remove a workflow instance from a group.
829
+
830
+ Args:
831
+ instance_id: Workflow instance to remove
832
+ group_name: Group name
833
+ """
834
+ ...
835
+
836
+ async def get_group_members(self, group_name: str) -> list[str]:
837
+ """
838
+ Get all instance IDs in a group.
839
+
840
+ Args:
841
+ group_name: Group name
842
+
843
+ Returns:
844
+ List of instance IDs that are members of the group
845
+ """
846
+ ...
847
+
848
+ async def leave_all_groups(self, instance_id: str) -> None:
849
+ """
850
+ Remove a workflow instance from all groups.
851
+
852
+ Called when a workflow completes or fails.
853
+
854
+ Args:
855
+ instance_id: Workflow instance to remove from all groups
856
+ """
857
+ ...
858
+
859
+ # -------------------------------------------------------------------------
860
+ # Workflow Resumption Methods
861
+ # -------------------------------------------------------------------------
862
+
863
+ async def find_resumable_workflows(self) -> list[dict[str, Any]]:
864
+ """
865
+ Find workflows that are ready to be resumed.
866
+
867
+ Returns workflows with status='running' that don't have an active lock.
868
+ These are typically workflows that:
869
+ - Had a message delivered (deliver_message sets status='running')
870
+ - Had their lock released after message delivery
871
+ - Haven't been picked up by auto_resume yet
872
+
873
+ This allows immediate resumption after message delivery rather than
874
+ waiting for the stale lock cleanup cycle (60+ seconds).
875
+
876
+ Returns:
877
+ List of resumable workflows.
878
+ Each item contains: instance_id, workflow_name
879
+ """
880
+ ...
881
+
882
+ # -------------------------------------------------------------------------
883
+ # Subscription Cleanup Methods (for recur())
884
+ # -------------------------------------------------------------------------
885
+
886
+ async def cleanup_instance_subscriptions(self, instance_id: str) -> None:
887
+ """
888
+ Remove all subscriptions for a workflow instance.
889
+
890
+ Called during recur() to clean up timer/message subscriptions
891
+ before archiving the history. This prevents:
892
+ - Message delivery to archived instances
893
+ - Timer expiration for non-existent workflows
894
+
895
+ Removes entries from:
896
+ - workflow_timer_subscriptions
897
+ - channel_subscriptions
898
+ - channel_message_claims
899
+
900
+ Args:
901
+ instance_id: Workflow instance ID to clean up
902
+ """
903
+ ...
904
+
905
+ # -------------------------------------------------------------------------
906
+ # Channel-based Message Queue Methods
907
+ # -------------------------------------------------------------------------
908
+
909
+ async def publish_to_channel(
910
+ self,
911
+ channel: str,
912
+ data: dict[str, Any] | bytes,
913
+ metadata: dict[str, Any] | None = None,
914
+ ) -> str:
915
+ """
916
+ Publish a message to a channel.
917
+
918
+ Messages are persisted to the channel_messages table and will be
919
+ available for subscribers to receive. This implements the "mailbox"
920
+ pattern where messages are queued even before receive() is called.
921
+
922
+ Args:
923
+ channel: Channel name (e.g., "orders", "payment.completed")
924
+ data: Message payload (dict or bytes)
925
+ metadata: Optional message metadata
926
+
927
+ Returns:
928
+ Generated message_id (UUID)
929
+ """
930
+ ...
931
+
932
+ async def subscribe_to_channel(
933
+ self,
934
+ instance_id: str,
935
+ channel: str,
936
+ mode: str,
937
+ ) -> None:
938
+ """
939
+ Subscribe a workflow instance to a channel.
940
+
941
+ Args:
942
+ instance_id: Workflow instance ID
943
+ channel: Channel name
944
+ mode: Subscription mode ('broadcast' or 'competing')
945
+ - broadcast: All subscribers receive all messages
946
+ - competing: Each message is received by only one subscriber
947
+
948
+ Raises:
949
+ ValueError: If mode is not 'broadcast' or 'competing'
950
+ """
951
+ ...
952
+
953
+ async def unsubscribe_from_channel(
954
+ self,
955
+ instance_id: str,
956
+ channel: str,
957
+ ) -> None:
958
+ """
959
+ Unsubscribe a workflow instance from a channel.
960
+
961
+ Args:
962
+ instance_id: Workflow instance ID
963
+ channel: Channel name
964
+ """
965
+ ...
966
+
967
+ async def get_channel_subscription(
968
+ self,
969
+ instance_id: str,
970
+ channel: str,
971
+ ) -> dict[str, Any] | None:
972
+ """
973
+ Get the subscription info for a workflow instance on a channel.
974
+
975
+ Args:
976
+ instance_id: Workflow instance ID
977
+ channel: Channel name
978
+
979
+ Returns:
980
+ Subscription info dict with: mode, activity_id, cursor_message_id
981
+ or None if not subscribed
982
+ """
983
+ ...
984
+
985
+ async def register_channel_receive_and_release_lock(
986
+ self,
987
+ instance_id: str,
988
+ worker_id: str,
989
+ channel: str,
990
+ activity_id: str | None = None,
991
+ timeout_seconds: int | None = None,
992
+ ) -> None:
993
+ """
994
+ Atomically register that the workflow is waiting for a channel message and release the lock.
995
+
996
+ This method performs the following operations in a SINGLE database transaction:
997
+ 1. Update channel_subscriptions to set activity_id and timeout_at (waiting state)
998
+ 2. Update current activity (UPDATE workflow_instances.current_activity_id)
999
+ 3. Update status to 'waiting_for_message'
1000
+ 4. Release lock (UPDATE workflow_instances set locked_by=NULL)
1001
+
1002
+ Args:
1003
+ instance_id: Workflow instance ID
1004
+ worker_id: Worker ID that currently holds the lock
1005
+ channel: Channel name being waited on
1006
+ activity_id: Current activity ID to record
1007
+ timeout_seconds: Optional timeout in seconds for the message wait
1008
+
1009
+ Raises:
1010
+ RuntimeError: If the worker doesn't hold the lock
1011
+ ValueError: If workflow is not subscribed to the channel
1012
+ """
1013
+ ...
1014
+
1015
+ async def get_pending_channel_messages(
1016
+ self,
1017
+ instance_id: str,
1018
+ channel: str,
1019
+ ) -> list[dict[str, Any]]:
1020
+ """
1021
+ Get pending messages for a subscriber on a channel.
1022
+
1023
+ For broadcast mode:
1024
+ Returns messages with id > cursor_message_id (messages not yet seen)
1025
+
1026
+ For competing mode:
1027
+ Returns unclaimed messages (not in channel_message_claims)
1028
+
1029
+ Args:
1030
+ instance_id: Workflow instance ID
1031
+ channel: Channel name
1032
+
1033
+ Returns:
1034
+ List of pending messages, ordered by published_at ASC.
1035
+ Each message contains: id, message_id, channel, data, metadata, published_at
1036
+ """
1037
+ ...
1038
+
1039
+ async def claim_channel_message(
1040
+ self,
1041
+ message_id: str,
1042
+ instance_id: str,
1043
+ ) -> bool:
1044
+ """
1045
+ Claim a message for competing consumption.
1046
+
1047
+ Uses SELECT FOR UPDATE SKIP LOCKED pattern to ensure only one
1048
+ subscriber claims each message.
1049
+
1050
+ Args:
1051
+ message_id: Message ID to claim
1052
+ instance_id: Workflow instance claiming the message
1053
+
1054
+ Returns:
1055
+ True if claim succeeded, False if already claimed by another instance
1056
+ """
1057
+ ...
1058
+
1059
+ async def delete_channel_message(self, message_id: str) -> None:
1060
+ """
1061
+ Delete a message from the channel queue.
1062
+
1063
+ Called after successful message processing in competing mode.
1064
+
1065
+ Args:
1066
+ message_id: Message ID to delete
1067
+ """
1068
+ ...
1069
+
1070
+ async def update_delivery_cursor(
1071
+ self,
1072
+ channel: str,
1073
+ instance_id: str,
1074
+ message_id: int,
1075
+ ) -> None:
1076
+ """
1077
+ Update the delivery cursor for broadcast mode.
1078
+
1079
+ Records the last message ID delivered to a subscriber, so the same
1080
+ messages are not delivered again.
1081
+
1082
+ Args:
1083
+ channel: Channel name
1084
+ instance_id: Subscriber instance ID
1085
+ message_id: Last delivered message's internal ID (channel_messages.id)
1086
+ """
1087
+ ...
1088
+
1089
+ async def get_channel_subscribers_waiting(
1090
+ self,
1091
+ channel: str,
1092
+ ) -> list[dict[str, Any]]:
1093
+ """
1094
+ Get channel subscribers that are waiting (activity_id is set).
1095
+
1096
+ Called when a message is published to find subscribers to wake up.
1097
+
1098
+ Args:
1099
+ channel: Channel name
1100
+
1101
+ Returns:
1102
+ List of waiting subscribers.
1103
+ Each item contains: instance_id, channel, mode, activity_id
1104
+ """
1105
+ ...
1106
+
1107
+ async def clear_channel_waiting_state(
1108
+ self,
1109
+ instance_id: str,
1110
+ channel: str,
1111
+ ) -> None:
1112
+ """
1113
+ Clear the waiting state for a channel subscription.
1114
+
1115
+ Called after a message is delivered to a waiting subscriber.
1116
+
1117
+ Args:
1118
+ instance_id: Workflow instance ID
1119
+ channel: Channel name
1120
+ """
1121
+ ...
1122
+
1123
+ async def deliver_channel_message(
1124
+ self,
1125
+ instance_id: str,
1126
+ channel: str,
1127
+ message_id: str,
1128
+ data: dict[str, Any] | bytes,
1129
+ metadata: dict[str, Any],
1130
+ worker_id: str,
1131
+ ) -> dict[str, Any] | None:
1132
+ """
1133
+ Deliver a channel message to a waiting workflow.
1134
+
1135
+ Uses Lock-First pattern:
1136
+ 1. Acquire lock on the workflow instance
1137
+ 2. Record message to history
1138
+ 3. Clear waiting state / update cursor / claim message
1139
+ 4. Update status to 'running'
1140
+ 5. Release lock
1141
+
1142
+ Args:
1143
+ instance_id: Target workflow instance ID
1144
+ channel: Channel name
1145
+ message_id: Message ID being delivered
1146
+ data: Message payload
1147
+ metadata: Message metadata
1148
+ worker_id: Worker ID for locking
1149
+
1150
+ Returns:
1151
+ Dict with delivery info if successful:
1152
+ {"instance_id": str, "workflow_name": str, "activity_id": str}
1153
+ None if delivery failed (lock conflict, etc.)
1154
+ """
1155
+ ...
1156
+
1157
+ async def cleanup_old_channel_messages(self, older_than_days: int = 7) -> int:
1158
+ """
1159
+ Clean up old messages from channel queues.
1160
+
1161
+ For broadcast mode: Delete messages that all current subscribers have
1162
+ already received (each subscriber's cursor is past the message).
1163
+
1164
+ For all modes: Delete messages older than the retention period.
1165
+
1166
+ Args:
1167
+ older_than_days: Message retention period in days
1168
+
1169
+ Returns:
1170
+ Number of messages deleted
1171
+ """
1172
+ ...