edda-framework 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,737 @@
+ """
+ Storage protocol definition for Edda framework.
+
+ This module defines the StorageProtocol using Python's structural typing (Protocol).
+ Any storage implementation that conforms to this protocol can be used with Edda.
+ """
+
+ from datetime import datetime
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+
+ if TYPE_CHECKING:
+     pass
+
+
+ @runtime_checkable
+ class StorageProtocol(Protocol):
+     """
+     Protocol for storage backend implementations.
+
+     This protocol defines all the methods that a storage backend must implement
+     to work with the Edda framework. It supports workflow instances, execution
+     history, compensations, event subscriptions, outbox events, and distributed locking.
+     """
+
+     async def initialize(self) -> None:
+         """
+         Initialize storage (create tables, connections, etc.).
+
+         This method should be idempotent: calling it multiple times
+         should not cause errors.
+         """
+         ...
+
+     async def close(self) -> None:
+         """
+         Close storage connections and clean up resources.
+
+         This method should be called when shutting down the application.
+         """
+         ...
+
+     # -------------------------------------------------------------------------
+     # Transaction Management Methods
+     # -------------------------------------------------------------------------
+
+     async def begin_transaction(self) -> None:
+         """
+         Begin a new transaction.
+
+         If a transaction is already in progress, this will create a nested
+         transaction using savepoints (supported by SQLite and PostgreSQL).
+
+         This method is typically called by WorkflowContext.transaction() and
+         should not be called directly by user code.
+
+         Example:
+             async with ctx.transaction():
+                 # All operations here are in the same transaction
+                 await ctx.storage.append_history(...)
+                 await send_event_transactional(ctx, ...)
+         """
+         ...
+
+     async def commit_transaction(self) -> None:
+         """
+         Commit the current transaction.
+
+         For nested transactions (savepoints), this will release the savepoint.
+         For top-level transactions, this will commit all changes to the database.
+
+         This method is typically called by WorkflowContext.transaction() and
+         should not be called directly by user code.
+
+         Raises:
+             RuntimeError: If not in a transaction
+         """
+         ...
+
+     async def rollback_transaction(self) -> None:
+         """
+         Roll back the current transaction.
+
+         For nested transactions (savepoints), this will roll back to the savepoint.
+         For top-level transactions, this will roll back all changes.
+
+         This method is typically called by WorkflowContext.transaction() on
+         exception and should not be called directly by user code.
+
+         Raises:
+             RuntimeError: If not in a transaction
+         """
+         ...
+
+     def in_transaction(self) -> bool:
+         """
+         Check if currently in a transaction.
+
+         Returns:
+             True if in a transaction, False otherwise.
+
+         Note:
+             This is a synchronous method because it only checks state;
+             it does not perform any I/O.
+         """
+         ...
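
To make the transaction contract concrete, the sketch below shows the kind of async context manager that WorkflowContext.transaction() is described as wrapping around these three methods. The standalone `transaction()` helper is hypothetical; only the begin/commit/rollback calls come from the protocol above.

```python
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager


@asynccontextmanager
async def transaction(storage: "StorageProtocol") -> AsyncIterator[None]:
    # Nested calls become savepoints, so each level unwinds independently.
    await storage.begin_transaction()
    try:
        yield
    except Exception:
        # Undo only this transaction level, then let the error propagate.
        await storage.rollback_transaction()
        raise
    else:
        await storage.commit_transaction()
```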
+
+     # -------------------------------------------------------------------------
+     # Workflow Definition Methods
+     # -------------------------------------------------------------------------
+
+     async def upsert_workflow_definition(
+         self,
+         workflow_name: str,
+         source_hash: str,
+         source_code: str,
+     ) -> None:
+         """
+         Insert or update a workflow definition.
+
+         This method stores the workflow source code with a unique combination
+         of workflow_name and source_hash. If the same combination already exists,
+         it updates the record (idempotent).
+
+         Args:
+             workflow_name: Name of the workflow (e.g., "order_saga")
+             source_hash: SHA256 hash of the source code
+             source_code: Source code of the workflow function
+         """
+         ...
+
+     async def get_workflow_definition(
+         self,
+         workflow_name: str,
+         source_hash: str,
+     ) -> dict[str, Any] | None:
+         """
+         Get a workflow definition by name and hash.
+
+         Args:
+             workflow_name: Name of the workflow
+             source_hash: SHA256 hash of the source code
+
+         Returns:
+             Dictionary containing definition metadata, or None if not found.
+             Expected keys: workflow_name, source_hash, source_code, created_at
+         """
+         ...
+
+     async def get_current_workflow_definition(
+         self,
+         workflow_name: str,
+     ) -> dict[str, Any] | None:
+         """
+         Get the most recent workflow definition by name.
+
+         This returns the latest definition for a workflow, which may differ
+         from older definitions if the workflow code has changed.
+
+         Args:
+             workflow_name: Name of the workflow
+
+         Returns:
+             Dictionary containing definition metadata, or None if not found.
+             Expected keys: workflow_name, source_hash, source_code, created_at
+         """
+         ...
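
For illustration, here is one way a registration step could produce the (workflow_name, source_hash) pair these methods key on. Hashing the raw inspect.getsource() output is an assumption; this file does not say whether Edda normalizes the source before hashing.

```python
import hashlib
import inspect


async def register_definition(storage: "StorageProtocol", workflow_fn) -> str:
    # One record per (name, source revision): a changed body changes the hash.
    source = inspect.getsource(workflow_fn)
    source_hash = hashlib.sha256(source.encode("utf-8")).hexdigest()
    await storage.upsert_workflow_definition(
        workflow_name=workflow_fn.__name__,
        source_hash=source_hash,
        source_code=source,
    )
    return source_hash
```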
167
+
168
+ # -------------------------------------------------------------------------
169
+ # Workflow Instance Methods
170
+ # -------------------------------------------------------------------------
171
+
172
+ async def create_instance(
173
+ self,
174
+ instance_id: str,
175
+ workflow_name: str,
176
+ source_hash: str,
177
+ owner_service: str,
178
+ input_data: dict[str, Any],
179
+ lock_timeout_seconds: int | None = None,
180
+ ) -> None:
181
+ """
182
+ Create a new workflow instance.
183
+
184
+ Args:
185
+ instance_id: Unique identifier for the workflow instance
186
+ workflow_name: Name of the workflow (e.g., "order_saga")
187
+ source_hash: SHA256 hash of the workflow source code
188
+ owner_service: Service that owns this workflow (e.g., "order-service")
189
+ input_data: Input parameters for the workflow (serializable dict)
190
+ lock_timeout_seconds: Lock timeout for this workflow (None = use global default 300s)
191
+ """
192
+ ...
193
+
194
+ async def get_instance(self, instance_id: str) -> dict[str, Any] | None:
195
+ """
196
+ Get workflow instance metadata with its definition.
197
+
198
+ This method JOINs workflow_instances with workflow_definitions to
199
+ return the instance along with its source code.
200
+
201
+ Args:
202
+ instance_id: Unique identifier for the workflow instance
203
+
204
+ Returns:
205
+ Dictionary containing instance metadata, or None if not found.
206
+ Expected keys: instance_id, workflow_name, source_hash, owner_service,
207
+ status, current_activity_id, started_at, updated_at, input_data, source_code,
208
+ output_data, locked_by, locked_at
209
+ """
210
+ ...
211
+
212
+ async def update_instance_status(
213
+ self,
214
+ instance_id: str,
215
+ status: str,
216
+ output_data: dict[str, Any] | None = None,
217
+ ) -> None:
218
+ """
219
+ Update workflow instance status.
220
+
221
+ Args:
222
+ instance_id: Unique identifier for the workflow instance
223
+ status: New status (e.g., "running", "completed", "failed", "waiting_for_event")
224
+ output_data: Optional output data (for completed workflows)
225
+ """
226
+ ...
227
+
228
+ async def update_instance_activity(self, instance_id: str, activity_id: str) -> None:
229
+ """
230
+ Update the current activity ID for a workflow instance.
231
+
232
+ Args:
233
+ instance_id: Unique identifier for the workflow instance
234
+ activity_id: Current activity ID being executed
235
+ """
236
+ ...
237
+
238
+ async def list_instances(
239
+ self,
240
+ limit: int = 50,
241
+ status_filter: str | None = None,
242
+ ) -> list[dict[str, Any]]:
243
+ """
244
+ List workflow instances with optional filtering.
245
+
246
+ This method JOINs workflow_instances with workflow_definitions to
247
+ return instances along with their source code.
248
+
249
+ Args:
250
+ limit: Maximum number of instances to return
251
+ status_filter: Optional status filter (e.g., "running", "completed", "failed")
252
+
253
+ Returns:
254
+ List of workflow instances, ordered by started_at DESC.
255
+ Each instance contains: instance_id, workflow_name, source_hash,
256
+ owner_service, status, current_activity_id, started_at, updated_at,
257
+ input_data, source_code, output_data, locked_by, locked_at
258
+ """
259
+ ...
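
A small usage sketch tying these together; it assumes the `source_hash` from the registration step above, and the input_data payload is invented for the example.

```python
import uuid


async def start_order_saga(storage: "StorageProtocol", source_hash: str) -> str:
    instance_id = str(uuid.uuid4())  # unique across services
    await storage.create_instance(
        instance_id=instance_id,
        workflow_name="order_saga",
        source_hash=source_hash,
        owner_service="order-service",
        input_data={"order_id": "ord-42", "amount": 99.5},  # invented example payload
        lock_timeout_seconds=None,  # fall back to the global 300s default
    )
    return instance_id


async def show_running(storage: "StorageProtocol") -> None:
    # Keys follow the docstring above (instance_id, status, started_at, ...).
    for inst in await storage.list_instances(limit=10, status_filter="running"):
        print(inst["instance_id"], inst["status"], inst["started_at"])
```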
+
+     # -------------------------------------------------------------------------
+     # Distributed Locking Methods
+     # -------------------------------------------------------------------------
+
+     async def try_acquire_lock(
+         self,
+         instance_id: str,
+         worker_id: str,
+         timeout_seconds: int = 300,
+     ) -> bool:
+         """
+         Try to acquire the lock for a workflow instance.
+
+         This method implements distributed locking to ensure only one worker
+         processes a workflow instance at a time. It can take over locks that
+         have timed out.
+
+         Args:
+             instance_id: Workflow instance to lock
+             worker_id: Unique identifier of the worker acquiring the lock
+             timeout_seconds: Lock timeout in seconds (default: 300)
+
+         Returns:
+             True if the lock was acquired, False if already locked by another worker
+         """
+         ...
+
+     async def release_lock(self, instance_id: str, worker_id: str) -> None:
+         """
+         Release the lock for a workflow instance.
+
+         Only the worker that holds the lock can release it.
+
+         Args:
+             instance_id: Workflow instance to unlock
+             worker_id: Unique identifier of the worker releasing the lock
+         """
+         ...
+
+     async def refresh_lock(self, instance_id: str, worker_id: str) -> bool:
+         """
+         Refresh the lock timestamp for long-running workflows.
+
+         This prevents the lock from timing out during long operations.
+
+         Args:
+             instance_id: Workflow instance to refresh
+             worker_id: Unique identifier of the worker holding the lock
+
+         Returns:
+             True if successfully refreshed, False if the lock was lost
+         """
+         ...
+
+     async def cleanup_stale_locks(self) -> list[dict[str, str]]:
+         """
+         Clean up locks that have expired (based on the lock_expires_at column).
+
+         This should be called periodically to clean up locks from crashed workers.
+
+         Returns:
+             List of cleaned workflow instances with status='running' or 'compensating'.
+             Each dict contains: {'instance_id': str, 'workflow_name': str, 'source_hash': str}
+             These are workflows that need to be auto-resumed.
+         """
+         ...
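
The locking methods compose into a worker loop roughly like the following sketch. The 60-second heartbeat interval and the task wiring are assumptions; the protocol only fixes the acquire/refresh/release semantics.

```python
import asyncio


async def run_with_lock(storage: "StorageProtocol", instance_id: str, worker_id: str) -> None:
    # Only one worker may drive an instance at a time.
    if not await storage.try_acquire_lock(instance_id, worker_id, timeout_seconds=300):
        return  # another worker owns it

    async def heartbeat() -> None:
        # Refresh well inside the 300s timeout; stop if the lock is ever lost.
        while await storage.refresh_lock(instance_id, worker_id):
            await asyncio.sleep(60)

    refresher = asyncio.create_task(heartbeat())
    try:
        ...  # replay history and run the next activities here
    finally:
        refresher.cancel()
        await storage.release_lock(instance_id, worker_id)
```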
+
+     # -------------------------------------------------------------------------
+     # History Methods (for Deterministic Replay)
+     # -------------------------------------------------------------------------
+
+     async def append_history(
+         self,
+         instance_id: str,
+         activity_id: str,
+         event_type: str,
+         event_data: dict[str, Any] | bytes,
+     ) -> None:
+         """
+         Append an event to the workflow execution history.
+
+         The history is used for deterministic replay: each activity result
+         is stored as a history event.
+
+         Args:
+             instance_id: Workflow instance
+             activity_id: Activity ID in the workflow
+             event_type: Type of event (e.g., "ActivityCompleted", "ActivityFailed")
+             event_data: Event payload (JSON dict or binary bytes)
+         """
+         ...
+
+     async def get_history(self, instance_id: str) -> list[dict[str, Any]]:
+         """
+         Get workflow execution history in order.
+
+         Args:
+             instance_id: Workflow instance
+
+         Returns:
+             List of history events, ordered by creation time.
+             Each event contains: id, instance_id, activity_id, event_type, event_data, created_at
+         """
+         ...
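
The replay contract implied by these two methods: before re-executing an activity, the runtime checks whether a result for that activity_id is already in history and, if so, reuses it instead of running the side effect again. A rough sketch (the lookup shape is an assumption; only append_history/get_history come from the protocol):

```python
from typing import Any


async def run_activity(
    storage: "StorageProtocol",
    instance_id: str,
    activity_id: str,
    activity,  # async callable performing the real side effect
) -> Any:
    # Replay: a recorded result short-circuits re-execution, which is
    # what keeps non-deterministic side effects from running twice.
    for event in await storage.get_history(instance_id):
        if event["activity_id"] == activity_id and event["event_type"] == "ActivityCompleted":
            return event["event_data"]

    result = await activity()
    await storage.append_history(instance_id, activity_id, "ActivityCompleted", result)
    return result
```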
+
+     # -------------------------------------------------------------------------
+     # Compensation Methods (for Saga Pattern)
+     # -------------------------------------------------------------------------
+
+     async def push_compensation(
+         self,
+         instance_id: str,
+         activity_id: str,
+         activity_name: str,
+         args: dict[str, Any],
+     ) -> None:
+         """
+         Push a compensation onto the stack (LIFO).
+
+         Compensations are executed in reverse order when a saga fails.
+
+         Args:
+             instance_id: Workflow instance
+             activity_id: Activity ID where the compensation was registered
+             activity_name: Name of the compensation activity
+             args: Arguments to pass to the compensation activity
+         """
+         ...
+
+     async def get_compensations(self, instance_id: str) -> list[dict[str, Any]]:
+         """
+         Get compensations in LIFO order (most recent first).
+
+         Args:
+             instance_id: Workflow instance
+
+         Returns:
+             List of compensations, ordered by creation time DESC (most recent first).
+             Each compensation contains: id, instance_id, activity_id, activity_name, args, created_at
+         """
+         ...
+
+     async def clear_compensations(self, instance_id: str) -> None:
+         """
+         Clear all compensations for a workflow instance.
+
+         Called after successful workflow completion.
+
+         Args:
+             instance_id: Workflow instance
+         """
+         ...
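
Together these suggest a compensation pass shaped like the sketch below. The `registry` mapping from activity_name to an async callable is hypothetical, and whether Edda also clears the stack after a compensation run (rather than only after success) is an assumption here.

```python
from collections.abc import Awaitable, Callable
from typing import Any


async def compensate(
    storage: "StorageProtocol",
    instance_id: str,
    registry: dict[str, Callable[..., Awaitable[Any]]],  # hypothetical name -> activity map
) -> None:
    # get_compensations is already LIFO, so the undo steps run in the
    # reverse of the order in which they were registered.
    for comp in await storage.get_compensations(instance_id):
        await registry[comp["activity_name"]](**comp["args"])
    await storage.clear_compensations(instance_id)
```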
+
+     # -------------------------------------------------------------------------
+     # Event Subscription Methods (for wait_event)
+     # -------------------------------------------------------------------------
+
+     async def add_event_subscription(
+         self,
+         instance_id: str,
+         event_type: str,
+         timeout_at: datetime | None = None,
+     ) -> None:
+         """
+         Register an event wait subscription.
+
+         When a workflow calls wait_event(), a subscription is created
+         in the database so that incoming events can be routed to the
+         waiting workflow.
+
+         Note: filter_expr is not needed because subscriptions are uniquely
+         identified by instance_id. Events are delivered to specific workflow
+         instances, not filtered across multiple instances.
+
+         Args:
+             instance_id: Workflow instance
+             event_type: CloudEvent type to wait for (e.g., "payment.completed")
+             timeout_at: Optional timeout timestamp
+         """
+         ...
+
+     async def find_waiting_instances(self, event_type: str) -> list[dict[str, Any]]:
+         """
+         Find workflow instances waiting for a specific event type.
+
+         Called when an event arrives to find which workflows are waiting for it.
+
+         Args:
+             event_type: CloudEvent type
+
+         Returns:
+             List of waiting instances with subscription info.
+             Each item contains: instance_id, event_type, timeout_at
+         """
+         ...
+
+     async def remove_event_subscription(
+         self,
+         instance_id: str,
+         event_type: str,
+     ) -> None:
+         """
+         Remove the event subscription after the event is received.
+
+         Args:
+             instance_id: Workflow instance
+             event_type: CloudEvent type
+         """
+         ...
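
A sketch of the event-routing path these methods enable. The "wait_event" / "EventReceived" labels and the resume hand-off are illustrative assumptions; the protocol only supplies the find/remove primitives.

```python
from typing import Any


async def route_event(
    storage: "StorageProtocol", event_type: str, payload: dict[str, Any]
) -> list[str]:
    # Fan the incoming event out to every instance waiting on its type.
    resumed: list[str] = []
    for sub in await storage.find_waiting_instances(event_type):
        instance_id = sub["instance_id"]
        # Record the event so replay sees it, then drop the subscription.
        # ("wait_event"/"EventReceived" are invented labels, not from this file.)
        await storage.append_history(instance_id, "wait_event", "EventReceived", payload)
        await storage.remove_event_subscription(instance_id, event_type)
        resumed.append(instance_id)  # the runtime re-schedules these
    return resumed
```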
+
+     async def cleanup_expired_subscriptions(self) -> int:
+         """
+         Clean up event subscriptions that have timed out.
+
+         Returns:
+             Number of subscriptions cleaned up
+         """
+         ...
+
+     async def find_expired_event_subscriptions(
+         self,
+     ) -> list[dict[str, Any]]:
+         """
+         Find event subscriptions that have timed out.
+
+         Returns:
+             List of dictionaries containing:
+             - instance_id: Workflow instance ID
+             - event_type: Event type that was being waited for
+             - timeout_at: Timeout timestamp (ISO 8601 string)
+             - created_at: Subscription creation timestamp (ISO 8601 string)
+
+         Note:
+             This method does NOT delete the subscriptions - it only finds them.
+             Use cleanup_expired_subscriptions() to delete them after processing.
+         """
+         ...
+
+     async def register_event_subscription_and_release_lock(
+         self,
+         instance_id: str,
+         worker_id: str,
+         event_type: str,
+         timeout_at: datetime | None = None,
+         activity_id: str | None = None,
+     ) -> None:
+         """
+         Atomically register an event subscription and release the workflow lock.
+
+         This method performs the following operations in a SINGLE database transaction:
+         1. Register the event subscription (INSERT into workflow_event_subscriptions)
+         2. Update the current activity (UPDATE workflow_instances.current_activity_id)
+         3. Release the lock (UPDATE workflow_instances SET locked_by = NULL)
+
+         This ensures that when a workflow calls wait_event(), the subscription is
+         registered and the lock is released atomically, preventing race conditions
+         in distributed environments (distributed coroutines pattern).
+
+         Note: filter_expr is not needed because subscriptions are uniquely identified
+         by instance_id. Events are delivered to specific workflow instances.
+
+         Args:
+             instance_id: Workflow instance ID
+             worker_id: Worker ID that currently holds the lock
+             event_type: CloudEvent type to wait for
+             timeout_at: Optional timeout timestamp
+             activity_id: Current activity ID to record
+
+         Raises:
+             RuntimeError: If the worker doesn't hold the lock (sanity check)
+         """
+         ...
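
A sketch of how a wait_event() suspension might use this atomic method. The helper name and the "wait_event" activity label are assumptions; the point is that steps 1-3 above happen in one transaction, so no event can arrive between "subscription exists" and "lock released".

```python
from datetime import datetime, timedelta, timezone


async def suspend_for_event(
    storage: "StorageProtocol",
    instance_id: str,
    worker_id: str,
    event_type: str,
    timeout: timedelta | None = None,
) -> None:
    timeout_at = datetime.now(timezone.utc) + timeout if timeout else None
    # One transaction: subscribe + record activity + unlock. Done as two
    # separate calls, an event arriving in the gap could find either
    # no subscription yet or a still-locked instance.
    await storage.register_event_subscription_and_release_lock(
        instance_id=instance_id,
        worker_id=worker_id,
        event_type=event_type,
        timeout_at=timeout_at,
        activity_id="wait_event",  # illustrative label
    )
```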
+
+     async def register_timer_subscription_and_release_lock(
+         self,
+         instance_id: str,
+         worker_id: str,
+         timer_id: str,
+         expires_at: datetime,
+         activity_id: str | None = None,
+     ) -> None:
+         """
+         Atomically register a timer subscription and release the workflow lock.
+
+         This method performs the following operations in a SINGLE database transaction:
+         1. Register the timer subscription (INSERT into workflow_timer_subscriptions)
+         2. Update the current activity (UPDATE workflow_instances.current_activity_id)
+         3. Release the lock (UPDATE workflow_instances SET locked_by = NULL)
+
+         This ensures that when a workflow calls wait_timer(), the subscription is
+         registered and the lock is released atomically, preventing race conditions
+         in distributed environments (distributed coroutines pattern).
+
+         Args:
+             instance_id: Workflow instance ID
+             worker_id: Worker ID that currently holds the lock
+             timer_id: Timer identifier (unique per instance)
+             expires_at: Expiration timestamp
+             activity_id: Current activity ID to record
+
+         Raises:
+             RuntimeError: If the worker doesn't hold the lock (sanity check)
+         """
+         ...
+
+     async def find_expired_timers(self) -> list[dict[str, Any]]:
+         """
+         Find timer subscriptions that have expired.
+
+         This method is called periodically by a background task to find
+         workflows waiting for timers that have expired.
+
+         Returns:
+             List of expired timer subscriptions.
+             Each item contains: instance_id, timer_id, expires_at, activity_id, workflow_name
+         """
+         ...
+
+     async def remove_timer_subscription(
+         self,
+         instance_id: str,
+         timer_id: str,
+     ) -> None:
+         """
+         Remove the timer subscription after the timer expires.
+
+         Args:
+             instance_id: Workflow instance ID
+             timer_id: Timer identifier
+         """
+         ...
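
The background task mentioned above might poll roughly like this sketch; the interval and the division of labor with the scheduler are assumptions.

```python
import asyncio


async def timer_poller(storage: "StorageProtocol", interval_seconds: float = 1.0) -> None:
    # Background task: fire expired timers and hand the affected
    # workflows back to the scheduler for resumption.
    while True:
        for timer in await storage.find_expired_timers():
            await storage.remove_timer_subscription(timer["instance_id"], timer["timer_id"])
            # Resumption itself (re-acquiring the lock, replaying history)
            # is the runtime's job and is omitted here.
        await asyncio.sleep(interval_seconds)
```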
+
+     # -------------------------------------------------------------------------
+     # Transactional Outbox Methods
+     # -------------------------------------------------------------------------
+
+     async def add_outbox_event(
+         self,
+         event_id: str,
+         event_type: str,
+         event_source: str,
+         event_data: dict[str, Any] | bytes,
+         content_type: str = "application/json",
+     ) -> None:
+         """
+         Add an event to the transactional outbox.
+
+         Events in the outbox are published asynchronously by the relayer.
+
+         Args:
+             event_id: Unique event identifier
+             event_type: CloudEvent type
+             event_source: CloudEvent source
+             event_data: Event payload (JSON dict or binary bytes)
+             content_type: Content type (defaults to application/json)
+         """
+         ...
+
+     async def get_pending_outbox_events(self, limit: int = 10) -> list[dict[str, Any]]:
+         """
+         Get pending/failed outbox events and atomically mark them as 'processing'.
+
+         This method uses SELECT FOR UPDATE (with SKIP LOCKED on PostgreSQL/MySQL)
+         to safely fetch events in a multi-worker environment. It fetches both
+         'pending' and 'failed' events (for automatic retry). Fetched events are
+         immediately marked as 'processing' within the same transaction to prevent
+         duplicate processing by other workers.
+
+         Args:
+             limit: Maximum number of events to return
+
+         Returns:
+             List of events (now with status='processing'), ordered by created_at.
+             Each event contains: event_id, event_type, event_source, event_data,
+             created_at, status ('processing'), retry_count, last_error
+
+         Note:
+             - Fetches both 'pending' and 'failed' events (failed events will be retried)
+             - Returned events will always have status='processing' (not 'pending'/'failed')
+             - This prevents duplicate processing in distributed environments
+             - After successful publishing, call mark_outbox_published(event_id)
+             - On failure, call mark_outbox_failed(event_id, error_message)
+         """
+         ...
+
+     async def mark_outbox_published(self, event_id: str) -> None:
+         """
+         Mark an outbox event as successfully published.
+
+         Args:
+             event_id: Event identifier
+         """
+         ...
+
+     async def mark_outbox_failed(self, event_id: str, error: str) -> None:
+         """
+         Mark an outbox event as failed and increment its retry count.
+
+         Args:
+             event_id: Event identifier
+             error: Error message
+         """
+         ...
+
+     async def mark_outbox_permanently_failed(self, event_id: str, error: str) -> None:
+         """
+         Mark an outbox event as permanently failed (no more retries).
+
+         Args:
+             event_id: Event identifier
+             error: Error message
+         """
+         ...
+
+     async def mark_outbox_invalid(self, event_id: str, error: str) -> None:
+         """
+         Mark an outbox event as invalid (client error, don't retry).
+
+         Used for 4xx HTTP errors where retrying won't help (malformed payload,
+         authentication failure, etc.).
+
+         Args:
+             event_id: Event identifier
+             error: Error message (should include the HTTP status code)
+         """
+         ...
+
+     async def mark_outbox_expired(self, event_id: str, error: str) -> None:
+         """
+         Mark an outbox event as expired (too old to retry).
+
+         Used when max_age_hours is exceeded; events that old are no longer
+         worth delivering.
+
+         Args:
+             event_id: Event identifier
+             error: Error message
+         """
+         ...
+
+     async def cleanup_published_events(self, older_than_hours: int = 24) -> int:
+         """
+         Clean up successfully published events older than the threshold.
+
+         Args:
+             older_than_hours: Age threshold in hours
+
+         Returns:
+             Number of events cleaned up
+         """
+         ...
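
A minimal relayer pass over these methods might look like the sketch below. `publish` is a hypothetical broker call, and the permanently-failed/invalid/expired branches (retry budgets, 4xx handling, max age) are omitted for brevity.

```python
from collections.abc import Awaitable, Callable
from typing import Any


async def relay_once(
    storage: "StorageProtocol",
    publish: Callable[[dict[str, Any]], Awaitable[None]],  # hypothetical broker call
) -> None:
    # Events come back already marked 'processing', so concurrent relayers
    # never pick up the same batch.
    for event in await storage.get_pending_outbox_events(limit=10):
        try:
            await publish(event)
        except Exception as exc:
            # Transient failure: record it; the event is retried on a later pass.
            await storage.mark_outbox_failed(event["event_id"], str(exc))
        else:
            await storage.mark_outbox_published(event["event_id"])
```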
+
+     # -------------------------------------------------------------------------
+     # Workflow Cancellation Methods
+     # -------------------------------------------------------------------------
+
+     async def cancel_instance(self, instance_id: str, cancelled_by: str) -> bool:
+         """
+         Cancel a workflow instance.
+
+         Only running or waiting_for_event workflows can be cancelled.
+         This method will:
+         1. Check the current status (only cancel if running/waiting_for_event)
+         2. Update the status to 'cancelled'
+         3. Clear locks so other workers are not blocked
+         4. Remove event subscriptions (if waiting for an event)
+         5. Record cancellation metadata (cancelled_by, cancelled_at)
+
+         Args:
+             instance_id: Workflow instance to cancel
+             cancelled_by: Who/what triggered the cancellation (e.g., "user", "timeout", "admin")
+
+         Returns:
+             True if successfully cancelled, False if already completed/failed/cancelled
+             or if the instance was not found
+         """
+         ...
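
Finally, a usage sketch for cancellation; the boolean return lets callers distinguish a real cancellation from a no-op on an already-terminal (or unknown) instance.

```python
async def cancel_from_admin(storage: "StorageProtocol", instance_id: str) -> None:
    # False means the instance already finished, was cancelled earlier,
    # or does not exist, so there is nothing to undo.
    if await storage.cancel_instance(instance_id, cancelled_by="admin"):
        print(f"{instance_id}: cancelled")
    else:
        print(f"{instance_id}: already finished or not found")
```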