edda-framework 0.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edda/__init__.py +56 -0
- edda/activity.py +505 -0
- edda/app.py +996 -0
- edda/compensation.py +326 -0
- edda/context.py +489 -0
- edda/events.py +505 -0
- edda/exceptions.py +64 -0
- edda/hooks.py +284 -0
- edda/locking.py +322 -0
- edda/outbox/__init__.py +15 -0
- edda/outbox/relayer.py +274 -0
- edda/outbox/transactional.py +112 -0
- edda/pydantic_utils.py +316 -0
- edda/replay.py +799 -0
- edda/retry.py +207 -0
- edda/serialization/__init__.py +9 -0
- edda/serialization/base.py +83 -0
- edda/serialization/json.py +102 -0
- edda/storage/__init__.py +9 -0
- edda/storage/models.py +194 -0
- edda/storage/protocol.py +737 -0
- edda/storage/sqlalchemy_storage.py +1809 -0
- edda/viewer_ui/__init__.py +20 -0
- edda/viewer_ui/app.py +1399 -0
- edda/viewer_ui/components.py +1105 -0
- edda/viewer_ui/data_service.py +880 -0
- edda/visualizer/__init__.py +11 -0
- edda/visualizer/ast_analyzer.py +383 -0
- edda/visualizer/mermaid_generator.py +355 -0
- edda/workflow.py +218 -0
- edda_framework-0.1.0.dist-info/METADATA +748 -0
- edda_framework-0.1.0.dist-info/RECORD +35 -0
- edda_framework-0.1.0.dist-info/WHEEL +4 -0
- edda_framework-0.1.0.dist-info/entry_points.txt +2 -0
- edda_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
edda/storage/protocol.py
ADDED
@@ -0,0 +1,737 @@
"""
Storage protocol definition for Edda framework.

This module defines the StorageProtocol using Python's structural typing (Protocol).
Any storage implementation that conforms to this protocol can be used with Edda.
"""

from datetime import datetime
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

if TYPE_CHECKING:
    pass


@runtime_checkable
class StorageProtocol(Protocol):
    """
    Protocol for storage backend implementations.

    This protocol defines all the methods that a storage backend must implement
    to work with the Edda framework. It supports workflow instances, execution
    history, compensations, event subscriptions, outbox events, and distributed locking.
    """

    async def initialize(self) -> None:
        """
        Initialize storage (create tables, connections, etc.).

        This method should be idempotent - calling it multiple times
        should not cause errors.
        """
        ...

    async def close(self) -> None:
        """
        Close storage connections and cleanup resources.

        This method should be called when shutting down the application.
        """
        ...

    # -------------------------------------------------------------------------
    # Transaction Management Methods
    # -------------------------------------------------------------------------

    async def begin_transaction(self) -> None:
        """
        Begin a new transaction.

        If a transaction is already in progress, this will create a nested
        transaction using savepoints (supported by SQLite and PostgreSQL).

        This method is typically called by WorkflowContext.transaction() and
        should not be called directly by user code.

        Example:
            async with ctx.transaction():
                # All operations here are in the same transaction
                await ctx.storage.append_history(...)
                await send_event_transactional(ctx, ...)
        """
        ...

    async def commit_transaction(self) -> None:
        """
        Commit the current transaction.

        For nested transactions (savepoints), this will release the savepoint.
        For top-level transactions, this will commit all changes to the database.

        This method is typically called by WorkflowContext.transaction() and
        should not be called directly by user code.

        Raises:
            RuntimeError: If not in a transaction
        """
        ...

    async def rollback_transaction(self) -> None:
        """
        Rollback the current transaction.

        For nested transactions (savepoints), this will rollback to the savepoint.
        For top-level transactions, this will rollback all changes.

        This method is typically called by WorkflowContext.transaction() on
        exception and should not be called directly by user code.

        Raises:
            RuntimeError: If not in a transaction
        """
        ...

    def in_transaction(self) -> bool:
        """
        Check if currently in a transaction.

        Returns:
            True if in a transaction, False otherwise.

        Note:
            This is a synchronous method because it only checks state,
            it does not perform any I/O operations.
        """
        ...

    # -------------------------------------------------------------------------
    # Workflow Definition Methods
    # -------------------------------------------------------------------------

    async def upsert_workflow_definition(
        self,
        workflow_name: str,
        source_hash: str,
        source_code: str,
    ) -> None:
        """
        Insert or update a workflow definition.

        This method stores the workflow source code with a unique combination
        of workflow_name and source_hash. If the same combination already exists,
        it updates the record (idempotent).

        Args:
            workflow_name: Name of the workflow (e.g., "order_saga")
            source_hash: SHA256 hash of the source code
            source_code: Source code of the workflow function
        """
        ...

    async def get_workflow_definition(
        self,
        workflow_name: str,
        source_hash: str,
    ) -> dict[str, Any] | None:
        """
        Get a workflow definition by name and hash.

        Args:
            workflow_name: Name of the workflow
            source_hash: SHA256 hash of the source code

        Returns:
            Dictionary containing definition metadata, or None if not found.
            Expected keys: workflow_name, source_hash, source_code, created_at
        """
        ...

    async def get_current_workflow_definition(
        self,
        workflow_name: str,
    ) -> dict[str, Any] | None:
        """
        Get the most recent workflow definition by name.

        This returns the latest definition for a workflow, which may differ
        from older definitions if the workflow code has changed.

        Args:
            workflow_name: Name of the workflow

        Returns:
            Dictionary containing definition metadata, or None if not found.
            Expected keys: workflow_name, source_hash, source_code, created_at
        """
        ...

    # -------------------------------------------------------------------------
    # Workflow Instance Methods
    # -------------------------------------------------------------------------

    async def create_instance(
        self,
        instance_id: str,
        workflow_name: str,
        source_hash: str,
        owner_service: str,
        input_data: dict[str, Any],
        lock_timeout_seconds: int | None = None,
    ) -> None:
        """
        Create a new workflow instance.

        Args:
            instance_id: Unique identifier for the workflow instance
            workflow_name: Name of the workflow (e.g., "order_saga")
            source_hash: SHA256 hash of the workflow source code
            owner_service: Service that owns this workflow (e.g., "order-service")
            input_data: Input parameters for the workflow (serializable dict)
            lock_timeout_seconds: Lock timeout for this workflow (None = use global default 300s)
        """
        ...

    async def get_instance(self, instance_id: str) -> dict[str, Any] | None:
        """
        Get workflow instance metadata with its definition.

        This method JOINs workflow_instances with workflow_definitions to
        return the instance along with its source code.

        Args:
            instance_id: Unique identifier for the workflow instance

        Returns:
            Dictionary containing instance metadata, or None if not found.
            Expected keys: instance_id, workflow_name, source_hash, owner_service,
            status, current_activity_id, started_at, updated_at, input_data, source_code,
            output_data, locked_by, locked_at
        """
        ...

    async def update_instance_status(
        self,
        instance_id: str,
        status: str,
        output_data: dict[str, Any] | None = None,
    ) -> None:
        """
        Update workflow instance status.

        Args:
            instance_id: Unique identifier for the workflow instance
            status: New status (e.g., "running", "completed", "failed", "waiting_for_event")
            output_data: Optional output data (for completed workflows)
        """
        ...

    async def update_instance_activity(self, instance_id: str, activity_id: str) -> None:
        """
        Update the current activity ID for a workflow instance.

        Args:
            instance_id: Unique identifier for the workflow instance
            activity_id: Current activity ID being executed
        """
        ...

    async def list_instances(
        self,
        limit: int = 50,
        status_filter: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        List workflow instances with optional filtering.

        This method JOINs workflow_instances with workflow_definitions to
        return instances along with their source code.

        Args:
            limit: Maximum number of instances to return
            status_filter: Optional status filter (e.g., "running", "completed", "failed")

        Returns:
            List of workflow instances, ordered by started_at DESC.
            Each instance contains: instance_id, workflow_name, source_hash,
            owner_service, status, current_activity_id, started_at, updated_at,
            input_data, source_code, output_data, locked_by, locked_at
        """
        ...

    # -------------------------------------------------------------------------
    # Distributed Locking Methods
    # -------------------------------------------------------------------------

    async def try_acquire_lock(
        self,
        instance_id: str,
        worker_id: str,
        timeout_seconds: int = 300,
    ) -> bool:
        """
        Try to acquire lock for workflow instance.

        This method implements distributed locking to ensure only one worker
        processes a workflow instance at a time. It can acquire locks that
        have timed out.

        Args:
            instance_id: Workflow instance to lock
            worker_id: Unique identifier of the worker acquiring the lock
            timeout_seconds: Lock timeout in seconds (default: 300)

        Returns:
            True if lock was acquired, False if already locked by another worker
        """
        ...

    async def release_lock(self, instance_id: str, worker_id: str) -> None:
        """
        Release lock for workflow instance.

        Only the worker that holds the lock can release it.

        Args:
            instance_id: Workflow instance to unlock
            worker_id: Unique identifier of the worker releasing the lock
        """
        ...

    async def refresh_lock(self, instance_id: str, worker_id: str) -> bool:
        """
        Refresh lock timestamp for long-running workflows.

        This prevents the lock from timing out during long operations.

        Args:
            instance_id: Workflow instance to refresh
            worker_id: Unique identifier of the worker holding the lock

        Returns:
            True if successfully refreshed, False if lock was lost
        """
        ...

    async def cleanup_stale_locks(self) -> list[dict[str, str]]:
        """
        Clean up locks that have expired (based on lock_expires_at column).

        This should be called periodically to clean up locks from crashed workers.

        Returns:
            List of cleaned workflow instances with status='running' or 'compensating'.
            Each dict contains: {'instance_id': str, 'workflow_name': str, 'source_hash': str}
            These are workflows that need to be auto-resumed.
        """
        ...

    # -------------------------------------------------------------------------
    # History Methods (for Deterministic Replay)
    # -------------------------------------------------------------------------

    async def append_history(
        self,
        instance_id: str,
        activity_id: str,
        event_type: str,
        event_data: dict[str, Any] | bytes,
    ) -> None:
        """
        Append an event to workflow execution history.

        The history is used for deterministic replay - each activity result
        is stored as a history event.

        Args:
            instance_id: Workflow instance
            activity_id: Activity ID in the workflow
            event_type: Type of event (e.g., "ActivityCompleted", "ActivityFailed")
            event_data: Event payload (JSON dict or binary bytes)
        """
        ...

    async def get_history(self, instance_id: str) -> list[dict[str, Any]]:
        """
        Get workflow execution history in order.

        Args:
            instance_id: Workflow instance

        Returns:
            List of history events, ordered by creation time.
            Each event contains: id, instance_id, activity_id, event_type, event_data, created_at
        """
        ...

    # -------------------------------------------------------------------------
    # Compensation Methods (for Saga Pattern)
    # -------------------------------------------------------------------------

    async def push_compensation(
        self,
        instance_id: str,
        activity_id: str,
        activity_name: str,
        args: dict[str, Any],
    ) -> None:
        """
        Push a compensation to the stack (LIFO).

        Compensations are executed in reverse order when a saga fails.

        Args:
            instance_id: Workflow instance
            activity_id: Activity ID where compensation was registered
            activity_name: Name of the compensation activity
            args: Arguments to pass to the compensation activity
        """
        ...

    async def get_compensations(self, instance_id: str) -> list[dict[str, Any]]:
        """
        Get compensations in LIFO order (most recent first).

        Args:
            instance_id: Workflow instance

        Returns:
            List of compensations, ordered by creation time DESC (most recent first).
            Each compensation contains: id, instance_id, activity_id, activity_name, args, created_at
        """
        ...

    async def clear_compensations(self, instance_id: str) -> None:
        """
        Clear all compensations for a workflow instance.

        Called after successful workflow completion.

        Args:
            instance_id: Workflow instance
        """
        ...

    # -------------------------------------------------------------------------
    # Event Subscription Methods (for wait_event)
    # -------------------------------------------------------------------------

    async def add_event_subscription(
        self,
        instance_id: str,
        event_type: str,
        timeout_at: datetime | None = None,
    ) -> None:
        """
        Register an event wait subscription.

        When a workflow calls wait_event(), a subscription is created
        in the database so that incoming events can be routed to the
        waiting workflow.

        Note: filter_expr is not needed because subscriptions are uniquely
        identified by instance_id. Events are delivered to specific workflow
        instances, not filtered across multiple instances.

        Args:
            instance_id: Workflow instance
            event_type: CloudEvent type to wait for (e.g., "payment.completed")
            timeout_at: Optional timeout timestamp
        """
        ...

    async def find_waiting_instances(self, event_type: str) -> list[dict[str, Any]]:
        """
        Find workflow instances waiting for a specific event type.

        Called when an event arrives to find which workflows are waiting for it.

        Args:
            event_type: CloudEvent type

        Returns:
            List of waiting instances with subscription info.
            Each item contains: instance_id, event_type, timeout_at
        """
        ...

    async def remove_event_subscription(
        self,
        instance_id: str,
        event_type: str,
    ) -> None:
        """
        Remove event subscription after the event is received.

        Args:
            instance_id: Workflow instance
            event_type: CloudEvent type
        """
        ...

    async def cleanup_expired_subscriptions(self) -> int:
        """
        Clean up event subscriptions that have timed out.

        Returns:
            Number of subscriptions cleaned up
        """
        ...

    async def find_expired_event_subscriptions(
        self,
    ) -> list[dict[str, Any]]:
        """
        Find event subscriptions that have timed out.

        Returns:
            List of dictionaries containing:
            - instance_id: Workflow instance ID
            - event_type: Event type that was being waited for
            - timeout_at: Timeout timestamp (ISO 8601 string)
            - created_at: Subscription creation timestamp (ISO 8601 string)

        Note:
            This method does NOT delete the subscriptions - it only finds them.
            Use cleanup_expired_subscriptions() to delete them after processing.
        """
        ...

    async def register_event_subscription_and_release_lock(
        self,
        instance_id: str,
        worker_id: str,
        event_type: str,
        timeout_at: datetime | None = None,
        activity_id: str | None = None,
    ) -> None:
        """
        Atomically register event subscription and release workflow lock.

        This method performs the following operations in a SINGLE database transaction:
        1. Register event subscription (INSERT into workflow_event_subscriptions)
        2. Update current activity (UPDATE workflow_instances.current_activity_id)
        3. Release lock (UPDATE workflow_instances set locked_by=NULL)

        This ensures that when a workflow calls wait_event(), the subscription is
        registered and the lock is released atomically, preventing race conditions
        in distributed environments (distributed coroutines pattern).

        Note: filter_expr is not needed because subscriptions are uniquely identified
        by instance_id. Events are delivered to specific workflow instances.

        Args:
            instance_id: Workflow instance ID
            worker_id: Worker ID that currently holds the lock
            event_type: CloudEvent type to wait for
            timeout_at: Optional timeout timestamp
            activity_id: Current activity ID to record

        Raises:
            RuntimeError: If the worker doesn't hold the lock (sanity check)
        """
        ...

    async def register_timer_subscription_and_release_lock(
        self,
        instance_id: str,
        worker_id: str,
        timer_id: str,
        expires_at: datetime,
        activity_id: str | None = None,
    ) -> None:
        """
        Atomically register timer subscription and release workflow lock.

        This method performs the following operations in a SINGLE database transaction:
        1. Register timer subscription (INSERT into workflow_timer_subscriptions)
        2. Update current activity (UPDATE workflow_instances.current_activity_id)
        3. Release lock (UPDATE workflow_instances set locked_by=NULL)

        This ensures that when a workflow calls wait_timer(), the subscription is
        registered and the lock is released atomically, preventing race conditions
        in distributed environments (distributed coroutines pattern).

        Args:
            instance_id: Workflow instance ID
            worker_id: Worker ID that currently holds the lock
            timer_id: Timer identifier (unique per instance)
            expires_at: Expiration timestamp
            activity_id: Current activity ID to record

        Raises:
            RuntimeError: If the worker doesn't hold the lock (sanity check)
        """
        ...

    async def find_expired_timers(self) -> list[dict[str, Any]]:
        """
        Find timer subscriptions that have expired.

        This method is called periodically by a background task to find
        workflows waiting for timers that have expired.

        Returns:
            List of expired timer subscriptions.
            Each item contains: instance_id, timer_id, expires_at, activity_id, workflow_name
        """
        ...

    async def remove_timer_subscription(
        self,
        instance_id: str,
        timer_id: str,
    ) -> None:
        """
        Remove timer subscription after the timer expires.

        Args:
            instance_id: Workflow instance ID
            timer_id: Timer identifier
        """
        ...

    # -------------------------------------------------------------------------
    # Transactional Outbox Methods
    # -------------------------------------------------------------------------

    async def add_outbox_event(
        self,
        event_id: str,
        event_type: str,
        event_source: str,
        event_data: dict[str, Any] | bytes,
        content_type: str = "application/json",
    ) -> None:
        """
        Add an event to the transactional outbox.

        Events in the outbox are published asynchronously by the relayer.

        Args:
            event_id: Unique event identifier
            event_type: CloudEvent type
            event_source: CloudEvent source
            event_data: Event payload (JSON dict or binary bytes)
            content_type: Content type (defaults to application/json)
        """
        ...

    async def get_pending_outbox_events(self, limit: int = 10) -> list[dict[str, Any]]:
        """
        Get pending/failed outbox events and atomically mark them as 'processing'.

        This method uses SELECT FOR UPDATE (with SKIP LOCKED on PostgreSQL/MySQL)
        to safely fetch events in a multi-worker environment. It fetches both
        'pending' and 'failed' events (for automatic retry). Fetched events are
        immediately marked as 'processing' within the same transaction to prevent
        duplicate processing by other workers.

        Args:
            limit: Maximum number of events to return

        Returns:
            List of events (now with status='processing'), ordered by created_at.
            Each event contains: event_id, event_type, event_source, event_data,
            created_at, status ('processing'), retry_count, last_error

        Note:
            - Fetches both 'pending' and 'failed' events (failed events will be retried)
            - Returned events will always have status='processing' (not 'pending'/'failed')
            - This prevents duplicate processing in distributed environments
            - After successful publishing, call mark_outbox_published(event_id)
            - On failure, call mark_outbox_failed(event_id, error_message)
        """
        ...

    async def mark_outbox_published(self, event_id: str) -> None:
        """
        Mark outbox event as successfully published.

        Args:
            event_id: Event identifier
        """
        ...

    async def mark_outbox_failed(self, event_id: str, error: str) -> None:
        """
        Mark outbox event as failed and increment retry count.

        Args:
            event_id: Event identifier
            error: Error message
        """
        ...

    async def mark_outbox_permanently_failed(self, event_id: str, error: str) -> None:
        """
        Mark outbox event as permanently failed (no more retries).

        Args:
            event_id: Event identifier
            error: Error message
        """
        ...

    async def mark_outbox_invalid(self, event_id: str, error: str) -> None:
        """
        Mark outbox event as invalid (client error, don't retry).

        Used for 4xx HTTP errors where retrying won't help (malformed payload,
        authentication failure, etc.).

        Args:
            event_id: Event identifier
            error: Error message (should include HTTP status code)
        """
        ...

    async def mark_outbox_expired(self, event_id: str, error: str) -> None:
        """
        Mark outbox event as expired (too old to retry).

        Used when max_age_hours is exceeded. Events become meaningless after
        a certain time.

        Args:
            event_id: Event identifier
            error: Error message
        """
        ...

    async def cleanup_published_events(self, older_than_hours: int = 24) -> int:
        """
        Clean up successfully published events older than threshold.

        Args:
            older_than_hours: Age threshold in hours

        Returns:
            Number of events cleaned up
        """
        ...

    # -------------------------------------------------------------------------
    # Workflow Cancellation Methods
    # -------------------------------------------------------------------------

    async def cancel_instance(self, instance_id: str, cancelled_by: str) -> bool:
        """
        Cancel a workflow instance.

        Only running or waiting_for_event workflows can be cancelled.
        This method will:
        1. Check current status (only cancel if running/waiting_for_event)
        2. Update status to 'cancelled'
        3. Clear locks so other workers are not blocked
        4. Remove event subscriptions (if waiting for event)
        5. Record cancellation metadata (cancelled_by, cancelled_at)

        Args:
            instance_id: Workflow instance to cancel
            cancelled_by: Who/what triggered the cancellation (e.g., "user", "timeout", "admin")

        Returns:
            True if successfully cancelled, False if already completed/failed/cancelled
            or if instance not found
        """
        ...