edda-framework 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edda/__init__.py +56 -0
- edda/activity.py +505 -0
- edda/app.py +996 -0
- edda/compensation.py +326 -0
- edda/context.py +489 -0
- edda/events.py +505 -0
- edda/exceptions.py +64 -0
- edda/hooks.py +284 -0
- edda/locking.py +322 -0
- edda/outbox/__init__.py +15 -0
- edda/outbox/relayer.py +274 -0
- edda/outbox/transactional.py +112 -0
- edda/pydantic_utils.py +316 -0
- edda/replay.py +799 -0
- edda/retry.py +207 -0
- edda/serialization/__init__.py +9 -0
- edda/serialization/base.py +83 -0
- edda/serialization/json.py +102 -0
- edda/storage/__init__.py +9 -0
- edda/storage/models.py +194 -0
- edda/storage/protocol.py +737 -0
- edda/storage/sqlalchemy_storage.py +1809 -0
- edda/viewer_ui/__init__.py +20 -0
- edda/viewer_ui/app.py +1399 -0
- edda/viewer_ui/components.py +1105 -0
- edda/viewer_ui/data_service.py +880 -0
- edda/visualizer/__init__.py +11 -0
- edda/visualizer/ast_analyzer.py +383 -0
- edda/visualizer/mermaid_generator.py +355 -0
- edda/workflow.py +218 -0
- edda_framework-0.1.0.dist-info/METADATA +748 -0
- edda_framework-0.1.0.dist-info/RECORD +35 -0
- edda_framework-0.1.0.dist-info/WHEEL +4 -0
- edda_framework-0.1.0.dist-info/entry_points.txt +2 -0
- edda_framework-0.1.0.dist-info/licenses/LICENSE +21 -0
edda/app.py
ADDED
@@ -0,0 +1,996 @@
"""
Main application module for Edda framework.

This module provides the EddaApp class, which is an ASGI/WSGI compatible
application for handling CloudEvents and executing workflows.
"""

import asyncio
import json
import sys
from collections.abc import Callable
from typing import Any

import uvloop
from cloudevents.exceptions import GenericException as CloudEventsException
from cloudevents.http import from_http
from sqlalchemy.ext.asyncio import create_async_engine

from edda import workflow
from edda.hooks import WorkflowHooks
from edda.locking import auto_resume_stale_workflows_periodically, generate_worker_id
from edda.outbox.relayer import OutboxRelayer
from edda.replay import ReplayEngine
from edda.retry import RetryPolicy
from edda.storage.sqlalchemy_storage import SQLAlchemyStorage


class EddaApp:
    """
    ASGI/WSGI compatible workflow application with distributed execution support.

    This is the main entry point for the Edda framework. It handles:
    - CloudEvents HTTP endpoint
    - Event routing and workflow triggering
    - Distributed locking and coordination
    - Storage management
    """

    def __init__(
        self,
        service_name: str,
        db_url: str,
        outbox_enabled: bool = False,
        broker_url: str = "http://broker-ingress.knative-eventing.svc.cluster.local/default/default",
        hooks: WorkflowHooks | None = None,
        default_retry_policy: "RetryPolicy | None" = None,
    ):
        """
        Initialize Edda application.

        Args:
            service_name: Service name for distributed execution (e.g., "order-service")
            db_url: Database URL (e.g., "sqlite:///workflow.db")
            outbox_enabled: Enable transactional outbox pattern
            broker_url: Knative Broker URL for outbox publishing
            hooks: Optional WorkflowHooks implementation for observability
            default_retry_policy: Default retry policy for all activities.
                If None, uses DEFAULT_RETRY_POLICY (5 attempts, exponential backoff).
                Can be overridden per-activity using @activity(retry_policy=...).
        """
        self.db_url = db_url
        self.service_name = service_name
        self.outbox_enabled = outbox_enabled
        self.broker_url = broker_url
        self.hooks = hooks
        self.default_retry_policy = default_retry_policy

        # Generate unique worker ID for this process
        self.worker_id = generate_worker_id(service_name)

        # Initialize storage
        self.storage = self._create_storage(db_url)

        # Event handlers registry
        self.event_handlers: dict[str, list[Callable[..., Any]]] = {}

        # Replay engine (will be initialized in initialize())
        self.replay_engine: ReplayEngine | None = None

        # Outbox relayer (will be initialized if outbox_enabled)
        self.outbox_relayer: OutboxRelayer | None = None

        # Background tasks
        self._background_tasks: list[asyncio.Task[Any]] = []
        self._initialized = False

    def _create_storage(self, db_url: str) -> SQLAlchemyStorage:
        """
        Create storage backend from database URL.

        Supports SQLite, PostgreSQL, and MySQL via SQLAlchemy.

        Args:
            db_url: Database URL in SQLAlchemy format
                Examples:
                - SQLite: "sqlite:///saga.db" or "sqlite+aiosqlite:///saga.db"
                - PostgreSQL: "postgresql+asyncpg://user:pass@localhost/dbname"
                - MySQL: "mysql+aiomysql://user:pass@localhost/dbname"

        Returns:
            SQLAlchemyStorage instance
        """
        # Convert plain sqlite:// URLs to use aiosqlite driver
        if db_url.startswith("sqlite:///"):
            db_url = db_url.replace("sqlite:///", "sqlite+aiosqlite:///", 1)
        elif db_url == "sqlite:///:memory:" or db_url.startswith("sqlite:///:memory:"):
            db_url = "sqlite+aiosqlite:///:memory:"

        # Create async engine
        engine = create_async_engine(
            db_url,
            echo=False,  # Set to True for SQL logging
            future=True,
        )

        return SQLAlchemyStorage(engine)

    async def initialize(self) -> None:
        """
        Initialize the application.

        This should be called before the app starts receiving requests.
        """
        if self._initialized:
            return

        # Install uvloop for better performance
        # Python 3.12+ uses asyncio.set_event_loop_policy() instead of uvloop.install()
        if sys.version_info >= (3, 12):
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        else:
            uvloop.install()

        # Initialize storage
        await self.storage.initialize()

        # Initialize replay engine
        self.replay_engine = ReplayEngine(
            storage=self.storage,
            service_name=self.service_name,
            worker_id=self.worker_id,
            hooks=self.hooks,
            default_retry_policy=self.default_retry_policy,
        )

        # Set global replay engine for workflow decorator
        workflow.set_replay_engine(self.replay_engine)

        # Initialize outbox relayer if enabled
        if self.outbox_enabled:
            self.outbox_relayer = OutboxRelayer(
                storage=self.storage,
                broker_url=self.broker_url,
                poll_interval=1.0,
                max_retries=3,
                batch_size=10,
            )
            await self.outbox_relayer.start()

        # Auto-register all @workflow decorated workflows
        self._auto_register_workflows()

        # Start background tasks
        self._start_background_tasks()

        self._initialized = True

    async def shutdown(self) -> None:
        """
        Shutdown the application and cleanup resources.

        This should be called when the app is shutting down.
        """
        # Stop outbox relayer if enabled
        if self.outbox_relayer:
            await self.outbox_relayer.stop()

        # Cancel background tasks
        for task in self._background_tasks:
            task.cancel()

        # Wait for tasks to complete
        await asyncio.gather(*self._background_tasks, return_exceptions=True)

        # Close storage
        await self.storage.close()

        self._initialized = False

    def _start_background_tasks(self) -> None:
        """Start background maintenance tasks."""
        # Task to cleanup stale locks and auto-resume workflows
        auto_resume_task = asyncio.create_task(
            auto_resume_stale_workflows_periodically(
                self.storage,
                self.replay_engine,
                interval=60,  # Check every 60 seconds
            )
        )
        self._background_tasks.append(auto_resume_task)

        # Task to check expired timers and resume workflows
        timer_check_task = asyncio.create_task(
            self._check_expired_timers_periodically(interval=10)  # Check every 10 seconds
        )
        self._background_tasks.append(timer_check_task)

        # Task to check expired event timeouts and fail workflows
        event_timeout_task = asyncio.create_task(
            self._check_expired_event_timeouts_periodically(interval=10)  # Check every 10 seconds
        )
        self._background_tasks.append(event_timeout_task)

    def _auto_register_workflows(self) -> None:
        """
        Auto-register workflows with event_handler=True as CloudEvent handlers.

        Only workflows explicitly marked with @workflow(event_handler=True) will be
        auto-registered. For each eligible workflow, a default handler is registered that:
        1. Extracts data from CloudEvent
        2. Starts the workflow with data as kwargs

        Manual @app.on_event() registrations take precedence.
        """
        from edda.workflow import get_all_workflows

        for workflow_name, workflow_instance in get_all_workflows().items():
            # Only register if event_handler=True
            if not workflow_instance.event_handler:
                continue

            # Skip if already manually registered (manual takes precedence)
            if workflow_name not in self.event_handlers:
                self._register_default_workflow_handler(workflow_name, workflow_instance)

    def _register_default_workflow_handler(self, event_type: str, wf: Any) -> None:
        """
        Register a default CloudEvent handler for a workflow.

        The default handler extracts the CloudEvent data and passes it
        as kwargs to workflow.start().

        Args:
            event_type: CloudEvent type (same as workflow name)
            wf: Workflow instance to start when event is received
        """

        async def default_handler(event: Any) -> None:
            """Default handler that starts workflow with CloudEvent data."""
            # Extract data from CloudEvent
            data = event.get_data()

            # Start workflow with data as kwargs
            if isinstance(data, dict):
                await wf.start(**data)
            else:
                # If data is not a dict, start without arguments
                await wf.start()

        # Register the handler
        if event_type not in self.event_handlers:
            self.event_handlers[event_type] = []
        self.event_handlers[event_type].append(default_handler)

    def on_event(
        self, event_type: str, proto_type: type[Any] | None = None
    ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
        """
        Decorator to register an event handler.

        Example:
            >>> @app.on_event("order.created")
            ... async def handle_order_created(event):
            ...     await order_workflow.start(...)

        Args:
            event_type: CloudEvent type to handle
            proto_type: Optional protobuf message type

        Returns:
            Decorator function
        """

        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
            if event_type not in self.event_handlers:
                self.event_handlers[event_type] = []
            self.event_handlers[event_type].append(func)

            # Store proto_type if provided
            if proto_type is not None:
                func._proto_type = proto_type  # type: ignore[attr-defined]

            return func

        return decorator

    async def handle_cloudevent(self, event: Any, wait: bool = False) -> None:
        """
        Handle incoming CloudEvent.

        This will route the event to registered handlers and deliver events
        to waiting workflows.

        By default, handlers are executed as background tasks to avoid blocking
        the HTTP response. Set wait=True for synchronous execution (useful for testing).

        Args:
            event: CloudEvent instance
            wait: If True, wait for handlers to complete before returning.
                If False (default), execute handlers as background tasks.
        """
        import asyncio

        event_type = event["type"]

        # Find handlers for this event type
        handlers = self.event_handlers.get(event_type, [])

        if wait:
            # Synchronous execution (for tests)
            for handler in handlers:
                await self._run_handler(handler, event, event_type)
            await self._deliver_event_to_waiting_workflows_safe(event)
        else:
            # Background execution (for production)
            for handler in handlers:
                asyncio.create_task(self._run_handler(handler, event, event_type))
            asyncio.create_task(self._deliver_event_to_waiting_workflows_safe(event))

    async def _run_handler(self, handler: Callable[..., Any], event: Any, event_type: str) -> None:
        """
        Run a CloudEvent handler with error handling.

        Args:
            handler: Event handler function
            event: CloudEvent instance
            event_type: Event type for logging
        """
        try:
            await handler(event)
        except Exception as e:
            # Log error (in a real implementation, use proper logging)
            print(f"Error handling event {event_type}: {e}")
            import traceback

            traceback.print_exc()

    async def _deliver_event_to_waiting_workflows_safe(self, event: Any) -> None:
        """
        Deliver event to waiting workflows with error handling.

        Args:
            event: CloudEvent instance
        """
        try:
            await self._deliver_event_to_waiting_workflows(event)
        except Exception as e:
            print(f"Error delivering event to waiting workflows: {e}")
            import traceback

            traceback.print_exc()

    async def _deliver_event_to_waiting_workflows(self, event: Any) -> None:
        """
        Deliver event to workflows waiting for this event type.

        This method:
        1. Finds workflows waiting for the event type
        2. Records event data to workflow history
        3. Removes event subscription
        4. Resumes the workflow

        Args:
            event: CloudEvent instance
        """
        event_type = event["type"]
        event_data = event.get_data()

        # Extract CloudEvents metadata
        event_metadata = {
            "type": event["type"],
            "source": event["source"],
            "id": event["id"],
            "time": event.get("time"),
            "datacontenttype": event.get("datacontenttype"),
            "subject": event.get("subject"),
        }

        # Extract extension attributes (any attributes not in the standard set)
        standard_attrs = {
            "type",
            "source",
            "id",
            "time",
            "datacontenttype",
            "subject",
            "specversion",
            "data",
            "data_base64",
        }
        extensions = {k: v for k, v in event.get_attributes().items() if k not in standard_attrs}

        # Find workflows waiting for this event type
        waiting_instances = await self.storage.find_waiting_instances(event_type)

        if not waiting_instances:
            return  # No workflows waiting for this event

        print(
            f"[EventDelivery] Found {len(waiting_instances)} workflow(s) waiting for '{event_type}'"
        )

        for subscription in waiting_instances:
            instance_id = subscription["instance_id"]

            # Get workflow instance
            instance = await self.storage.get_instance(instance_id)
            if not instance:
                print(f"[EventDelivery] Warning: Instance {instance_id} not found, skipping")
                continue

            # Check if instance is still waiting
            if instance.get("status") != "waiting_for_event":
                print(
                    f"[EventDelivery] Warning: Instance {instance_id} "
                    f"status is '{instance.get('status')}', expected 'waiting_for_event', skipping"
                )
                continue

            # Get activity_id from the subscription (stored when wait_event was called)
            activity_id = subscription.get("activity_id")
            if not activity_id:
                print(
                    f"[EventDelivery] Warning: No activity_id in subscription for {instance_id}, skipping"
                )
                continue

            workflow_name = instance["workflow_name"]

            # Distributed Coroutines: Acquire lock FIRST to prevent race conditions
            # This ensures only ONE pod processes this event, even if multiple pods
            # receive the event simultaneously
            lock_acquired = await self.storage.try_acquire_lock(
                instance_id, self.worker_id, timeout_seconds=300
            )

            if not lock_acquired:
                print(
                    f"[EventDelivery] Another worker is processing {instance_id}, skipping "
                    "(distributed coroutine - lock already held)"
                )
                continue

            try:
                print(
                    f"[EventDelivery] Delivering event to workflow {instance_id} (activity_id: {activity_id})"
                )

                # 1. Record event data and metadata to history
                try:
                    await self.storage.append_history(
                        instance_id,
                        activity_id=activity_id,
                        event_type="EventReceived",
                        event_data={
                            "payload": event_data,
                            "metadata": event_metadata,
                            "extensions": extensions,
                        },
                    )
                except Exception as history_error:
                    # If history entry already exists (UNIQUE constraint), this event was already
                    # delivered by another worker in a multi-process environment.
                    # Skip workflow resumption to prevent duplicate processing.
                    print(
                        f"[EventDelivery] History already exists for activity_id {activity_id}: {history_error}"
                    )
                    print(
                        f"[EventDelivery] Event '{event_type}' was already delivered by another worker, skipping"
                    )
                    continue

                # 2. Remove event subscription
                await self.storage.remove_event_subscription(instance_id, event_type)

                # 3. Resume workflow (lock already held by this worker - distributed coroutine pattern)
                if self.replay_engine is None:
                    print("[EventDelivery] Error: Replay engine not initialized")
                    continue

                await self.replay_engine.resume_by_name(
                    instance_id, workflow_name, already_locked=True
                )

                print(
                    f"[EventDelivery] ✅ Resumed workflow {instance_id} after receiving '{event_type}'"
                )

            except Exception as e:
                print(f"[EventDelivery] ❌ Error resuming workflow {instance_id}: {e}")
                import traceback

                traceback.print_exc()

            finally:
                # Always release the lock, even if an error occurred
                await self.storage.release_lock(instance_id, self.worker_id)

    async def _check_expired_timers(self) -> None:
        """
        Check for expired timers and resume waiting workflows.

        This method:
        1. Finds timers that have expired
        2. Records timer expiration to workflow history
        3. Removes timer subscription
        4. Resumes the workflow

        Note:
            This is called periodically by a background task.
            Timer expiration is recorded to history to enable deterministic replay.
            During replay, wait_timer() will find this history entry and skip the wait.
        """
        # Find expired timers
        expired_timers = await self.storage.find_expired_timers()

        if not expired_timers:
            return  # No expired timers

        print(f"[TimerCheck] Found {len(expired_timers)} expired timer(s)")

        for timer in expired_timers:
            instance_id = timer["instance_id"]
            timer_id = timer["timer_id"]
            workflow_name = timer["workflow_name"]
            activity_id = timer.get("activity_id")

            if not activity_id:
                print(f"[TimerCheck] Warning: No activity_id in timer for {instance_id}, skipping")
                continue

            # Get workflow instance
            instance = await self.storage.get_instance(instance_id)
            if not instance:
                print(f"[TimerCheck] Warning: Instance {instance_id} not found, skipping")
                continue

            # Check if instance is still waiting for timer
            if instance.get("status") != "waiting_for_timer":
                print(
                    f"[TimerCheck] Warning: Instance {instance_id} "
                    f"status is '{instance.get('status')}', expected 'waiting_for_timer', skipping"
                )
                continue

            # Distributed Coroutines: Acquire lock FIRST to prevent race conditions
            # This ensures only ONE pod processes this timer, even if multiple pods
            # check timers simultaneously
            lock_acquired = await self.storage.try_acquire_lock(
                instance_id, self.worker_id, timeout_seconds=300
            )

            if not lock_acquired:
                print(
                    f"[TimerCheck] Another worker is processing {instance_id}, skipping "
                    "(distributed coroutine - lock already held)"
                )
                continue

            try:
                print(
                    f"[TimerCheck] Timer '{timer_id}' expired for workflow {instance_id} (activity_id: {activity_id})"
                )

                # 1. Record timer expiration to history (allows deterministic replay)
                # During replay, wait_timer() will find this entry and skip the wait
                try:
                    await self.storage.append_history(
                        instance_id,
                        activity_id=activity_id,
                        event_type="TimerExpired",
                        event_data={
                            "result": None,
                            "timer_id": timer_id,
                            "expires_at": timer["expires_at"],
                        },
                    )
                except Exception as history_error:
                    # If history entry already exists (UNIQUE constraint), this timer was already
                    # processed by another worker in a multi-process environment.
                    # Skip workflow resumption to prevent duplicate processing.
                    print(
                        f"[TimerCheck] History already exists for activity_id {activity_id}: {history_error}"
                    )
                    print(
                        f"[TimerCheck] Timer '{timer_id}' was already processed by another worker, skipping"
                    )
                    continue

                # 2. Remove timer subscription
                await self.storage.remove_timer_subscription(instance_id, timer_id)

                # 3. Resume workflow (lock already held by this worker - distributed coroutine pattern)
                if self.replay_engine is None:
                    print("[TimerCheck] Error: Replay engine not initialized")
                    continue

                await self.replay_engine.resume_by_name(
                    instance_id, workflow_name, already_locked=True
                )

                print(
                    f"[TimerCheck] ✅ Resumed workflow {instance_id} after timer '{timer_id}' expired"
                )

            except Exception as e:
                print(f"[TimerCheck] ❌ Error resuming workflow {instance_id}: {e}")
                import traceback

                traceback.print_exc()

            finally:
                # Always release the lock, even if an error occurred
                await self.storage.release_lock(instance_id, self.worker_id)

    async def _check_expired_timers_periodically(self, interval: int = 10) -> None:
        """
        Background task to periodically check for expired timers.

        Args:
            interval: Check interval in seconds (default: 10)

        Note:
            This runs indefinitely until the application is shut down.
            The actual resume time may be slightly later than the specified
            duration depending on the check interval.
        """
        while True:
            try:
                await asyncio.sleep(interval)
                await self._check_expired_timers()
            except Exception as e:
                print(f"[TimerCheck] Error in periodic timer check: {e}")
                import traceback

                traceback.print_exc()

    async def _check_expired_event_timeouts(self) -> None:
        """
        Check for event subscriptions that have timed out and fail those workflows.

        This method:
        1. Finds all event subscriptions where timeout_at <= now
        2. For each timeout, acquires workflow lock (Lock-First pattern)
        3. Records EventTimeout to history
        4. Removes event subscription
        5. Fails the workflow with EventTimeoutError
        """
        # Find all expired event subscriptions
        expired = await self.storage.find_expired_event_subscriptions()

        if not expired:
            return

        print(f"[EventTimeoutCheck] Found {len(expired)} expired event subscriptions")

        for subscription in expired:
            instance_id = subscription["instance_id"]
            event_type = subscription["event_type"]
            timeout_at = subscription["timeout_at"]
            created_at = subscription["created_at"]

            # Lock-First pattern: Try to acquire the lock before processing
            # If we can't get the lock, another worker is processing this workflow
            lock_acquired = await self.storage.try_acquire_lock(instance_id, self.worker_id)
            if not lock_acquired:
                print(
                    f"[EventTimeoutCheck] Could not acquire lock for workflow {instance_id}, skipping (another worker is processing)"
                )
                continue

            try:
                print(
                    f"[EventTimeoutCheck] Event '{event_type}' timed out for workflow {instance_id}"
                )

                # Get workflow instance
                instance = await self.storage.get_instance(instance_id)
                if not instance:
                    print(f"[EventTimeoutCheck] Workflow {instance_id} not found")
                    continue

                # Get activity_id from the subscription (stored when wait_event was called)
                activity_id = subscription.get("activity_id")
                if not activity_id:
                    print(
                        f"[EventTimeoutCheck] Warning: No activity_id in subscription for {instance_id}, skipping"
                    )
                    continue

                # 1. Record event timeout to history
                # This allows the workflow to see what happened during replay
                try:
                    await self.storage.append_history(
                        instance_id,
                        activity_id=activity_id,
                        event_type="EventTimeout",
                        event_data={
                            "event_type": event_type,
                            "timeout_at": timeout_at,
                            "error_message": f"Event '{event_type}' did not arrive within timeout",
                        },
                    )
                except Exception as history_error:
                    # If history entry already exists, this timeout was already processed
                    print(
                        f"[EventTimeoutCheck] History already exists for activity_id {activity_id}: {history_error}"
                    )
                    print(
                        f"[EventTimeoutCheck] Timeout for '{event_type}' was already processed, skipping"
                    )
                    continue

                # 2. Remove event subscription
                await self.storage.remove_event_subscription(instance_id, event_type)

                # 3. Fail the workflow with EventTimeoutError
                # Create error details similar to workflow failure
                import traceback

                # Get timeout_seconds from timeout_at and created_at
                from datetime import datetime

                from edda.events import EventTimeoutError

                try:
                    timeout_dt = datetime.fromisoformat(timeout_at)
                    created_dt = datetime.fromisoformat(created_at)
                    # Calculate the original timeout duration (timeout_at - created_at)
                    timeout_seconds = int((timeout_dt - created_dt).total_seconds())
                except Exception:
                    timeout_seconds = 0  # Fallback

                error = EventTimeoutError(event_type, timeout_seconds)
                stack_trace = "".join(
                    traceback.format_exception(type(error), error, error.__traceback__)
                )

                # Update workflow status to failed with error details
                await self.storage.update_instance_status(
                    instance_id,
                    "failed",
                    {
                        "error_message": str(error),
                        "error_type": "EventTimeoutError",
                        "stack_trace": stack_trace,
                    },
                )

                print(
                    f"[EventTimeoutCheck] ✅ Marked workflow {instance_id} as failed due to event timeout"
                )

            except Exception as e:
                print(f"[EventTimeoutCheck] ❌ Error processing timeout for {instance_id}: {e}")
                import traceback

                traceback.print_exc()

            finally:
                # Always release the lock
                await self.storage.release_lock(instance_id, self.worker_id)

    async def _check_expired_event_timeouts_periodically(self, interval: int = 10) -> None:
        """
        Background task to periodically check for expired event timeouts.

        Args:
            interval: Check interval in seconds (default: 10)

        Note:
            This runs indefinitely until the application is shut down.
        """
        while True:
            try:
                await asyncio.sleep(interval)
                await self._check_expired_event_timeouts()
            except Exception as e:
                print(f"[EventTimeoutCheck] Error in periodic timeout check: {e}")
                import traceback

                traceback.print_exc()

    # -------------------------------------------------------------------------
    # ASGI Interface
    # -------------------------------------------------------------------------

    async def __call__(
        self,
        scope: dict[str, Any],
        receive: Callable[[], Any],
        send: Callable[[dict[str, Any]], Any],
    ) -> None:
        """
        ASGI interface.

        Args:
            scope: ASGI scope dictionary
            receive: Async function to receive messages
            send: Async function to send messages
        """
        # Initialize if not already done
        if not self._initialized:
            await self.initialize()

        if scope["type"] == "lifespan":
            await self._handle_lifespan(scope, receive, send)
        elif scope["type"] == "http":
            await self._handle_http(scope, receive, send)
        else:
            raise NotImplementedError(f"Unsupported scope type: {scope['type']}")

    async def _handle_lifespan(
        self,
        _scope: dict[str, Any],
        receive: Callable[[], Any],
        send: Callable[[dict[str, Any]], Any],
    ) -> None:
        """Handle ASGI lifespan events."""
        while True:
            message = await receive()
            if message["type"] == "lifespan.startup":
                await self.initialize()
                await send({"type": "lifespan.startup.complete"})
            elif message["type"] == "lifespan.shutdown":
                await self.shutdown()
                await send({"type": "lifespan.shutdown.complete"})
                return

    async def _handle_http(
        self,
        scope: dict[str, Any],
        receive: Callable[[], Any],
        send: Callable[[dict[str, Any]], Any],
    ) -> None:
        """Handle HTTP request (CloudEvents and API endpoints)."""
        # Get request path and method
        path = scope.get("path", "/")
        method = scope.get("method", "GET")

        # Route to appropriate handler
        if path.startswith("/cancel/") and method == "POST":
            await self._handle_cancel_request(scope, receive, send)
        else:
            # Default: CloudEvents handler
            await self._handle_cloudevent_request(scope, receive, send)

    async def _handle_cloudevent_request(
        self,
        scope: dict[str, Any],
        receive: Callable[[], Any],
        send: Callable[[dict[str, Any]], Any],
    ) -> None:
        """
        Handle CloudEvent HTTP request.

        CloudEvents HTTP Binding compliant responses:
        - 202 Accepted: Event accepted for async processing
        - 400 Bad Request: CloudEvents parsing/validation error (non-retryable)
        - 500 Internal Server Error: Internal error (retryable)
        """
        # Read request body
        body = b""
        while True:
            message = await receive()
            if message["type"] == "http.request":
                body += message.get("body", b"")
                if not message.get("more_body", False):
                    break

        # Parse and handle CloudEvent
        try:
            headers = {k.decode("latin1"): v.decode("latin1") for k, v in scope.get("headers", [])}

            # Create CloudEvent from HTTP request
            event = from_http(headers, body)

            # Handle the event (background task execution)
            await self.handle_cloudevent(event)

            # Success: 202 Accepted (async processing)
            status = 202
            response_body: dict[str, Any] = {"status": "accepted"}

        except (ValueError, TypeError, KeyError, CloudEventsException) as e:
            # CloudEvents parsing/validation error: 400 Bad Request (non-retryable)
            status = 400
            response_body = {
                "error": str(e),
                "error_type": type(e).__name__,
                "retryable": False,
            }

        except Exception as e:
            # Internal error: 500 Internal Server Error (retryable)
            status = 500
            response_body = {
                "error": str(e),
                "error_type": type(e).__name__,
                "retryable": True,
            }

        # Send response (only once, at the end)
        await send(
            {
                "type": "http.response.start",
                "status": status,
                "headers": [[b"content-type", b"application/json"]],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": json.dumps(response_body).encode("utf-8"),
            }
        )

    async def _handle_cancel_request(
        self,
        scope: dict[str, Any],
        receive: Callable[[], Any],
        send: Callable[[dict[str, Any]], Any],
    ) -> None:
        """Handle workflow cancellation request."""
        # Extract instance_id from path: /cancel/{instance_id}
        path = scope.get("path", "")
        instance_id = path.split("/cancel/")[-1]

        # Determine response (default: error)
        status = 500
        response_body: dict[str, Any] = {"error": "Unknown error"}

        if not instance_id:
            status = 400
            response_body = {"error": "Missing instance_id"}
        else:
            # Consume request body (even if we don't use it)
            while True:
                message = await receive()
                if message["type"] == "http.request" and not message.get("more_body", False):
                    break

            # Try to cancel the workflow
            try:
                if self.replay_engine is None:
                    raise RuntimeError("Replay engine not initialized")

                success = await self.replay_engine.cancel_workflow(
                    instance_id=instance_id, cancelled_by="api_user"
                )

                if success:
                    # Successfully cancelled
                    status = 200
                    response_body = {"status": "cancelled", "instance_id": instance_id}
                else:
                    # Could not cancel (not found or already completed/failed)
                    status = 400
                    response_body = {
                        "error": "Cannot cancel workflow (not found or already completed/failed/cancelled)"
                    }

            except Exception as e:
                # Internal error - log detailed traceback
                print(f"[Cancel] Error cancelling workflow {instance_id}: {e}")
                import traceback

                traceback.print_exc()

                status = 500
                response_body = {"error": str(e), "type": type(e).__name__}

        # Send response (only once, at the end)
        await send(
            {
                "type": "http.response.start",
                "status": status,
                "headers": [[b"content-type", b"application/json"]],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": json.dumps(response_body).encode("utf-8"),
            }
        )
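
The docstrings above imply a small integration surface: construct an EddaApp, optionally register CloudEvent handlers with @app.on_event(), and serve the instance with any ASGI server. The sketch below is editorial and is not part of the released wheel; the import location of the workflow decorator, the order_workflow definition and its parameters, and the uvicorn invocation are assumptions inferred from the docstrings (@workflow(event_handler=True), the @app.on_event example, and workflow.start()), not verified against the rest of the distribution.

# example_app.py -- illustrative sketch only, not shipped with edda-framework.
# Assumes the @workflow decorator is importable from the top-level package and
# that decorated workflows expose an async start(**kwargs) method, as the
# docstrings in edda/app.py suggest.
from edda import workflow  # assumed export location
from edda.app import EddaApp

app = EddaApp(
    service_name="order-service",
    db_url="sqlite:///workflow.db",  # rewritten internally to sqlite+aiosqlite
    outbox_enabled=False,
)

@workflow(event_handler=True)  # eligible for auto-registration; signature assumed
async def order_workflow(ctx, order_id: str):  # parameters assumed for illustration
    ...

@app.on_event("order.created")  # manual registration takes precedence over auto-registration
async def handle_order_created(event):
    data = event.get_data() or {}
    await order_workflow.start(order_id=data.get("order_id"))

# Serve the ASGI callable, e.g.: uvicorn example_app:app --port 8080
# CloudEvents POSTed to the app return 202 Accepted and are processed in the
# background; POST /cancel/{instance_id} requests cancellation of a running workflow.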