async-durable-execution-runner 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. LICENSE +175 -0
  2. NOTICE +8 -0
  3. VERSION.py +5 -0
  4. async_durable_execution_runner/__about__.py +33 -0
  5. async_durable_execution_runner/__init__.py +23 -0
  6. async_durable_execution_runner/checkpoint/__init__.py +1 -0
  7. async_durable_execution_runner/checkpoint/processor.py +101 -0
  8. async_durable_execution_runner/checkpoint/processors/__init__.py +1 -0
  9. async_durable_execution_runner/checkpoint/processors/base.py +199 -0
  10. async_durable_execution_runner/checkpoint/processors/callback.py +89 -0
  11. async_durable_execution_runner/checkpoint/processors/context.py +59 -0
  12. async_durable_execution_runner/checkpoint/processors/execution.py +52 -0
  13. async_durable_execution_runner/checkpoint/processors/step.py +124 -0
  14. async_durable_execution_runner/checkpoint/processors/wait.py +95 -0
  15. async_durable_execution_runner/checkpoint/transformer.py +104 -0
  16. async_durable_execution_runner/checkpoint/validators/__init__.py +1 -0
  17. async_durable_execution_runner/checkpoint/validators/checkpoint.py +242 -0
  18. async_durable_execution_runner/checkpoint/validators/operations/__init__.py +1 -0
  19. async_durable_execution_runner/checkpoint/validators/operations/callback.py +45 -0
  20. async_durable_execution_runner/checkpoint/validators/operations/context.py +73 -0
  21. async_durable_execution_runner/checkpoint/validators/operations/execution.py +47 -0
  22. async_durable_execution_runner/checkpoint/validators/operations/invoke.py +56 -0
  23. async_durable_execution_runner/checkpoint/validators/operations/step.py +106 -0
  24. async_durable_execution_runner/checkpoint/validators/operations/wait.py +54 -0
  25. async_durable_execution_runner/checkpoint/validators/transitions.py +66 -0
  26. async_durable_execution_runner/cli.py +498 -0
  27. async_durable_execution_runner/client.py +50 -0
  28. async_durable_execution_runner/exceptions.py +288 -0
  29. async_durable_execution_runner/execution.py +444 -0
  30. async_durable_execution_runner/executor.py +1234 -0
  31. async_durable_execution_runner/invoker.py +340 -0
  32. async_durable_execution_runner/model.py +3296 -0
  33. async_durable_execution_runner/observer.py +144 -0
  34. async_durable_execution_runner/py.typed +1 -0
  35. async_durable_execution_runner/runner.py +1167 -0
  36. async_durable_execution_runner/scheduler.py +246 -0
  37. async_durable_execution_runner/stores/__init__.py +1 -0
  38. async_durable_execution_runner/stores/base.py +147 -0
  39. async_durable_execution_runner/stores/filesystem.py +79 -0
  40. async_durable_execution_runner/stores/memory.py +38 -0
  41. async_durable_execution_runner/stores/sqlite.py +273 -0
  42. async_durable_execution_runner/token.py +49 -0
  43. async_durable_execution_runner/web/__init__.py +1 -0
  44. async_durable_execution_runner/web/errors.py +8 -0
  45. async_durable_execution_runner/web/handlers.py +813 -0
  46. async_durable_execution_runner/web/models.py +266 -0
  47. async_durable_execution_runner/web/routes.py +692 -0
  48. async_durable_execution_runner/web/serialization.py +235 -0
  49. async_durable_execution_runner/web/server.py +243 -0
  50. async_durable_execution_runner-2.0.0a1.dist-info/METADATA +238 -0
  51. async_durable_execution_runner-2.0.0a1.dist-info/RECORD +55 -0
  52. async_durable_execution_runner-2.0.0a1.dist-info/WHEEL +4 -0
  53. async_durable_execution_runner-2.0.0a1.dist-info/entry_points.txt +2 -0
  54. async_durable_execution_runner-2.0.0a1.dist-info/licenses/LICENSE +175 -0
  55. async_durable_execution_runner-2.0.0a1.dist-info/licenses/NOTICE +1 -0
@@ -0,0 +1,1234 @@
1
+ """Execution life-cycle logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ import uuid
8
+ from datetime import UTC, datetime
9
+ from typing import TYPE_CHECKING
10
+
11
+ from async_durable_execution.execution import (
12
+ DurableExecutionInvocationInput,
13
+ DurableExecutionInvocationOutput,
14
+ InvocationStatus,
15
+ )
16
+ from async_durable_execution.lambda_service import (
17
+ CallbackTimeoutType,
18
+ ErrorObject,
19
+ Operation,
20
+ OperationUpdate,
21
+ OperationStatus,
22
+ OperationType,
23
+ CallbackOptions,
24
+ )
25
+
26
+ from async_durable_execution_runner.exceptions import (
27
+ ExecutionAlreadyStartedException,
28
+ IllegalStateException,
29
+ InvalidParameterValueException,
30
+ ResourceNotFoundException,
31
+ )
32
+ from async_durable_execution_runner.execution import Execution
33
+ from async_durable_execution_runner.model import (
34
+ CheckpointDurableExecutionResponse,
35
+ CheckpointUpdatedExecutionState,
36
+ EventCreationContext,
37
+ EventType,
38
+ GetDurableExecutionHistoryResponse,
39
+ GetDurableExecutionResponse,
40
+ GetDurableExecutionStateResponse,
41
+ ListDurableExecutionsByFunctionResponse,
42
+ ListDurableExecutionsResponse,
43
+ SendDurableExecutionCallbackFailureResponse,
44
+ SendDurableExecutionCallbackHeartbeatResponse,
45
+ SendDurableExecutionCallbackSuccessResponse,
46
+ StartDurableExecutionInput,
47
+ StartDurableExecutionOutput,
48
+ StopDurableExecutionResponse,
49
+ TERMINAL_STATUSES,
50
+ )
51
+ from async_durable_execution_runner.model import (
52
+ Event as HistoryEvent,
53
+ )
54
+ from async_durable_execution_runner.model import (
55
+ Execution as ExecutionSummary,
56
+ )
57
+ from async_durable_execution_runner.observer import ExecutionObserver
58
+ from async_durable_execution_runner.token import CallbackToken
59
+
60
+
61
+ if TYPE_CHECKING:
62
+ from collections.abc import Awaitable, Callable
63
+ from concurrent.futures import Future
64
+
65
+ from async_durable_execution_runner.checkpoint.processor import (
66
+ CheckpointProcessor,
67
+ )
68
+ from async_durable_execution_runner.invoker import Invoker
69
+ from async_durable_execution_runner.scheduler import Event, Scheduler
70
+ from async_durable_execution_runner.stores.base import ExecutionStore
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+
75
+ class Executor(ExecutionObserver):
76
+ MAX_CONSECUTIVE_FAILED_ATTEMPTS: int = 5
77
+ RETRY_BACKOFF_SECONDS: int = 5
78
+
79
def __init__(
    self,
    store: ExecutionStore,
    scheduler: Scheduler,
    invoker: Invoker,
    checkpoint_processor: CheckpointProcessor,
):
    """Bind the executor to its collaborators and start with empty bookkeeping.

    Args:
        store: Persistence backend used to save, load, update and query
            executions.
        scheduler: Used to create completion events and schedule delayed
            calls such as the execution timeout.
        invoker: Builds invocation inputs and invokes the target function.
        checkpoint_processor: Handles checkpoint updates submitted for an
            execution.
    """
    # Per-executor bookkeeping; nothing is scheduled or tracked yet.
    self._execution_timeout: Future | None = None
    self._callback_heartbeats: dict[str, Future] = {}
    self._callback_timeouts: dict[str, Future] = {}
    # Completion events are keyed by durable execution ARN.
    self._completion_events: dict[str, Event] = {}

    # Injected collaborators.
    self._checkpoint_processor = checkpoint_processor
    self._invoker = invoker
    self._scheduler = scheduler
    self._store = store
94
+
95
def start_execution(
    self,
    input: StartDurableExecutionInput,  # noqa: A002
) -> StartDurableExecutionOutput:
    """Create, persist and launch a new durable execution.

    Args:
        input: The start request. If it carries no ``invocation_id``, a copy
            with a freshly generated UUID4 id is used instead.

    Returns:
        StartDurableExecutionOutput: Holds the ARN of the new execution.
    """
    # Only the invocation id is filled in; every other field is copied as-is.
    start_input = (
        input
        if input.invocation_id is not None
        else StartDurableExecutionInput(
            account_id=input.account_id,
            function_name=input.function_name,
            function_qualifier=input.function_qualifier,
            execution_name=input.execution_name,
            execution_timeout_seconds=input.execution_timeout_seconds,
            execution_retention_period_days=input.execution_retention_period_days,
            invocation_id=str(uuid.uuid4()),
            trace_fields=input.trace_fields,
            tenant_id=input.tenant_id,
            input=input.input,
            lambda_endpoint=input.lambda_endpoint,
        )
    )

    execution = Execution.new(input=start_input)
    execution.start()
    self._store.save(execution)
    execution_arn = execution.durable_execution_arn
    logger.debug("Created execution with ARN: %s", execution_arn)

    # Register the completion event under the execution's ARN.
    done_event = self._scheduler.create_event()
    self._completion_events[execution_arn] = done_event

    # A positive timeout arms a delayed call that marks the execution timed out.
    timeout_seconds = start_input.execution_timeout_seconds
    if timeout_seconds > 0:

        def timeout_handler():
            self.on_timed_out(
                execution_arn,
                ErrorObject.from_message(
                    f"Execution timed out after {timeout_seconds} seconds."
                ),
            )

        self._execution_timeout = self._scheduler.call_later(
            timeout_handler,
            delay=timeout_seconds,
            completion_event=done_event,
        )

    # Kick off the first invocation right away.
    self._invoke_execution(execution_arn)

    return StartDurableExecutionOutput(execution_arn=execution_arn)
144
+
145
def get_execution(self, execution_arn: str) -> Execution:
    """Load an execution from the store by its ARN.

    Args:
        execution_arn: ARN of the execution to look up.

    Returns:
        Execution: Whatever the store returns for that ARN.

    Raises:
        ResourceNotFoundException: If the store raises ``KeyError`` for
            the ARN.
    """
    try:
        found = self._store.load(execution_arn)
    except KeyError as missing:
        # Translate the store's lookup failure into the API-level error.
        raise ResourceNotFoundException(
            f"Execution {execution_arn} not found"
        ) from missing
    return found
162
+
163
def get_execution_details(self, execution_arn: str) -> GetDurableExecutionResponse:
    """Assemble the detailed view of one execution for the web API.

    Args:
        execution_arn: ARN of the execution to describe.

    Returns:
        GetDurableExecutionResponse: Status, timestamps, input payload and
        the result or error of the execution.

    Raises:
        ResourceNotFoundException: If the execution does not exist.
    """
    execution = self.get_execution(execution_arn)

    # Timestamps and the input payload come from the execution-started operation.
    started_op = execution.get_operation_execution_started()
    status = execution.current_status().value

    # Only a SUCCEEDED outcome yields a result; only a FAILED one yields an error.
    result = None
    error = None
    outcome = execution.result
    if outcome:
        if outcome.status == InvocationStatus.SUCCEEDED:
            result = outcome.result
        elif outcome.status == InvocationStatus.FAILED:
            error = outcome.error

    details = started_op.execution_details
    function_name = execution.start_input.function_name

    return GetDurableExecutionResponse(
        durable_execution_arn=execution.durable_execution_arn,
        durable_execution_name=execution.start_input.execution_name,
        # Region and account id in the ARN are fixed placeholder values.
        function_arn=f"arn:aws:lambda:us-east-1:123456789012:function:{function_name}",
        status=status,
        # Fall back to the current time when no start timestamp was recorded.
        start_timestamp=started_op.start_timestamp or datetime.now(UTC),
        input_payload=details.input_payload if details else None,
        result=result,
        error=error,
        end_timestamp=started_op.end_timestamp or None,
        version="1.0",
    )
208
+
209
def list_executions(
    self,
    function_name: str | None = None,
    function_version: str | None = None,  # noqa: ARG002
    execution_name: str | None = None,
    status_filter: str | None = None,
    started_after: str | None = None,
    started_before: str | None = None,
    marker: str | None = None,
    max_items: int | None = None,
    reverse_order: bool = False,  # noqa: FBT001, FBT002
) -> ListDurableExecutionsResponse:
    """List executions with filtering and pagination.

    Args:
        function_name: Filter by function name
        function_version: Accepted for interface compatibility; it is not
            forwarded to the store query
        execution_name: Filter by execution name
        status_filter: Filter by status (RUNNING, SUCCEEDED, FAILED)
        started_after: Filter executions started after this time
        started_before: Filter executions started before this time
        marker: Pagination marker; a stringified non-negative offset.
            Non-numeric or negative markers are treated as offset 0
        max_items: Maximum items to return (default 50)
        reverse_order: Return results in reverse chronological order

    Returns:
        ListDurableExecutionsResponse: List of executions with pagination
    """
    # Convert marker to offset. Markers come from callers, so a negative
    # number is clamped to 0 rather than passed to the store as a negative
    # offset.
    offset: int = 0
    if marker:
        try:
            offset = max(int(marker), 0)
        except ValueError:
            offset = 0

    # Query store directly with parameters
    executions, next_marker = self._store.query(
        function_name=function_name,
        execution_name=execution_name,
        status_filter=status_filter,
        started_after=started_after,
        started_before=started_before,
        limit=max_items or 50,
        offset=offset,
        reverse_order=reverse_order,
    )

    # Convert to ExecutionSummary objects
    execution_summaries: list[ExecutionSummary] = [
        ExecutionSummary.from_execution(execution, execution.current_status().value)
        for execution in executions
    ]

    return ListDurableExecutionsResponse(
        durable_executions=execution_summaries, next_marker=next_marker
    )
266
+
267
def list_executions_by_function(
    self,
    function_name: str,
    qualifier: str | None = None,  # noqa: ARG002
    execution_name: str | None = None,
    status_filter: str | None = None,
    started_after: str | None = None,
    started_before: str | None = None,
    marker: str | None = None,
    max_items: int | None = None,
    reverse_order: bool = False,  # noqa: FBT001, FBT002
) -> ListDurableExecutionsByFunctionResponse:
    """List the executions that belong to one function.

    This is a thin wrapper around ``list_executions`` with the function name
    fixed; the result is repackaged into the by-function response type.

    Args:
        function_name: Function whose executions are listed.
        qualifier: Function qualifier/version; accepted but not used.
        execution_name: Filter by execution name.
        status_filter: Filter by status (RUNNING, SUCCEEDED, FAILED).
        started_after: Only executions started after this time.
        started_before: Only executions started before this time.
        marker: Pagination marker.
        max_items: Maximum items to return (default 50).
        reverse_order: Return results in reverse chronological order.

    Returns:
        ListDurableExecutionsByFunctionResponse: Executions for the function
        plus the marker for the next page.
    """
    page = self.list_executions(
        function_name=function_name,
        execution_name=execution_name,
        status_filter=status_filter,
        started_after=started_after,
        started_before=started_before,
        marker=marker,
        max_items=max_items,
        reverse_order=reverse_order,
    )
    summaries = page.durable_executions
    following_marker = page.next_marker
    return ListDurableExecutionsByFunctionResponse(
        durable_executions=summaries,
        next_marker=following_marker,
    )
311
+
312
def stop_execution(
    self, execution_arn: str, error: ErrorObject | None = None
) -> StopDurableExecutionResponse:
    """Stop a running execution.

    Stopping an execution that is already complete is a no-op that reports
    the end timestamp already recorded on it (or the current time if none
    was recorded).

    Args:
        execution_arn: The execution ARN to stop
        error: Optional error to use when stopping the execution; a default
            "stopped by user request" error is used when omitted

    Returns:
        StopDurableExecutionResponse: Response containing the stop timestamp

    Raises:
        ResourceNotFoundException: If execution does not exist
    """
    execution = self.get_execution(execution_arn)

    if execution.is_complete:
        # Idempotent: return the existing stop timestamp
        execution_op = execution.get_operation_execution_started()
        stop_timestamp = execution_op.end_timestamp or datetime.now(UTC)
        return StopDurableExecutionResponse(stop_timestamp=stop_timestamp)

    # Use provided error or create a default one
    stop_error = error or ErrorObject.from_message(
        "Execution stopped by user request"
    )

    # Stop sets TERMINATED close status (different from fail).
    # This is a normal lifecycle event and no exception is active here, so
    # log at INFO; logger.exception would emit an ERROR record with an empty
    # traceback.
    logger.info("[%s] Stopping execution.", execution_arn)
    execution.complete_stopped(error=stop_error)  # Sets CloseStatus.TERMINATED
    self._store.update(execution)
    self._complete_events(execution_arn=execution_arn)

    return StopDurableExecutionResponse(stop_timestamp=datetime.now(UTC))
347
+
348
def get_execution_state(
    self,
    execution_arn: str,
    checkpoint_token: str | None = None,
    marker: str | None = None,
    max_items: int | None = None,
) -> GetDurableExecutionStateResponse:
    """Get execution state with operations.

    Args:
        execution_arn: The execution ARN
        checkpoint_token: Optional checkpoint token; when given it must be
            one of the execution's ``used_tokens``
        marker: Pagination marker; a stringified non-negative start index.
            Non-numeric or negative markers restart from the first operation
        max_items: Maximum items to return (default 100)

    Returns:
        GetDurableExecutionStateResponse: Execution state with operations

    Raises:
        ResourceNotFoundException: If execution does not exist
        InvalidParameterValueException: If checkpoint token is invalid
    """
    execution = self.get_execution(execution_arn)

    # A supplied token is only accepted if the execution already knows it.
    if checkpoint_token and checkpoint_token not in execution.used_tokens:
        msg: str = f"Invalid checkpoint token: {checkpoint_token}"
        raise InvalidParameterValueException(msg)

    # Get operations (excluding the initial EXECUTION operation for state)
    operations = execution.get_assertable_operations()

    # Apply pagination
    if max_items is None:
        max_items = 100

    # Simple index-based pagination. The marker is caller-supplied, so clamp
    # negatives to 0: a negative start would slice from the end of the list
    # and produce a wrong page and next_marker.
    start_index = 0
    if marker:
        try:
            start_index = max(int(marker), 0)
        except ValueError:
            start_index = 0

    end_index = start_index + max_items
    paginated_operations = operations[start_index:end_index]

    # Only hand out a marker when more operations remain after this page.
    next_marker = None
    if end_index < len(operations):
        next_marker = str(end_index)

    return GetDurableExecutionStateResponse(
        operations=paginated_operations, next_marker=next_marker
    )
402
+
403
def get_execution_history(
    self,
    execution_arn: str,
    include_execution_data: bool = False,  # noqa: FBT001, FBT002
    reverse_order: bool = False,  # noqa: FBT001, FBT002
    marker: str | None = None,
    max_items: int | None = None,
) -> GetDurableExecutionHistoryResponse:
    """Get execution history with events.

    Events are rebuilt on every call from the execution's recorded
    invocation completions and operations, sorted by timestamp, numbered
    from 1 in chronological order, and then paginated.

    Args:
        execution_arn: The execution ARN
        include_execution_data: Whether to include execution data in events
        reverse_order: Return events in reverse chronological order
        marker: Pagination marker (event_id); a non-numeric marker restarts
            from the first event
        max_items: Maximum items to return (default 100)

    Returns:
        GetDurableExecutionHistoryResponse: Execution history with events

    Raises:
        ResourceNotFoundException: If execution does not exist
    """
    execution: Execution = self.get_execution(execution_arn)

    # Generate events
    all_events: list[HistoryEvent] = []
    ops: list[Operation] = execution.operations
    updates: list[OperationUpdate] = execution.updates
    # Index updates by operation id; if an id occurs more than once the
    # last update in the list wins.
    updates_dict: dict[str, OperationUpdate] = {u.operation_id: u for u in updates}
    durable_execution_arn: str = execution.durable_execution_arn

    # Add InvocationCompleted events
    for completion in execution.invocation_completions:
        invocation_event = HistoryEvent.create_invocation_completed(
            event_id=0,  # Temporary, will be reassigned
            event_timestamp=completion.end_timestamp,
            start_timestamp=completion.start_timestamp,
            end_timestamp=completion.end_timestamp,
            request_id=completion.request_id,
        )
        all_events.append(invocation_event)

    # Generate all events first (without final event IDs)
    for op in ops:
        operation_update: OperationUpdate | None = updates_dict.get(
            op.operation_id, None
        )

        if op.status is OperationStatus.PENDING:
            # A PENDING operation only produces events when it is a chained
            # invoke that already has a start timestamp; otherwise skip it.
            if (
                op.operation_type is not OperationType.CHAINED_INVOKE
                or op.start_timestamp is None
            ):
                continue
            context: EventCreationContext = EventCreationContext(
                op,
                0,  # Temporary event_id, will be reassigned after sorting
                durable_execution_arn,
                execution.start_input,
                execution.result,
                operation_update,
                include_execution_data,
            )
            pending = HistoryEvent.create_chained_invoke_event_pending(context)
            all_events.append(pending)
        # Any operation with a start timestamp gets a "started" event.
        if op.start_timestamp is not None:
            context = EventCreationContext(
                op,
                0,  # Temporary event_id, will be reassigned after sorting
                durable_execution_arn,
                execution.start_input,
                execution.result,
                operation_update,
                include_execution_data,
            )
            started = HistoryEvent.create_event_started(context)
            all_events.append(started)
        # Operations that ended in a terminal status also get a closing event.
        if op.end_timestamp is not None and op.status in TERMINAL_STATUSES:
            context = EventCreationContext(
                op,
                0,  # Temporary event_id, will be reassigned after sorting
                durable_execution_arn,
                execution.start_input,
                execution.result,
                operation_update,
                include_execution_data,
            )
            finished = HistoryEvent.create_event_terminated(context)
            all_events.append(finished)

    # Sort events by timestamp to get correct chronological order
    all_events.sort(key=lambda event: event.event_timestamp)

    # Reassign event IDs based on chronological order
    all_events = [
        HistoryEvent.from_event_with_id(event, i)
        for i, event in enumerate(all_events, 1)
    ]

    # Apply cursor-based pagination
    if max_items is None:
        max_items = 100

    # Handle pagination marker
    if reverse_order:
        # IDs were assigned chronologically above, so after this reversal
        # the list runs from the highest event_id to the lowest.
        all_events.reverse()
    start_index: int = 0
    if marker:
        try:
            marker_event_id: int = int(marker)
            # Find where the page starts: the first event with
            # event_id >= marker in forward order, or the first event with
            # event_id < marker in reverse order. If no event qualifies the
            # page is empty (start_index stays at len(all_events)).
            start_index = len(all_events)
            for i, e in enumerate(all_events):
                is_valid_page_start: bool = (
                    e.event_id < marker_event_id
                    if reverse_order
                    else e.event_id >= marker_event_id
                )
                if is_valid_page_start:
                    start_index = i
                    break
        except ValueError:
            start_index = 0

    # Get paginated events
    end_index: int = start_index + max_items
    paginated_events: list[HistoryEvent] = all_events[start_index:end_index]

    # Generate next marker (only when events remain beyond this page)
    next_marker: str | None = None
    if end_index < len(all_events):
        if reverse_order:
            # Next marker is the event_id of the last returned event
            next_marker = (
                str(paginated_events[-1].event_id) if paginated_events else None
            )
        else:
            # Next marker is the event_id of the next event after the last returned.
            # The inner bounds check repeats the enclosing condition.
            next_marker = (
                str(all_events[end_index].event_id)
                if end_index < len(all_events)
                else None
            )

    return GetDurableExecutionHistoryResponse(
        events=paginated_events, next_marker=next_marker
    )
551
+
552
def checkpoint_execution(
    self,
    execution_arn: str,
    checkpoint_token: str,
    updates: list[OperationUpdate] | None = None,
    client_token: str | None = None,
) -> CheckpointDurableExecutionResponse:
    """Apply a checkpoint to an execution and return the next token.

    With updates, the work is delegated to the checkpoint processor and its
    token and new state are returned. Without updates, a new checkpoint
    token is generated on the execution and the execution is written back
    to the store.

    Args:
        execution_arn: ARN of the execution being checkpointed.
        checkpoint_token: Token presented by the caller; it must be one of
            the execution's ``used_tokens``.
        updates: Operation updates to process, if any.
        client_token: Client token forwarded to the checkpoint processor.

    Returns:
        CheckpointDurableExecutionResponse: The next checkpoint token and,
        when the processor reports one, the updated execution state.

    Raises:
        ResourceNotFoundException: If the execution does not exist.
        InvalidParameterValueException: If the checkpoint token is unknown.
    """
    execution = self.get_execution(execution_arn)

    if checkpoint_token not in execution.used_tokens:
        raise InvalidParameterValueException(
            f"Invalid checkpoint token: {checkpoint_token}"
        )

    if not updates:
        # Nothing to process: generate a new token and persist the execution.
        fresh_token = execution.get_new_checkpoint_token()
        self._store.update(execution)
        return CheckpointDurableExecutionResponse(
            checkpoint_token=fresh_token,
            new_execution_state=None,
        )

    processed = self._checkpoint_processor.process_checkpoint(
        checkpoint_token=checkpoint_token,
        updates=updates,
        client_token=client_token,
    )

    # Repackage the processor's state, if any, into the response model.
    updated_state = None
    if processed.new_execution_state:
        updated_state = CheckpointUpdatedExecutionState(
            operations=processed.new_execution_state.operations,
            next_marker=processed.new_execution_state.next_marker,
        )

    return CheckpointDurableExecutionResponse(
        checkpoint_token=processed.checkpoint_token,
        new_execution_state=updated_state,
    )
608
+
609
def send_callback_success(
    self,
    callback_id: str,
    result: bytes | None = None,
) -> SendDurableExecutionCallbackSuccessResponse:
    """Complete a callback successfully and re-invoke its execution.

    Args:
        callback_id: Identifier of the callback being answered.
        result: Optional payload to record as the callback's result.

    Returns:
        SendDurableExecutionCallbackSuccessResponse: Empty response.

    Raises:
        InvalidParameterValueException: If ``callback_id`` is empty.
        ResourceNotFoundException: If any step of processing the callback
            fails; the original exception is chained as the cause.
    """
    if not callback_id:
        raise InvalidParameterValueException("callback_id is required")

    try:
        token = CallbackToken.from_str(callback_id)
        owner = self.get_execution(token.execution_arn)
        owner.complete_callback_success(callback_id, result)
        self._store.update(owner)
        # The callback is finished, so its timers are cleaned up.
        self._cleanup_callback_timeouts(callback_id)
        self._invoke_execution(token.execution_arn)
        logger.info("Callback success completed for callback_id: %s", callback_id)
    except Exception as failure:
        # Every failure is reported to the caller as "not found".
        raise ResourceNotFoundException(
            f"Failed to process callback success: {failure}"
        ) from failure

    return SendDurableExecutionCallbackSuccessResponse()
644
+
645
def send_callback_failure(
    self,
    callback_id: str,
    error: ErrorObject | None = None,
) -> SendDurableExecutionCallbackFailureResponse:
    """Complete a callback as failed and re-invoke its execution.

    Args:
        callback_id: Identifier of the callback being answered.
        error: Optional error to record; an error built from an empty
            message is used when omitted.

    Returns:
        SendDurableExecutionCallbackFailureResponse: Empty response.

    Raises:
        InvalidParameterValueException: If ``callback_id`` is empty.
        ResourceNotFoundException: If any step of processing the callback
            fails; the original exception is chained as the cause.
    """
    if not callback_id:
        raise InvalidParameterValueException("callback_id is required")

    # Substitute a blank error when the caller did not supply one.
    failure_detail: ErrorObject = error or ErrorObject.from_message("")

    try:
        token: CallbackToken = CallbackToken.from_str(callback_id)
        owner: Execution = self.get_execution(token.execution_arn)
        owner.complete_callback_failure(callback_id, failure_detail)
        self._store.update(owner)
        # The callback is finished, so its timers are cleaned up.
        self._cleanup_callback_timeouts(callback_id)
        self._invoke_execution(token.execution_arn)
        logger.info("Callback failure completed for callback_id: %s", callback_id)
    except Exception as failure:
        # Every failure is reported to the caller as "not found".
        raise ResourceNotFoundException(
            f"Failed to process callback failure: {failure}"
        ) from failure

    return SendDurableExecutionCallbackFailureResponse()
682
+
683
def send_callback_heartbeat(
    self, callback_id: str
) -> SendDurableExecutionCallbackHeartbeatResponse:
    """Record a heartbeat for a callback that is still in progress.

    Args:
        callback_id: Identifier of the callback sending the heartbeat.

    Returns:
        SendDurableExecutionCallbackHeartbeatResponse: Empty response.

    Raises:
        InvalidParameterValueException: If ``callback_id`` is empty.
        ResourceNotFoundException: If the callback is not in STARTED status
            or any other step fails; the original exception is chained as
            the cause.
    """
    if not callback_id:
        raise InvalidParameterValueException("callback_id is required")

    try:
        token: CallbackToken = CallbackToken.from_str(callback_id)
        owner: Execution = self.get_execution(token.execution_arn)

        # Only a callback whose operation is STARTED may heartbeat.
        _, callback_op = owner.find_callback_operation(callback_id)
        if callback_op.status != OperationStatus.STARTED:
            # Raised inside the try on purpose: the handler below wraps it
            # just like any other failure.
            raise ResourceNotFoundException(f"Callback {callback_id} is not active")

        # Restart the heartbeat timer for this callback.
        self._reset_callback_heartbeat_timeout(
            callback_id, owner.durable_execution_arn
        )
        logger.info("Callback heartbeat processed for callback_id: %s", callback_id)
    except Exception as failure:
        # Every failure is reported to the caller as "not found".
        raise ResourceNotFoundException(
            f"Failed to process callback heartbeat: {failure}"
        ) from failure

    return SendDurableExecutionCallbackHeartbeatResponse()
722
+
723
def _validate_invocation_response_and_store(
    self,
    execution_arn: str,
    response: DurableExecutionInvocationOutput,
    execution: Execution,
):
    """Check an invocation response and act on its status.

    FAILED and SUCCEEDED responses complete the workflow with the error or
    result respectively; a PENDING response is only logged.

    Args:
        execution_arn: ARN of the execution the response belongs to.
        response: Output returned by the invoked function.
        execution: Current state of the execution.

    Raises:
        InvalidParameterValueException: If the status is missing, a FAILED
            response carries a result, a SUCCEEDED response carries an
            error, or a PENDING response has no pending operations.
        IllegalStateException: If the execution is already complete or the
            status is not one of FAILED, SUCCEEDED or PENDING.
    """
    if execution.is_complete:
        raise IllegalStateException("Execution already completed, ignoring result")

    status = response.status
    if status is None:
        raise InvalidParameterValueException("Response status is required")

    if status == InvocationStatus.FAILED:
        # A failure must not also carry a result.
        if response.result is not None:
            raise InvalidParameterValueException(
                "Cannot provide a Result for FAILED status."
            )
        logger.info("[%s] Execution failed", execution_arn)
        self._complete_workflow(execution_arn, result=None, error=response.error)
    elif status == InvocationStatus.SUCCEEDED:
        # A success must not also carry an error.
        if response.error is not None:
            raise InvalidParameterValueException(
                "Cannot provide an Error for SUCCEEDED status."
            )
        logger.info("[%s] Execution succeeded", execution_arn)
        self._complete_workflow(execution_arn, result=response.result, error=None)
    elif status == InvocationStatus.PENDING:
        # PENDING is only valid while some operation is still outstanding.
        if not execution.has_pending_operations(execution):
            raise InvalidParameterValueException(
                "Cannot return PENDING status with no pending operations."
            )
        logger.info("[%s] Execution pending async work", execution_arn)
    else:
        raise IllegalStateException(
            f"Unexpected invocation status: {response.status}"
        )
781
+
782
def _invoke_handler(self, execution_arn: str) -> Callable[[], Awaitable[None]]:
    """Create a parameterless callable that captures execution arn for the scheduler.

    The returned coroutine function performs one invocation attempt for the
    execution and routes the outcome:

    * a response that passes ``_validate_invocation_response_and_store`` is
      handled by that method;
    * ``InvalidParameterValueException`` / ``IllegalStateException`` raised
      while validating the response go through ``_retry_invocation``;
    * ``ResourceNotFoundException`` raised anywhere in the attempt fails the
      workflow via ``_fail_workflow``;
    * any other exception goes through ``_retry_invocation``.

    Args:
        execution_arn: ARN of the execution to invoke; bound into the closure.

    Returns:
        An async callable that takes no arguments and returns None.
    """

    async def invoke() -> None:
        # Loaded outside the try block: a failure here propagates to the caller
        # instead of being routed through the retry/fail handlers below.
        execution: Execution = self._store.load(execution_arn)

        # Early exit if execution is already completed - like Java's COMPLETED check
        if execution.is_complete:
            logger.info(
                "[%s] Execution already completed, ignoring result", execution_arn
            )
            return

        try:
            invocation_input: DurableExecutionInvocationInput = (
                self._invoker.create_invocation_input(execution=execution)
            )

            # Persist before invoking.
            # NOTE(review): presumably create_invocation_input mutates the
            # execution (hence the save) - confirm against the invoker.
            self._store.save(execution)

            # Timestamps bracket the invoke call so the invocation can be recorded below.
            invocation_start = datetime.now(UTC)
            # NOTE(review): invoke() is called without await inside a coroutine;
            # looks like a synchronous call - confirm.
            invoke_response = self._invoker.invoke(
                execution.start_input.function_name,
                invocation_input,
                execution.start_input.lambda_endpoint,
            )
            invocation_end = datetime.now(UTC)

            # Reload execution after invocation in case it was completed via checkpoint
            execution = self._store.load(execution_arn)

            # Record invocation completion and save immediately
            execution.record_invocation_completion(
                invocation_start, invocation_end, invoke_response.request_id
            )
            self._store.save(execution)

            # The reloaded execution may already be terminal; in that case the
            # handler's response is ignored.
            if execution.is_complete:
                logger.info(
                    "[%s] Execution completed during invocation, ignoring result",
                    execution_arn,
                )
                return

            # Process successful received response - validate status and handle accordingly
            response = invoke_response.invocation_output
            try:
                self._validate_invocation_response_and_store(
                    execution_arn, response, execution
                )
            except (InvalidParameterValueException, IllegalStateException) as e:
                # An invalid response is treated like a failed attempt: retry
                # (or fail once retries are exhausted) with the validation error.
                logger.warning(
                    "[%s] Lambda output validation failure: %s", execution_arn, e
                )
                error_obj = ErrorObject.from_exception(e)
                self._retry_invocation(execution, error_obj)

        except ResourceNotFoundException:
            # Not retried: the workflow is failed right away.
            logger.warning(
                "[%s] Function No longer exists: %s",
                execution_arn,
                execution.start_input.function_name,
            )
            error_obj = ErrorObject.from_message(
                message=f"Function not found: {execution.start_input.function_name}"
            )
            self._fail_workflow(execution_arn, error_obj)

        except Exception as e:  # noqa: BLE001
            # Handle invocation errors (network, function not found, etc.)
            logger.warning("[%s] Invocation failed: %s", execution_arn, e)
            error_obj = ErrorObject.from_exception(e)
            self._retry_invocation(execution, error_obj)

    return invoke
857
+
858
+ def _invoke_execution(self, execution_arn: str, delay: float = 0) -> None:
859
+ """Invoke execution after delay in seconds."""
860
+ completion_event = self._completion_events.get(execution_arn)
861
+ self._scheduler.call_later(
862
+ self._invoke_handler(execution_arn),
863
+ delay=delay,
864
+ completion_event=completion_event,
865
+ )
866
+
867
+ def _complete_workflow(
868
+ self, execution_arn: str, result: str | None, error: ErrorObject | None
869
+ ):
870
+ """Complete workflow - handles both success and failure with terminal state validation."""
871
+ execution = self._store.load(execution_arn)
872
+
873
+ if execution.is_complete:
874
+ msg: str = "Cannot make multiple close workflow decisions."
875
+
876
+ raise IllegalStateException(msg)
877
+
878
+ if error is not None:
879
+ self.fail_execution(execution_arn, error)
880
+ else:
881
+ self.complete_execution(execution_arn, result)
882
+
883
+ def _fail_workflow(self, execution_arn: str, error: ErrorObject):
884
+ """Fail workflow with terminal state validation."""
885
+ execution = self._store.load(execution_arn)
886
+
887
+ if execution.is_complete:
888
+ msg: str = "Cannot make multiple close workflow decisions."
889
+
890
+ raise IllegalStateException(msg)
891
+
892
+ self.fail_execution(execution_arn, error)
893
+
894
+ def _retry_invocation(self, execution: Execution, error: ErrorObject):
895
+ """Handle retry logic or fail execution if retries exhausted."""
896
+ if (
897
+ execution.consecutive_failed_invocation_attempts
898
+ > self.MAX_CONSECUTIVE_FAILED_ATTEMPTS
899
+ ):
900
+ # Exhausted retries - fail the execution
901
+ self._fail_workflow(
902
+ execution_arn=execution.durable_execution_arn, error=error
903
+ )
904
+ else:
905
+ # Schedule retry with backoff
906
+ execution.consecutive_failed_invocation_attempts += 1
907
+ self._store.save(execution)
908
+ self._invoke_execution(
909
+ execution_arn=execution.durable_execution_arn,
910
+ delay=self.RETRY_BACKOFF_SECONDS,
911
+ )
912
+
913
+ def _complete_events(self, execution_arn: str):
914
+ # complete doesn't actually checkpoint explicitly
915
+ if event := self._completion_events.get(execution_arn):
916
+ event.set()
917
+ if self._execution_timeout:
918
+ self._execution_timeout.cancel()
919
+ self._execution_timeout = None
920
+
921
+ def wait_until_complete(
922
+ self, execution_arn: str, timeout: float | None = None
923
+ ) -> bool:
924
+ """Block until execution completion. Don't do this unless you actually want to block.
925
+
926
+ Args
927
+ timeout (int|float|None): Wait for event to set until this timeout.
928
+
929
+ Returns:
930
+ True when set. False if the event timed out without being set.
931
+ """
932
+ if event := self._completion_events.get(execution_arn):
933
+ return event.wait(timeout)
934
+
935
+ # this really shouldn't happen - implies execution timed out?
936
+ msg: str = "execution does not exist."
937
+
938
+ raise ResourceNotFoundException(msg)
939
+
940
def complete_execution(self, execution_arn: str, result: str | None = None) -> None:
    """Close the execution as successful (COMPLETE_WORKFLOW_EXECUTION decision).

    Loads the execution, applies ``complete_success``, persists it with
    ``store.update`` and then signals completion through ``_complete_events``.

    Args:
        execution_arn: ARN of the execution to complete.
        result: Result payload handed to ``complete_success``; may be None.

    Raises:
        IllegalStateException: If ``execution.result`` is still None after
            ``complete_success``. The update has already been stored at that
            point and the completion event is not signalled.
    """
    logger.debug("[%s] Completing execution with result: %s", execution_arn, result)

    completed: Execution = self._store.load(execution_arn=execution_arn)
    completed.complete_success(result=result)  # Sets CloseStatus.COMPLETED
    self._store.update(completed)

    if completed.result is not None:
        self._complete_events(execution_arn=execution_arn)
        return

    missing_result_msg: str = "Execution result is required"
    raise IllegalStateException(missing_result_msg)
950
+
951
def fail_execution(self, execution_arn: str, error: ErrorObject) -> None:
    """Close the execution as failed (FAIL_WORKFLOW_EXECUTION decision).

    Loads the execution, applies ``complete_fail``, persists it with
    ``store.update`` and then signals completion through ``_complete_events``.

    Args:
        execution_arn: ARN of the execution to fail.
        error: Error handed to ``complete_fail``.

    Raises:
        IllegalStateException: If ``execution.result`` is None after
            ``complete_fail``. The update has already been stored at that point
            and the completion event is not signalled.
    """
    logger.error("[%s] Completing execution with error: %s", execution_arn, error)

    failed: Execution = self._store.load(execution_arn=execution_arn)
    failed.complete_fail(error=error)  # Sets CloseStatus.FAILED
    self._store.update(failed)

    # set by complete_fail
    if failed.result is not None:
        self._complete_events(execution_arn=execution_arn)
        return

    missing_result_msg: str = "Execution result is required"
    raise IllegalStateException(missing_result_msg)
962
+
963
def _on_wait_succeeded(self, execution_arn: str, operation_id: str) -> None:
    """Mark a wait operation as completed once its timer has elapsed.

    Does nothing besides logging when the execution is already complete.
    Errors raised while completing the wait or storing the execution are
    logged and swallowed.

    Args:
        execution_arn: ARN of the execution that owns the wait operation.
        operation_id: Identifier of the wait operation to complete.
    """
    execution = self._store.load(execution_arn)

    if not execution.is_complete:
        try:
            execution.complete_wait(operation_id=operation_id)
            self._store.update(execution)
            logger.debug(
                "[%s] Wait succeeded for operation %s", execution_arn, operation_id
            )
        except Exception:
            logger.exception("[%s] Error processing wait succeeded.", execution_arn)
        return

    logger.info(
        "[%s] Execution already completed, ignoring wait succeeded event",
        execution_arn,
    )
982
+
983
def _on_retry_ready(self, execution_arn: str, operation_id: str) -> None:
    """Mark a step retry as ready once its retry delay has elapsed.

    Does nothing besides logging when the execution is already complete.
    Errors raised while completing the retry or storing the execution are
    logged and swallowed.

    Args:
        execution_arn: ARN of the execution that owns the step.
        operation_id: Identifier of the operation whose retry is ready.
    """
    execution = self._store.load(execution_arn)

    if not execution.is_complete:
        try:
            execution.complete_retry(operation_id=operation_id)
            self._store.update(execution)
            logger.debug(
                "[%s] Retry ready for operation %s", execution_arn, operation_id
            )
        except Exception:
            logger.exception("[%s] Error processing retry ready.", execution_arn)
        return

    logger.info(
        "[%s] Execution already completed, ignoring retry", execution_arn
    )
1001
+
1002
+ # region ExecutionObserver
1003
+ def on_completed(self, execution_arn: str, result: str | None = None) -> None:
1004
+ """Complete execution successfully. Observer method triggered by notifier."""
1005
+ self.complete_execution(execution_arn, result)
1006
+
1007
def on_failed(self, execution_arn: str, error: ErrorObject) -> None:
    """Observer hook for failure; delegates to ``fail_execution``.

    Args:
        execution_arn: ARN of the execution that failed.
        error: Error forwarded unchanged.
    """
    self.fail_execution(execution_arn=execution_arn, error=error)
1010
+
1011
def on_timed_out(self, execution_arn: str, error: ErrorObject) -> None:
    """Handle execution timeout (workflow timeout). Observer method triggered by notifier.

    Loads the execution, applies ``complete_timeout``, persists it with
    ``store.update`` and signals completion through ``_complete_events``.

    Args:
        execution_arn: ARN of the execution that timed out.
        error: Error handed to ``complete_timeout``.
    """
    # logger.error rather than logger.exception: no exception is being handled
    # here, so logger.exception would append a bogus "NoneType: None" traceback.
    logger.error("[%s] Execution timed out.", execution_arn)
    execution: Execution = self._store.load(execution_arn=execution_arn)
    execution.complete_timeout(error=error)  # Sets CloseStatus.TIMED_OUT
    self._store.update(execution)
    self._complete_events(execution_arn=execution_arn)
1018
+
1019
def on_stopped(self, execution_arn: str, error: ErrorObject) -> None:
    """Observer hook for a stopped execution; delegates to ``fail_execution``.

    Args:
        execution_arn: ARN of the execution that was stopped.
        error: Error forwarded unchanged.
    """
    # This should not be called directly - stop_execution handles termination
    self.fail_execution(execution_arn=execution_arn, error=error)
1023
+
1024
def on_wait_timer_scheduled(
    self, execution_arn: str, operation_id: str, delay: float
) -> None:
    """Schedule a wait operation. Observer method triggered by notifier.

    After ``delay`` seconds the scheduler runs a handler that completes the
    wait operation and then schedules an immediate invocation of the execution.

    Args:
        execution_arn: ARN of the execution that owns the wait operation.
        operation_id: Identifier of the wait operation.
        delay: Delay in seconds before the wait is considered elapsed.
    """
    # %s, not %d: delay is a float and %d would truncate it (0.5 would log as 0).
    logger.debug("[%s] scheduling wait with delay: %s", execution_arn, delay)

    def wait_handler() -> None:
        self._on_wait_succeeded(execution_arn, operation_id)
        self._invoke_execution(execution_arn, delay=0)

    completion_event = self._completion_events.get(execution_arn)
    self._scheduler.call_later(
        wait_handler, delay=delay, completion_event=completion_event
    )
1038
+
1039
def on_step_retry_scheduled(
    self, execution_arn: str, operation_id: str, delay: float
) -> None:
    """Schedule a retry of a step. Observer method triggered by notifier.

    After ``delay`` seconds the scheduler runs a handler that marks the retry
    as ready and then schedules an immediate invocation of the execution.

    Args:
        execution_arn: ARN of the execution that owns the step.
        operation_id: Identifier of the step operation to retry.
        delay: Delay in seconds before the retry becomes ready.
    """
    # %s, not %d: delay is a float and %d would truncate it (0.5 would log as 0).
    logger.debug(
        "[%s] scheduling retry for %s with delay: %s",
        execution_arn,
        operation_id,
        delay,
    )

    def retry_handler() -> None:
        self._on_retry_ready(execution_arn, operation_id)
        self._invoke_execution(execution_arn, delay=0)

    completion_event = self._completion_events.get(execution_arn)
    self._scheduler.call_later(
        retry_handler, delay=delay, completion_event=completion_event
    )
1058
+
1059
def on_callback_created(
    self,
    execution_arn: str,
    operation_id: str,
    callback_options: CallbackOptions | None,
    callback_token: CallbackToken,
) -> None:
    """Observer hook for a newly created callback.

    Derives the callback id from the token, logs the creation and hands the
    options to ``_schedule_callback_timeouts``.

    Args:
        execution_arn: ARN of the execution that owns the callback.
        operation_id: Identifier of the callback operation (used for logging).
        callback_options: Timeout configuration for the callback, or None.
        callback_token: Token whose string form is used as the callback id.
    """
    token_str = callback_token.to_str()
    logger.debug(
        "[%s] Callback created for operation %s with callback_id: %s",
        execution_arn,
        operation_id,
        token_str,
    )

    # Schedule callback timeouts if configured
    self._schedule_callback_timeouts(execution_arn, callback_options, token_str)
1077
+
1078
+ # endregion ExecutionObserver
1079
+
1080
+ # region Callback Timeouts
1081
+ def _schedule_callback_timeouts(
1082
+ self,
1083
+ execution_arn: str,
1084
+ callback_options: CallbackOptions | None,
1085
+ callback_id: str,
1086
+ ) -> None:
1087
+ """Schedule callback timeout and heartbeat timeout if configured."""
1088
+ try:
1089
+ if not callback_options:
1090
+ return
1091
+
1092
+ completion_event = self._completion_events.get(execution_arn)
1093
+
1094
+ # Schedule main timeout if configured
1095
+ if callback_options.timeout_seconds > 0:
1096
+
1097
+ def timeout_handler():
1098
+ self._on_callback_timeout(execution_arn, callback_id)
1099
+
1100
+ timeout_future = self._scheduler.call_later(
1101
+ timeout_handler,
1102
+ delay=callback_options.timeout_seconds,
1103
+ completion_event=completion_event,
1104
+ )
1105
+ self._callback_timeouts[callback_id] = timeout_future
1106
+
1107
+ # Schedule heartbeat timeout if configured
1108
+ if callback_options.heartbeat_timeout_seconds > 0:
1109
+
1110
+ def heartbeat_timeout_handler():
1111
+ self._on_callback_heartbeat_timeout(execution_arn, callback_id)
1112
+
1113
+ heartbeat_future = self._scheduler.call_later(
1114
+ heartbeat_timeout_handler,
1115
+ delay=callback_options.heartbeat_timeout_seconds,
1116
+ completion_event=completion_event,
1117
+ )
1118
+ self._callback_heartbeats[callback_id] = heartbeat_future
1119
+
1120
+ except Exception:
1121
+ logger.exception(
1122
+ "[%s] Error scheduling callback timeouts for %s",
1123
+ execution_arn,
1124
+ callback_id,
1125
+ )
1126
+
1127
def _reset_callback_heartbeat_timeout(
    self, callback_id: str, execution_arn: str
) -> None:
    """Restart the heartbeat timer of a callback.

    Cancels the currently registered heartbeat future (if any), then looks up
    the callback's options in the execution's updates and, when a positive
    heartbeat timeout is configured, schedules a fresh timer. Errors during
    the lookup or rescheduling are logged and swallowed.

    Args:
        callback_id: String form of the callback token.
        execution_arn: ARN used for logging, for the completion-event lookup
            and for the timeout handler. The execution itself is fetched via
            the ARN embedded in the callback token.
    """
    # Cancel existing heartbeat timeout
    stale_future = self._callback_heartbeats.pop(callback_id, None)
    if stale_future:
        stale_future.cancel()

    # Find callback options to reschedule heartbeat timeout
    try:
        token = CallbackToken.from_str(callback_id)
        execution = self.get_execution(token.execution_arn)

        # First START update of this operation that carries callback options.
        options = next(
            (
                update.callback_options
                for update in execution.updates
                if update.operation_id == token.operation_id
                and update.callback_options
                and update.action.value == "START"
            ),
            None,
        )

        if options and options.heartbeat_timeout_seconds > 0:

            def heartbeat_timeout_handler():
                self._on_callback_heartbeat_timeout(execution_arn, callback_id)

            done_event = self._completion_events.get(execution_arn)

            fresh_future = self._scheduler.call_later(
                heartbeat_timeout_handler,
                delay=options.heartbeat_timeout_seconds,
                completion_event=done_event,
            )
            self._callback_heartbeats[callback_id] = fresh_future

    except Exception:
        logger.exception(
            "[%s] Error resetting callback heartbeat timeout for %s",
            execution_arn,
            callback_id,
        )
1170
+
1171
+ def _cleanup_callback_timeouts(self, callback_id: str) -> None:
1172
+ """Clean up timeout events for a completed callback."""
1173
+ # Clean up main timeout
1174
+ if timeout_future := self._callback_timeouts.pop(callback_id, None):
1175
+ timeout_future.cancel()
1176
+
1177
+ # Clean up heartbeat timeout
1178
+ if heartbeat_future := self._callback_heartbeats.pop(callback_id, None):
1179
+ heartbeat_future.cancel()
1180
+
1181
def _on_callback_timeout(self, execution_arn: str, callback_id: str) -> None:
    """Fail a callback whose main timeout elapsed and re-invoke the execution.

    Does nothing when the execution is already complete. Any exception is
    logged and swallowed.

    Args:
        execution_arn: ARN used for logging.
        callback_id: String form of the callback token; the execution is
            looked up via the ARN embedded in that token.
    """
    try:
        token = CallbackToken.from_str(callback_id)
        execution = self.get_execution(token.execution_arn)

        if not execution.is_complete:
            # Fail the callback with timeout error
            error = ErrorObject.from_message(
                f"Callback timed out: {CallbackTimeoutType.TIMEOUT.value}"
            )
            execution.complete_callback_timeout(callback_id, error)
            self._store.update(execution)
            logger.warning("[%s] Callback %s timed out", execution_arn, callback_id)
            self._invoke_execution(token.execution_arn)
    except Exception:
        logger.exception(
            "[%s] Error processing callback timeout for %s",
            execution_arn,
            callback_id,
        )
1204
+
1205
def _on_callback_heartbeat_timeout(
    self, execution_arn: str, callback_id: str
) -> None:
    """Fail a callback whose heartbeat timeout elapsed and re-invoke the execution.

    Does nothing when the execution is already complete. Any exception is
    logged and swallowed.

    Args:
        execution_arn: ARN used for logging.
        callback_id: String form of the callback token; the execution is
            looked up via the ARN embedded in that token.
    """
    try:
        token = CallbackToken.from_str(callback_id)
        execution = self.get_execution(token.execution_arn)

        if not execution.is_complete:
            # Fail the callback with heartbeat timeout error
            error = ErrorObject.from_message(
                f"Callback heartbeat timed out: {CallbackTimeoutType.HEARTBEAT.value}"
            )
            execution.complete_callback_timeout(callback_id, error)
            self._store.update(execution)
            logger.warning(
                "[%s] Callback %s heartbeat timed out", execution_arn, callback_id
            )
            self._invoke_execution(token.execution_arn)
    except Exception:
        logger.exception(
            "[%s] Error processing callback heartbeat timeout for %s",
            execution_arn,
            callback_id,
        )
1233
+
1234
+ # endregion Callback Timeouts