async-durable-execution 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. async_durable_execution/.gitignore +0 -0
  2. async_durable_execution/__about__.py +4 -0
  3. async_durable_execution/__init__.py +42 -0
  4. async_durable_execution/concurrency/__init__.py +0 -0
  5. async_durable_execution/concurrency/executor.py +461 -0
  6. async_durable_execution/concurrency/models.py +540 -0
  7. async_durable_execution/config.py +499 -0
  8. async_durable_execution/context.py +635 -0
  9. async_durable_execution/exceptions.py +403 -0
  10. async_durable_execution/execution.py +463 -0
  11. async_durable_execution/identifier.py +14 -0
  12. async_durable_execution/lambda_service.py +1120 -0
  13. async_durable_execution/logger.py +131 -0
  14. async_durable_execution/operation/__init__.py +1 -0
  15. async_durable_execution/operation/base.py +187 -0
  16. async_durable_execution/operation/callback.py +182 -0
  17. async_durable_execution/operation/child.py +277 -0
  18. async_durable_execution/operation/invoke.py +172 -0
  19. async_durable_execution/operation/map.py +137 -0
  20. async_durable_execution/operation/parallel.py +122 -0
  21. async_durable_execution/operation/step.py +359 -0
  22. async_durable_execution/operation/wait.py +111 -0
  23. async_durable_execution/operation/wait_for_condition.py +283 -0
  24. async_durable_execution/py.typed +1 -0
  25. async_durable_execution/retries.py +174 -0
  26. async_durable_execution/serdes.py +502 -0
  27. async_durable_execution/state.py +798 -0
  28. async_durable_execution/suspend.py +84 -0
  29. async_durable_execution/threading.py +222 -0
  30. async_durable_execution/types.py +180 -0
  31. async_durable_execution/waits.py +130 -0
  32. async_durable_execution-0.1.0.dist-info/METADATA +80 -0
  33. async_durable_execution-0.1.0.dist-info/RECORD +35 -0
  34. async_durable_execution-0.1.0.dist-info/WHEEL +4 -0
  35. async_durable_execution-0.1.0.dist-info/licenses/LICENSE +175 -0
File without changes
@@ -0,0 +1,4 @@
1
# SPDX-FileCopyrightText: 2025-present Amazon.com, Inc. or its affiliates.
#
# SPDX-License-Identifier: Apache-2.0
# Single source of truth for the package version; re-exported by __init__.py.
__version__ = "0.1.0"
@@ -0,0 +1,42 @@
1
+ """AWS Lambda Durable Executions Python SDK."""
2
+
3
+ # Package metadata
4
+ from .__about__ import __version__
5
+
6
+ # Main context - used in every durable function
7
+ # Helper decorators - commonly used for step functions
8
+ # Concurrency
9
+ from .concurrency.models import BatchResult
10
+ from .context import (
11
+ DurableContext,
12
+ durable_step,
13
+ durable_wait_for_callback,
14
+ durable_with_child_context,
15
+ )
16
+
17
+ # Most common exceptions - users need to handle these exceptions
18
+ from .exceptions import (
19
+ DurableExecutionsError,
20
+ InvocationError,
21
+ ValidationError,
22
+ )
23
+
24
+ # Core decorator - used in every durable function
25
+ from .execution import durable_execution
26
+
27
+ # Essential context types - passed to user functions
28
+ from .types import StepContext
29
+
30
+ __all__ = [
31
+ "BatchResult",
32
+ "DurableContext",
33
+ "DurableExecutionsError",
34
+ "InvocationError",
35
+ "StepContext",
36
+ "ValidationError",
37
+ "__version__",
38
+ "durable_execution",
39
+ "durable_step",
40
+ "durable_wait_for_callback",
41
+ "durable_with_child_context",
42
+ ]
File without changes
@@ -0,0 +1,461 @@
1
+ """Concurrent executor for parallel and map operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import heapq
6
+ import logging
7
+ import threading
8
+ import time
9
+ from abc import ABC, abstractmethod
10
+ from concurrent.futures import Future, ThreadPoolExecutor
11
+ from typing import TYPE_CHECKING, Generic, Self, TypeVar
12
+
13
+ from .models import (
14
+ BatchItem,
15
+ BatchItemStatus,
16
+ BatchResult,
17
+ BranchStatus,
18
+ Executable,
19
+ ExecutableWithState,
20
+ ExecutionCounters,
21
+ SuspendResult,
22
+ )
23
+ from ..config import ChildConfig
24
+ from ..exceptions import (
25
+ OrphanedChildException,
26
+ SuspendExecution,
27
+ TimedSuspendExecution,
28
+ )
29
+ from ..identifier import OperationIdentifier
30
+ from ..lambda_service import ErrorObject
31
+ from ..operation.child import child_handler
32
+
33
+ if TYPE_CHECKING:
34
+ from collections.abc import Callable
35
+
36
+ from ..config import CompletionConfig
37
+ from ..context import DurableContext
38
+ from ..lambda_service import OperationSubType
39
+ from ..serdes import SerDes
40
+ from ..state import ExecutionState
41
+ from ..types import SummaryGenerator
42
+
43
+
44
# Module-level logger following the stdlib convention.
logger = logging.getLogger(__name__)

# NOTE(review): T and R appear unused within this module — candidates for
# removal if nothing else imports them; verify before deleting.
T = TypeVar("T")
R = TypeVar("R")

# CallableType: the callable executed per item; ResultType: each item's result.
CallableType = TypeVar("CallableType")
ResultType = TypeVar("ResultType")
51
+
52
+
53
+ # region concurrency logic
54
+ class TimerScheduler:
55
+ """Manage timed suspend tasks with a background timer thread."""
56
+
57
+ def __init__(
58
+ self, resubmit_callback: Callable[[ExecutableWithState], None]
59
+ ) -> None:
60
+ self.resubmit_callback = resubmit_callback
61
+ self._pending_resumes: list[tuple[float, int, ExecutableWithState]] = []
62
+ self._lock = threading.Lock()
63
+ self._schedule_counter = 0
64
+ self._shutdown = threading.Event()
65
+ self._timer_thread = threading.Thread(target=self._timer_loop, daemon=True)
66
+ self._timer_thread.start()
67
+
68
+ def __enter__(self) -> Self:
69
+ return self
70
+
71
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
72
+ self.shutdown()
73
+
74
+ def schedule_resume(
75
+ self, exe_state: ExecutableWithState, resume_time: float
76
+ ) -> None:
77
+ """Schedule a task to resume at the specified time.
78
+
79
+ Uses a counter as a tie-breaker to ensure FIFO ordering when multiple
80
+ tasks have the same resume_time, preventing TypeError from comparing
81
+ ExecutableWithState objects.
82
+ """
83
+ with self._lock:
84
+ heapq.heappush(
85
+ self._pending_resumes,
86
+ (resume_time, self._schedule_counter, exe_state),
87
+ )
88
+ self._schedule_counter += 1
89
+
90
+ def shutdown(self) -> None:
91
+ """Shutdown the timer thread and cancel all pending resumes."""
92
+ self._shutdown.set()
93
+ self._timer_thread.join(timeout=1.0)
94
+ with self._lock:
95
+ self._pending_resumes.clear()
96
+
97
+ def _timer_loop(self) -> None:
98
+ """Background thread that processes timed resumes."""
99
+ while not self._shutdown.is_set():
100
+ next_resume_time = None
101
+
102
+ with self._lock:
103
+ if self._pending_resumes:
104
+ next_resume_time = self._pending_resumes[0][0]
105
+
106
+ if next_resume_time is None:
107
+ # No pending resumes, wait a bit and check again
108
+ self._shutdown.wait(timeout=0.1)
109
+ continue
110
+
111
+ current_time = time.time()
112
+ if current_time >= next_resume_time:
113
+ # Time to resume
114
+ with self._lock:
115
+ # no branch cover because hard to test reliably - this is a double-safety check if heap mutated
116
+ # since the first peek on next_resume_time further up
117
+ if ( # pragma: no branch
118
+ self._pending_resumes
119
+ and self._pending_resumes[0][0] <= current_time
120
+ ):
121
+ _, _, exe_state = heapq.heappop(self._pending_resumes)
122
+ if exe_state.can_resume:
123
+ exe_state.reset_to_pending()
124
+ self.resubmit_callback(exe_state)
125
+ else:
126
+ # Wait until next resume time
127
+ wait_time = min(next_resume_time - current_time, 0.1)
128
+ self._shutdown.wait(timeout=wait_time)
129
+
130
+
131
class ConcurrentExecutor(ABC, Generic[CallableType, ResultType]):
    """Execute durable operations concurrently. This contains the execution logic for Map and Parallel.

    Subclasses implement ``execute_item``; this base class owns fan-out over a
    thread pool, suspend/resume coordination via :class:`TimerScheduler`, and
    aggregation of per-branch outcomes into a :class:`BatchResult`.
    """

    def __init__(
        self,
        executables: list[Executable[CallableType]],
        max_concurrency: int | None,
        completion_config: CompletionConfig,
        sub_type_top: OperationSubType,
        sub_type_iteration: OperationSubType,
        name_prefix: str,
        serdes: SerDes | None,
        item_serdes: SerDes | None = None,
        summary_generator: SummaryGenerator | None = None,
    ):
        """Initialize ConcurrentExecutor.

        Args:
            executables: Items to run; each becomes one child branch.
            max_concurrency: Thread-pool size cap; ``None`` means one worker
                per executable.
            completion_config: Early-completion / failure-tolerance criteria.
            sub_type_top: Operation sub-type for the top-level operation.
            sub_type_iteration: Operation sub-type recorded per child branch.
            name_prefix: Prefix for child operation names
                (child name is ``f"{name_prefix}{index}"``).
            serdes: Serializer for results; also the fallback item serializer.
            item_serdes: Optional per-item serializer; overrides ``serdes``.
            summary_generator: Optional function to generate compact summaries for large results.
                When the serialized result exceeds 256KB, this generator creates a JSON summary
                instead of checkpointing the full result. Used by map/parallel operations to
                handle large BatchResult payloads efficiently. Matches TypeScript behavior in
                run-in-child-context-handler.ts.
        """
        self.executables = executables
        self.max_concurrency = max_concurrency
        self.completion_config = completion_config
        self.sub_type_top = sub_type_top
        self.sub_type_iteration = sub_type_iteration
        self.name_prefix = name_prefix
        self.summary_generator = summary_generator

        # Event-driven state tracking for when the executor is done
        self._completion_event = threading.Event()
        self._suspend_exception: SuspendExecution | None = None

        # ExecutionCounters will keep track of completion criteria and on-going counters.
        # If min_successful is unset, all executables must succeed.
        min_successful = self.completion_config.min_successful or len(self.executables)
        tolerated_failure_count = self.completion_config.tolerated_failure_count
        tolerated_failure_percentage = (
            self.completion_config.tolerated_failure_percentage
        )

        self.counters: ExecutionCounters = ExecutionCounters(
            len(executables),
            min_successful,
            tolerated_failure_count,
            tolerated_failure_percentage,
        )
        # Populated at execute() time; one state wrapper per executable.
        self.executables_with_state: list[ExecutableWithState] = []
        self.serdes = serdes
        self.item_serdes = item_serdes

    @abstractmethod
    def execute_item(
        self, child_context: DurableContext, executable: Executable[CallableType]
    ) -> ResultType:
        """Execute a single executable in a child context and return the result."""
        raise NotImplementedError

    def execute(
        self, execution_state: ExecutionState, executor_context: DurableContext
    ) -> BatchResult[ResultType]:
        """Execute items concurrently with event-driven state management.

        Submits every executable to a thread pool, then blocks on
        ``_completion_event`` which is set by ``_on_task_complete`` when the
        completion criteria are met or when all remaining branches are
        suspended.

        Raises:
            SuspendExecution / TimedSuspendExecution: When every remaining
                branch is suspended (propagated from ``_suspend_exception``).
        """
        logger.debug(
            "▶️ Executing concurrent operation, items: %d", len(self.executables)
        )

        max_workers = self.max_concurrency or len(self.executables)

        self.executables_with_state = [
            ExecutableWithState(executable=exe) for exe in self.executables
        ]
        self._completion_event.clear()
        self._suspend_exception = None

        def resubmitter(executable_with_state: ExecutableWithState) -> None:
            """Resubmit a timed suspended task."""
            # Checkpoint before resubmission so progress survives a crash
            # between suspend and resume.
            execution_state.create_checkpoint()
            submit_task(executable_with_state)

        thread_executor = ThreadPoolExecutor(max_workers=max_workers)
        try:
            with TimerScheduler(resubmitter) as scheduler:

                def submit_task(executable_with_state: ExecutableWithState) -> Future:
                    """Submit task to the thread executor and mark its state as started."""
                    future = thread_executor.submit(
                        self._execute_item_in_child_context,
                        executor_context,
                        executable_with_state.executable,
                    )
                    executable_with_state.run(future)

                    def on_done(future: Future) -> None:
                        self._on_task_complete(executable_with_state, future, scheduler)

                    future.add_done_callback(on_done)
                    return future

                # Submit initial tasks
                futures = [
                    submit_task(exe_state) for exe_state in self.executables_with_state
                ]

                # Wait for completion
                self._completion_event.wait()

                # Cancel futures that haven't started yet
                for future in futures:
                    future.cancel()

                # Suspend execution if everything done and at least one of the tasks raised a suspend exception.
                if self._suspend_exception:
                    raise self._suspend_exception

        finally:
            # Shutdown without waiting for running threads for early return when
            # completion criteria are met (e.g., min_successful).
            # Running threads will continue in background but they raise OrphanedChildException
            # on the next attempt to checkpoint.
            thread_executor.shutdown(wait=False, cancel_futures=True)

        # Build final result
        return self._create_result()

    def should_execution_suspend(self) -> SuspendResult:
        """Check if execution should suspend.

        Returns a suspend decision based on branch states: no suspension while
        any branch can still make progress; a timed suspend (at the earliest
        resume timestamp) if any branch is suspended with a timeout; otherwise
        an indefinite suspend if any branch awaits an external callback.
        """
        earliest_timestamp: float = float("inf")
        indefinite_suspend_task: (
            ExecutableWithState[CallableType, ResultType] | None
        ) = None

        for exe_state in self.executables_with_state:
            if exe_state.status in {BranchStatus.PENDING, BranchStatus.RUNNING}:
                # Exit here! Still have tasks that can make progress, don't suspend.
                return SuspendResult.do_not_suspend()
            if exe_state.status is BranchStatus.SUSPENDED_WITH_TIMEOUT:
                if (
                    exe_state.suspend_until
                    and exe_state.suspend_until < earliest_timestamp
                ):
                    earliest_timestamp = exe_state.suspend_until
            elif exe_state.status is BranchStatus.SUSPENDED:
                indefinite_suspend_task = exe_state

        # All tasks are in final states and at least one of them is a suspend.
        # Timed suspension takes precedence over indefinite suspension.
        if earliest_timestamp != float("inf"):
            return SuspendResult.suspend(
                TimedSuspendExecution(
                    "All concurrent work complete or suspended pending retry.",
                    earliest_timestamp,
                )
            )
        if indefinite_suspend_task:
            return SuspendResult.suspend(
                SuspendExecution(
                    "All concurrent work complete or suspended and pending external callback."
                )
            )

        return SuspendResult.do_not_suspend()

    def _on_task_complete(
        self,
        exe_state: ExecutableWithState,
        future: Future,
        scheduler: TimerScheduler,
    ) -> None:
        """Handle task completion, suspension, or failure.

        Runs as a ``Future`` done-callback (worker or submitting thread).
        Updates the branch state and counters, then decides whether the whole
        operation should complete or suspend.
        """

        if future.cancelled():
            # Cancelled before start: treat as suspended so _create_result
            # tags it STARTED rather than failed.
            exe_state.suspend()
            return

        try:
            result = future.result()
            exe_state.complete(result)
            self.counters.complete_task()
        except OrphanedChildException:
            # Parent already completed and returned.
            # State is already RUNNING, which _create_result() marked as STARTED
            # Just log and exit - no state change needed
            logger.debug(
                "Terminating orphaned branch %s without error because parent has completed already",
                exe_state.index,
            )
            return
        except TimedSuspendExecution as tse:
            exe_state.suspend_with_timeout(tse.scheduled_timestamp)
            scheduler.schedule_resume(exe_state, tse.scheduled_timestamp)
        except SuspendExecution:
            exe_state.suspend()
            # For indefinite suspend, don't schedule resume
        except Exception as e:  # noqa: BLE001
            # Broad catch is deliberate: any branch failure is recorded and
            # folded into the batch outcome rather than propagated here.
            exe_state.fail(e)
            self.counters.fail_task()

        # Check if execution should complete or suspend
        if self.counters.should_complete():
            self._completion_event.set()
        else:
            suspend_result = self.should_execution_suspend()
            if suspend_result.should_suspend:
                # Stash the exception; execute() raises it on the main thread.
                self._suspend_exception = suspend_result.exception
                self._completion_event.set()

    def _create_result(self) -> BatchResult[ResultType]:
        """
        Build the final BatchResult.

        When this function executes, we've terminated the upper/parent context for whatever reason.
        It follows that our items can be only in 3 states, Completed, Failed and Started (in all of the possible forms).
        We tag each branch based on its observed value at the time of completion of the parent / upper context, and pass the
        results to BatchResult.

        Any inference wrt completion reason is left up to BatchResult, keeping the logic inference isolated.
        """
        batch_items: list[BatchItem[ResultType]] = []
        for executable in self.executables_with_state:
            match executable.status:
                case BranchStatus.COMPLETED:
                    batch_items.append(
                        BatchItem(
                            executable.index,
                            BatchItemStatus.SUCCEEDED,
                            executable.result,
                        )
                    )
                case BranchStatus.FAILED:
                    batch_items.append(
                        BatchItem(
                            executable.index,
                            BatchItemStatus.FAILED,
                            error=ErrorObject.from_exception(executable.error),
                        )
                    )
                case (
                    BranchStatus.PENDING
                    | BranchStatus.RUNNING
                    | BranchStatus.SUSPENDED
                    | BranchStatus.SUSPENDED_WITH_TIMEOUT
                ):
                    # Any non-terminal state collapses to STARTED.
                    batch_items.append(
                        BatchItem(executable.index, BatchItemStatus.STARTED)
                    )

        return BatchResult.from_items(batch_items, self.completion_config)

    def _execute_item_in_child_context(
        self,
        executor_context: DurableContext,
        executable: Executable[CallableType],
    ) -> ResultType:
        """
        Execute a single item in a derived child context.

        instead of relying on `executor_context.run_in_child_context`
        we generate an operation_id for the child, and then call `child_handler`
        directly. This avoids the hidden mutation of the context's internal counter.
        we can do this because we explicitly control the generation of step_id and do it
        using executable.index.


        invariant: `operation_id` for a given executable is deterministic,
        and execution order invariant.
        """

        operation_id = executor_context._create_step_id_for_logical_step(  # noqa: SLF001
            executable.index
        )
        name = f"{self.name_prefix}{executable.index}"
        child_context = executor_context.create_child_context(operation_id)
        operation_identifier = OperationIdentifier(
            operation_id,
            executor_context._parent_id,  # noqa: SLF001
            name,
        )

        def run_in_child_handler():
            return self.execute_item(child_context, executable)

        result: ResultType = child_handler(
            run_in_child_handler,
            child_context.state,
            operation_identifier=operation_identifier,
            config=ChildConfig(
                serdes=self.item_serdes or self.serdes,
                sub_type=self.sub_type_iteration,
                summary_generator=self.summary_generator,
            ),
        )
        child_context.state.track_replay(operation_id=operation_id)
        return result

    def replay(
        self, execution_state: ExecutionState, executor_context: DurableContext
    ) -> BatchResult[ResultType]:
        """
        Replay rather than re-run children.

        if we are here, then we are in replay_children.
        This will pre-generate all the operation ids for the children and collect the checkpointed
        results.
        """
        items: list[BatchItem[ResultType]] = []
        for executable in self.executables:
            operation_id = executor_context._create_step_id_for_logical_step(  # noqa: SLF001
                executable.index
            )
            checkpoint = execution_state.get_checkpoint_result(operation_id)

            result: ResultType | None = None
            error = None
            status: BatchItemStatus
            if checkpoint.is_succeeded():
                status = BatchItemStatus.SUCCEEDED
                # NOTE(review): this re-enters the child handler on the
                # succeeded path — presumably child_handler short-circuits to
                # the checkpointed result during replay; confirm it does not
                # re-execute user code.
                result = self._execute_item_in_child_context(
                    executor_context, executable
                )

            elif checkpoint.is_failed():
                error = checkpoint.error
                status = BatchItemStatus.FAILED
            else:
                status = BatchItemStatus.STARTED

            batch_item = BatchItem(executable.index, status, result=result, error=error)
            items.append(batch_item)
        return BatchResult.from_items(items, self.completion_config)
459
+
460
+
461
+ # endregion concurrency logic