async-durable-execution 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- async_durable_execution/.gitignore +0 -0
- async_durable_execution/__about__.py +4 -0
- async_durable_execution/__init__.py +42 -0
- async_durable_execution/concurrency/__init__.py +0 -0
- async_durable_execution/concurrency/executor.py +461 -0
- async_durable_execution/concurrency/models.py +540 -0
- async_durable_execution/config.py +499 -0
- async_durable_execution/context.py +635 -0
- async_durable_execution/exceptions.py +403 -0
- async_durable_execution/execution.py +463 -0
- async_durable_execution/identifier.py +14 -0
- async_durable_execution/lambda_service.py +1120 -0
- async_durable_execution/logger.py +131 -0
- async_durable_execution/operation/__init__.py +1 -0
- async_durable_execution/operation/base.py +187 -0
- async_durable_execution/operation/callback.py +182 -0
- async_durable_execution/operation/child.py +277 -0
- async_durable_execution/operation/invoke.py +172 -0
- async_durable_execution/operation/map.py +137 -0
- async_durable_execution/operation/parallel.py +122 -0
- async_durable_execution/operation/step.py +359 -0
- async_durable_execution/operation/wait.py +111 -0
- async_durable_execution/operation/wait_for_condition.py +283 -0
- async_durable_execution/py.typed +1 -0
- async_durable_execution/retries.py +174 -0
- async_durable_execution/serdes.py +502 -0
- async_durable_execution/state.py +798 -0
- async_durable_execution/suspend.py +84 -0
- async_durable_execution/threading.py +222 -0
- async_durable_execution/types.py +180 -0
- async_durable_execution/waits.py +130 -0
- async_durable_execution-0.1.0.dist-info/METADATA +80 -0
- async_durable_execution-0.1.0.dist-info/RECORD +35 -0
- async_durable_execution-0.1.0.dist-info/WHEEL +4 -0
- async_durable_execution-0.1.0.dist-info/licenses/LICENSE +175 -0
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""AWS Lambda Durable Executions Python SDK."""
|
|
2
|
+
|
|
3
|
+
# Package metadata
|
|
4
|
+
from .__about__ import __version__
|
|
5
|
+
|
|
6
|
+
# Main context - used in every durable function
|
|
7
|
+
# Helper decorators - commonly used for step functions
|
|
8
|
+
# Concurrency
|
|
9
|
+
from .concurrency.models import BatchResult
|
|
10
|
+
from .context import (
|
|
11
|
+
DurableContext,
|
|
12
|
+
durable_step,
|
|
13
|
+
durable_wait_for_callback,
|
|
14
|
+
durable_with_child_context,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Most common exceptions - users need to handle these exceptions
|
|
18
|
+
from .exceptions import (
|
|
19
|
+
DurableExecutionsError,
|
|
20
|
+
InvocationError,
|
|
21
|
+
ValidationError,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Core decorator - used in every durable function
|
|
25
|
+
from .execution import durable_execution
|
|
26
|
+
|
|
27
|
+
# Essential context types - passed to user functions
|
|
28
|
+
from .types import StepContext
|
|
29
|
+
|
|
30
|
+
# Public API surface of the package. Names are in ASCII sort order
# (uppercase class names, then dunders, then lowercase functions).
__all__ = [
    "BatchResult",
    "DurableContext",
    "DurableExecutionsError",
    "InvocationError",
    "StepContext",
    "ValidationError",
    "__version__",
    "durable_execution",
    "durable_step",
    "durable_wait_for_callback",
    "durable_with_child_context",
]
|
|
File without changes
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
"""Concurrent executor for parallel and map operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import heapq
|
|
6
|
+
import logging
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from concurrent.futures import Future, ThreadPoolExecutor
|
|
11
|
+
from typing import TYPE_CHECKING, Generic, Self, TypeVar
|
|
12
|
+
|
|
13
|
+
from .models import (
|
|
14
|
+
BatchItem,
|
|
15
|
+
BatchItemStatus,
|
|
16
|
+
BatchResult,
|
|
17
|
+
BranchStatus,
|
|
18
|
+
Executable,
|
|
19
|
+
ExecutableWithState,
|
|
20
|
+
ExecutionCounters,
|
|
21
|
+
SuspendResult,
|
|
22
|
+
)
|
|
23
|
+
from ..config import ChildConfig
|
|
24
|
+
from ..exceptions import (
|
|
25
|
+
OrphanedChildException,
|
|
26
|
+
SuspendExecution,
|
|
27
|
+
TimedSuspendExecution,
|
|
28
|
+
)
|
|
29
|
+
from ..identifier import OperationIdentifier
|
|
30
|
+
from ..lambda_service import ErrorObject
|
|
31
|
+
from ..operation.child import child_handler
|
|
32
|
+
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from collections.abc import Callable
|
|
35
|
+
|
|
36
|
+
from ..config import CompletionConfig
|
|
37
|
+
from ..context import DurableContext
|
|
38
|
+
from ..lambda_service import OperationSubType
|
|
39
|
+
from ..serdes import SerDes
|
|
40
|
+
from ..state import ExecutionState
|
|
41
|
+
from ..types import SummaryGenerator
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Module-level logger named after this module, per the standard logging convention.
logger = logging.getLogger(__name__)

# NOTE(review): T and R are not referenced anywhere in this module — possibly
# left over or exported for other modules; confirm before removing.
T = TypeVar("T")
R = TypeVar("R")

# Type variable for the user-supplied callable executed per item, and for the
# per-item result type produced by ConcurrentExecutor subclasses.
CallableType = TypeVar("CallableType")
ResultType = TypeVar("ResultType")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# region concurrency logic
|
|
54
|
+
class TimerScheduler:
    """Manage timed suspend tasks with a background timer thread.

    A min-heap of ``(resume_time, counter, task)`` entries is polled by a
    daemon thread; when an entry's resume time is reached and the task can
    resume, it is reset to pending and handed back via ``resubmit_callback``.
    Usable as a context manager: exiting the ``with`` block shuts the timer
    thread down.
    """

    def __init__(
        self, resubmit_callback: Callable[[ExecutableWithState], None]
    ) -> None:
        # Called (from the timer thread) for each task whose resume time has arrived.
        self.resubmit_callback = resubmit_callback
        # Min-heap ordered by (resume_time, insertion counter); the counter
        # breaks ties so heapq never compares ExecutableWithState objects.
        self._pending_resumes: list[tuple[float, int, ExecutableWithState]] = []
        # Guards _pending_resumes and _schedule_counter across threads.
        self._lock = threading.Lock()
        self._schedule_counter = 0
        self._shutdown = threading.Event()
        # daemon=True so a live timer thread never blocks interpreter exit.
        self._timer_thread = threading.Thread(target=self._timer_loop, daemon=True)
        self._timer_thread.start()

    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # Always stop the timer thread, even when the body raised.
        self.shutdown()

    def schedule_resume(
        self, exe_state: ExecutableWithState, resume_time: float
    ) -> None:
        """Schedule a task to resume at the specified time.

        Uses a counter as a tie-breaker to ensure FIFO ordering when multiple
        tasks have the same resume_time, preventing TypeError from comparing
        ExecutableWithState objects.
        """
        with self._lock:
            heapq.heappush(
                self._pending_resumes,
                (resume_time, self._schedule_counter, exe_state),
            )
            self._schedule_counter += 1

    def shutdown(self) -> None:
        """Shutdown the timer thread and cancel all pending resumes."""
        self._shutdown.set()
        # Bounded join: the thread is a daemon, so an overrun cannot hang the
        # process; we simply stop waiting after one second.
        self._timer_thread.join(timeout=1.0)
        with self._lock:
            self._pending_resumes.clear()

    def _timer_loop(self) -> None:
        """Background thread that processes timed resumes."""
        while not self._shutdown.is_set():
            next_resume_time = None

            # Peek (don't pop) the earliest resume time under the lock.
            with self._lock:
                if self._pending_resumes:
                    next_resume_time = self._pending_resumes[0][0]

            if next_resume_time is None:
                # No pending resumes, wait a bit and check again
                self._shutdown.wait(timeout=0.1)
                continue

            current_time = time.time()
            if current_time >= next_resume_time:
                # Time to resume
                with self._lock:
                    # no branch cover because hard to test reliably - this is a double-safety check if heap mutated
                    # since the first peek on next_resume_time further up
                    if (  # pragma: no branch
                        self._pending_resumes
                        and self._pending_resumes[0][0] <= current_time
                    ):
                        # Pop exactly one due entry; any other due entries are
                        # picked up on subsequent loop iterations.
                        _, _, exe_state = heapq.heappop(self._pending_resumes)
                        if exe_state.can_resume:
                            exe_state.reset_to_pending()
                            self.resubmit_callback(exe_state)
            else:
                # Wait until next resume time
                # Cap the wait at 0.1s so newly scheduled, earlier resumes and
                # shutdown are noticed promptly.
                wait_time = min(next_resume_time - current_time, 0.1)
                self._shutdown.wait(timeout=wait_time)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ConcurrentExecutor(ABC, Generic[CallableType, ResultType]):
    """Execute durable operations concurrently. This contains the execution logic for Map and Parallel.

    Subclasses implement ``execute_item`` to run one executable inside a child
    context; this base class owns the thread pool, per-branch state tracking,
    timed/indefinite suspension, completion criteria, and result assembly.
    """

    def __init__(
        self,
        executables: list[Executable[CallableType]],
        max_concurrency: int | None,
        completion_config: CompletionConfig,
        sub_type_top: OperationSubType,
        sub_type_iteration: OperationSubType,
        name_prefix: str,
        serdes: SerDes | None,
        item_serdes: SerDes | None = None,
        summary_generator: SummaryGenerator | None = None,
    ) -> None:
        """Initialize ConcurrentExecutor.

        Args:
            executables: Items to run concurrently; each carries an ``index``
                used to derive deterministic child operation ids and names.
            max_concurrency: Cap on thread-pool workers. ``None`` (or 0) means
                one worker per executable.
            completion_config: Supplies ``min_successful``,
                ``tolerated_failure_count`` and ``tolerated_failure_percentage``
                for the completion criteria.
            sub_type_top: Operation sub-type for the top-level operation
                (stored; not referenced within this class).
            sub_type_iteration: Operation sub-type applied to each child
                iteration via ``ChildConfig``.
            name_prefix: Prefix for per-item child names
                (``f"{name_prefix}{index}"``).
            serdes: Serializer/deserializer for child results; ``item_serdes``
                takes precedence when both are given.
            item_serdes: Optional per-item serdes override.
            summary_generator: Optional function to generate compact summaries for large results.
                When the serialized result exceeds 256KB, this generator creates a JSON summary
                instead of checkpointing the full result. Used by map/parallel operations to
                handle large BatchResult payloads efficiently. Matches TypeScript behavior in
                run-in-child-context-handler.ts.
        """
        self.executables = executables
        self.max_concurrency = max_concurrency
        self.completion_config = completion_config
        self.sub_type_top = sub_type_top
        self.sub_type_iteration = sub_type_iteration
        self.name_prefix = name_prefix
        self.summary_generator = summary_generator

        # Event-driven state tracking for when the executor is done
        self._completion_event = threading.Event()
        self._suspend_exception: SuspendExecution | None = None

        # ExecutionCounters will keep track of completion criteria and on-going counters
        # A falsy min_successful (None or 0) falls back to requiring all items.
        min_successful = self.completion_config.min_successful or len(self.executables)
        tolerated_failure_count = self.completion_config.tolerated_failure_count
        tolerated_failure_percentage = (
            self.completion_config.tolerated_failure_percentage
        )

        # NOTE(review): counters are mutated from Future done-callbacks on
        # worker threads — assumes ExecutionCounters is internally
        # synchronized; confirm in models.py.
        self.counters: ExecutionCounters = ExecutionCounters(
            len(executables),
            min_successful,
            tolerated_failure_count,
            tolerated_failure_percentage,
        )
        # Populated per execute() call; one state wrapper per executable.
        self.executables_with_state: list[ExecutableWithState] = []
        self.serdes = serdes
        self.item_serdes = item_serdes

    @abstractmethod
    def execute_item(
        self, child_context: DurableContext, executable: Executable[CallableType]
    ) -> ResultType:
        """Execute a single executable in a child context and return the result."""
        raise NotImplementedError

    def execute(
        self, execution_state: ExecutionState, executor_context: DurableContext
    ) -> BatchResult[ResultType]:
        """Execute items concurrently with event-driven state management.

        Raises:
            SuspendExecution / TimedSuspendExecution: re-raised when every
                remaining branch is suspended (see should_execution_suspend).
        """
        logger.debug(
            "▶️ Executing concurrent operation, items: %d", len(self.executables)
        )

        max_workers = self.max_concurrency or len(self.executables)

        # Fresh per-call state so execute() can be invoked again after replay.
        self.executables_with_state = [
            ExecutableWithState(executable=exe) for exe in self.executables
        ]
        self._completion_event.clear()
        self._suspend_exception = None

        def resubmitter(executable_with_state: ExecutableWithState) -> None:
            """Resubmit a timed suspended task."""
            # Checkpoint before re-running so progress is persisted first.
            # NOTE: closes over submit_task, which is defined below; this is
            # safe because the timer thread can only call resubmitter after a
            # task was submitted and suspended (i.e., after submit_task exists).
            execution_state.create_checkpoint()
            submit_task(executable_with_state)

        # Deliberately not a `with` block: ThreadPoolExecutor.__exit__ would
        # wait for running tasks; we shut down without waiting in `finally`.
        thread_executor = ThreadPoolExecutor(max_workers=max_workers)
        try:
            with TimerScheduler(resubmitter) as scheduler:

                def submit_task(executable_with_state: ExecutableWithState) -> Future:
                    """Submit task to the thread executor and mark its state as started."""
                    future = thread_executor.submit(
                        self._execute_item_in_child_context,
                        executor_context,
                        executable_with_state.executable,
                    )
                    executable_with_state.run(future)

                    def on_done(future: Future) -> None:
                        # Runs on a worker thread when the future settles.
                        self._on_task_complete(executable_with_state, future, scheduler)

                    future.add_done_callback(on_done)
                    return future

                # Submit initial tasks
                futures = [
                    submit_task(exe_state) for exe_state in self.executables_with_state
                ]

                # Wait for completion
                self._completion_event.wait()

                # Cancel futures that haven't started yet
                for future in futures:
                    future.cancel()

                # Suspend execution if everything done and at least one of the tasks raised a suspend exception.
                if self._suspend_exception:
                    raise self._suspend_exception

        finally:
            # Shutdown without waiting for running threads for early return when
            # completion criteria are met (e.g., min_successful).
            # Running threads will continue in background but they raise OrphanedChildException
            # on the next attempt to checkpoint.
            thread_executor.shutdown(wait=False, cancel_futures=True)

        # Build final result
        return self._create_result()

    def should_execution_suspend(self) -> SuspendResult:
        """Check if execution should suspend.

        Returns a "do not suspend" result while any branch can still make
        progress. Otherwise, a timed suspension (earliest resume timestamp)
        takes precedence over an indefinite one.
        """
        earliest_timestamp: float = float("inf")
        indefinite_suspend_task: (
            ExecutableWithState[CallableType, ResultType] | None
        ) = None

        for exe_state in self.executables_with_state:
            if exe_state.status in {BranchStatus.PENDING, BranchStatus.RUNNING}:
                # Exit here! Still have tasks that can make progress, don't suspend.
                return SuspendResult.do_not_suspend()
            if exe_state.status is BranchStatus.SUSPENDED_WITH_TIMEOUT:
                if (
                    exe_state.suspend_until
                    and exe_state.suspend_until < earliest_timestamp
                ):
                    earliest_timestamp = exe_state.suspend_until
            elif exe_state.status is BranchStatus.SUSPENDED:
                indefinite_suspend_task = exe_state

        # All tasks are in final states and at least one of them is a suspend.
        if earliest_timestamp != float("inf"):
            return SuspendResult.suspend(
                TimedSuspendExecution(
                    "All concurrent work complete or suspended pending retry.",
                    earliest_timestamp,
                )
            )
        if indefinite_suspend_task:
            return SuspendResult.suspend(
                SuspendExecution(
                    "All concurrent work complete or suspended and pending external callback."
                )
            )

        return SuspendResult.do_not_suspend()

    def _on_task_complete(
        self,
        exe_state: ExecutableWithState,
        future: Future,
        scheduler: TimerScheduler,
    ) -> None:
        """Handle task completion, suspension, or failure.

        Registered as a Future done-callback, so it executes on a worker
        thread whenever a branch settles (result, cancellation, or exception).
        """

        if future.cancelled():
            # Cancelled before running: record the branch as suspended; no
            # counters are updated for cancelled branches.
            exe_state.suspend()
            return

        try:
            result = future.result()
            exe_state.complete(result)
            self.counters.complete_task()
        except OrphanedChildException:
            # Parent already completed and returned.
            # State is already RUNNING, which _create_result() marked as STARTED
            # Just log and exit - no state change needed
            logger.debug(
                "Terminating orphaned branch %s without error because parent has completed already",
                exe_state.index,
            )
            return
        except TimedSuspendExecution as tse:
            # Timed suspend: remember when to resume and let the scheduler
            # resubmit the branch at that timestamp.
            exe_state.suspend_with_timeout(tse.scheduled_timestamp)
            scheduler.schedule_resume(exe_state, tse.scheduled_timestamp)
        except SuspendExecution:
            exe_state.suspend()
            # For indefinite suspend, don't schedule resume
        except Exception as e:  # noqa: BLE001
            exe_state.fail(e)
            self.counters.fail_task()

        # Check if execution should complete or suspend
        if self.counters.should_complete():
            self._completion_event.set()
        else:
            suspend_result = self.should_execution_suspend()
            if suspend_result.should_suspend:
                # Stash the exception for execute() to re-raise on its own
                # thread, then wake it up.
                self._suspend_exception = suspend_result.exception
                self._completion_event.set()

    def _create_result(self) -> BatchResult[ResultType]:
        """
        Build the final BatchResult.

        When this function executes, we've terminated the upper/parent context for whatever reason.
        It follows that our items can be only in 3 states, Completed, Failed and Started (in all of the possible forms).
        We tag each branch based on its observed value at the time of completion of the parent / upper context, and pass the
        results to BatchResult.

        Any inference wrt completion reason is left up to BatchResult, keeping the logic inference isolated.
        """
        batch_items: list[BatchItem[ResultType]] = []
        for executable in self.executables_with_state:
            match executable.status:
                case BranchStatus.COMPLETED:
                    batch_items.append(
                        BatchItem(
                            executable.index,
                            BatchItemStatus.SUCCEEDED,
                            executable.result,
                        )
                    )
                case BranchStatus.FAILED:
                    batch_items.append(
                        BatchItem(
                            executable.index,
                            BatchItemStatus.FAILED,
                            error=ErrorObject.from_exception(executable.error),
                        )
                    )
                case (
                    BranchStatus.PENDING
                    | BranchStatus.RUNNING
                    | BranchStatus.SUSPENDED
                    | BranchStatus.SUSPENDED_WITH_TIMEOUT
                ):
                    # Any non-final state collapses to STARTED from the
                    # parent's point of view.
                    batch_items.append(
                        BatchItem(executable.index, BatchItemStatus.STARTED)
                    )

        return BatchResult.from_items(batch_items, self.completion_config)

    def _execute_item_in_child_context(
        self,
        executor_context: DurableContext,
        executable: Executable[CallableType],
    ) -> ResultType:
        """
        Execute a single item in a derived child context.

        Instead of relying on `executor_context.run_in_child_context`
        we generate an operation_id for the child, and then call `child_handler`
        directly. This avoids the hidden mutation of the context's internal counter.
        We can do this because we explicitly control the generation of step_id and do it
        using executable.index.

        Invariant: `operation_id` for a given executable is deterministic,
        and execution order invariant.
        """

        # Deterministic id derived from the item's index, independent of the
        # order in which branches actually run.
        operation_id = executor_context._create_step_id_for_logical_step(  # noqa: SLF001
            executable.index
        )
        name = f"{self.name_prefix}{executable.index}"
        child_context = executor_context.create_child_context(operation_id)
        operation_identifier = OperationIdentifier(
            operation_id,
            executor_context._parent_id,  # noqa: SLF001
            name,
        )

        def run_in_child_handler() -> ResultType:
            return self.execute_item(child_context, executable)

        result: ResultType = child_handler(
            run_in_child_handler,
            child_context.state,
            operation_identifier=operation_identifier,
            config=ChildConfig(
                # Per-item serdes overrides the executor-wide one when set.
                serdes=self.item_serdes or self.serdes,
                sub_type=self.sub_type_iteration,
                summary_generator=self.summary_generator,
            ),
        )
        child_context.state.track_replay(operation_id=operation_id)
        return result

    def replay(
        self, execution_state: ExecutionState, executor_context: DurableContext
    ) -> BatchResult[ResultType]:
        """
        Replay rather than re-run children.

        If we are here, then we are in replay_children.
        This will pre-generate all the operation ids for the children and collect the checkpointed
        results.
        """
        items: list[BatchItem[ResultType]] = []
        for executable in self.executables:
            operation_id = executor_context._create_step_id_for_logical_step(  # noqa: SLF001
                executable.index
            )
            checkpoint = execution_state.get_checkpoint_result(operation_id)

            result: ResultType | None = None
            error = None
            status: BatchItemStatus
            if checkpoint.is_succeeded():
                status = BatchItemStatus.SUCCEEDED
                # NOTE(review): re-enters the child even though the checkpoint
                # succeeded — presumably child_handler replays the stored
                # result instead of re-running user code; confirm in
                # operation/child.py.
                result = self._execute_item_in_child_context(
                    executor_context, executable
                )

            elif checkpoint.is_failed():
                error = checkpoint.error
                status = BatchItemStatus.FAILED
            else:
                # Not checkpointed as succeeded or failed: report as started.
                status = BatchItemStatus.STARTED

            batch_item = BatchItem(executable.index, status, result=result, error=error)
            items.append(batch_item)
        return BatchResult.from_items(items, self.completion_config)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
# endregion concurrency logic
|