@aws/durable-execution-sdk-js 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -5
- package/dist/index.mjs +101 -33
- package/dist/index.mjs.map +1 -1
- package/dist-cjs/index.js +101 -33
- package/dist-cjs/index.js.map +1 -1
- package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts.map +1 -1
- package/dist-types/index.d.ts +1 -1
- package/dist-types/index.d.ts.map +1 -1
- package/dist-types/types/step.d.ts +45 -0
- package/dist-types/types/step.d.ts.map +1 -1
- package/dist-types/utils/checkpoint/checkpoint-manager.d.ts +6 -0
- package/dist-types/utils/checkpoint/checkpoint-manager.d.ts.map +1 -1
- package/dist-types/with-durable-execution.d.ts.map +1 -1
- package/package.json +1 -2
package/README.md
CHANGED
@@ -285,17 +285,37 @@ Control execution guarantees:
 ```typescript
 import { StepSemantics } from "@aws/durable-execution-sdk-js";
 
-// At-most-once per retry
+// At-least-once per retry (default)
+await context.step("retriable-operation", async () => sendNotification(), {
+  semantics: StepSemantics.AtLeastOncePerRetry,
+});
+
+// At-most-once per retry
 await context.step("idempotent-operation", async () => updateDatabase(), {
   semantics: StepSemantics.AtMostOncePerRetry,
 });
+```
 
-
-
-
-
+**Important**: These semantics apply _per retry_, not per overall execution:
+
+- **AtLeastOncePerRetry**: The step will execute at least once on each retry attempt. If the step succeeds but the checkpoint fails (e.g., sandbox crash), the step will re-execute on replay.
+- **AtMostOncePerRetry**: The step will execute at most once per retry attempt. A checkpoint is created before execution, so if a failure occurs after the checkpoint but before step completion, the previous step retry attempt is skipped on replay.
+
+**To achieve at-most-once semantics on a step-level**, use a custom retry strategy:
+
+```typescript
+await context.step(
+  "truly-once-only",
+  async () => callThatCannotTolerateDuplicates(),
+  {
+    semantics: StepSemantics.AtMostOncePerRetry,
+    retryStrategy: () => ({ shouldRetry: false }), // No retries
+  },
+);
 ```
 
+Without this, a step using `AtMostOncePerRetry` with retries enabled could still execute multiple times across different retry attempts.
+
 ### Jitter Strategies
 
 Prevent thundering herd:
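The at-most-once recipe above packages neatly into a helper. A minimal sketch, assuming only the `context.step(name, fn, options)` signature and the options shown in the README; `stepAtMostOnce` is a hypothetical wrapper, not an SDK export:

```typescript
import { StepSemantics } from "@aws/durable-execution-sdk-js";

// Hypothetical helper wrapping the README recipe: AtMostOncePerRetry
// semantics plus a retry strategy that disables retries entirely.
interface StepContext {
  step<T>(name: string, fn: () => Promise<T>, options?: object): Promise<T>;
}

async function stepAtMostOnce<T>(
  context: StepContext,
  name: string,
  fn: () => Promise<T>,
): Promise<T> {
  return context.step(name, fn, {
    semantics: StepSemantics.AtMostOncePerRetry,
    // No retries, so "at most once per retry" becomes "at most once overall".
    retryStrategy: () => ({ shouldRetry: false }),
  });
}
```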
package/dist/index.mjs
CHANGED
@@ -165,11 +165,56 @@ var DurableLogLevel;
 })(DurableLogLevel || (DurableLogLevel = {}));
 
 /**
+ * Execution semantics for step operations.
+ *
+ * @remarks
+ * These semantics control how step execution is checkpointed and replayed. **Important**: The guarantees apply *per
+ * retry attempt*, not per overall workflow execution.
+ *
+ * With retries enabled (the default), a step could execute multiple times across different retry attempts even when
+ * using `AtMostOncePerRetry`. To achieve step-level at-most-once execution, combine `AtMostOncePerRetry` with a retry
+ * strategy that disables retries (`shouldRetry: false`).
+ *
+ * @example
+ * ```typescript
+ * // At-least-once per retry (default) - safe for idempotent operations
+ * await context.step("send-notification", async () => sendEmail(), {
+ *   semantics: StepSemantics.AtLeastOncePerRetry,
+ * });
+ *
+ * // At-most-once per retry - for non-idempotent operations
+ * await context.step("charge-payment", async () => processPayment(), {
+ *   semantics: StepSemantics.AtMostOncePerRetry,
+ *   retryStrategy: () => ({ shouldRetry: false }),
+ * });
+ * ```
+ *
  * @public
  */
 var StepSemantics;
 (function (StepSemantics) {
+    /**
+     * At-most-once execution per retry attempt.
+     *
+     * @remarks
+     * A checkpoint is created before step execution. If a failure occurs after the checkpoint
+     * but before step completion, the previous step retry attempt is skipped on replay.
+     *
+     * **Note**: This is "at-most-once *per retry*". With multiple retry attempts, the step
+     * could still execute multiple times across different retries. To guarantee the step
+     * executes at most once, disable retries by returning
+     * `{ shouldRetry: false }` from your retry strategy.
+     */
     StepSemantics["AtMostOncePerRetry"] = "AT_MOST_ONCE_PER_RETRY";
+    /**
+     * At-least-once execution per retry attempt (default).
+     *
+     * @remarks
+     * The step will execute at least once on each retry attempt. If the step succeeds
+     * but the checkpoint fails (e.g., due to a sandbox crash), the step will re-execute
+     * on replay. This is the safer default for operations that are idempotent or can
+     * tolerate duplicate execution.
+     */
     StepSemantics["AtLeastOncePerRetry"] = "AT_LEAST_ONCE_PER_RETRY";
 })(StepSemantics || (StepSemantics = {}));
 /**
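The hunk above is compiled output: the `var StepSemantics; (function (StepSemantics) { ... })` IIFE is the standard tsc emit for a TypeScript string enum. The source file is not part of this diff, but it plausibly looks like this (reconstructed from the emitted pattern and the new `dist-types/types/step.d.ts` entries):

```typescript
// Probable TypeScript source behind the compiled enum above; reconstructed,
// not copied from the package's sources.
export enum StepSemantics {
  AtMostOncePerRetry = "AT_MOST_ONCE_PER_RETRY",
  AtLeastOncePerRetry = "AT_LEAST_ONCE_PER_RETRY",
}
```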
@@ -2794,7 +2839,13 @@ class ConcurrencyController {
                     tryStartNext();
                 }
             };
-            tryStartNext();
+            if (items.length === 0) {
+                log("🎉", `${this.operationName} completed with no items`);
+                resolve(new BatchResultImpl([], getCompletionReason(0)));
+            }
+            else {
+                tryStartNext();
+            }
         });
     }
 }
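This hunk fixes a hang: the batch promise was resolved only from item-completion callbacks, so an empty `items` array left it pending forever. A self-contained sketch of the bug class and the fix, using illustrative names rather than the SDK's internals:

```typescript
// Minimal model of a promise-based concurrency loop. Before the fix, the
// promise only settled from worker callbacks, so empty input never resolved.
function runAll<T, R>(
  items: T[],
  worker: (item: T) => Promise<R>,
  concurrency = 4,
): Promise<R[]> {
  return new Promise((resolve, reject) => {
    const results: R[] = [];
    let next = 0;
    let settled = 0;
    let inFlight = 0;
    const tryStartNext = () => {
      while (inFlight < concurrency && next < items.length) {
        const index = next++;
        inFlight++;
        worker(items[index]).then((result) => {
          results[index] = result;
          settled++;
          inFlight--;
          if (settled === items.length) {
            resolve(results);
          } else {
            tryStartNext();
          }
        }, reject);
      }
    };
    // The 1.0.2 fix, in this shape: resolve immediately for empty input
    // instead of waiting on completion callbacks that will never fire.
    if (items.length === 0) {
      resolve([]);
    } else {
      tryStartNext();
    }
  });
}
```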
@@ -3232,6 +3283,13 @@ class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError
 }
 
 const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
+const TERMINAL_STATUSES = [
+    OperationStatus.SUCCEEDED,
+    OperationStatus.CANCELLED,
+    OperationStatus.FAILED,
+    OperationStatus.STOPPED,
+    OperationStatus.TIMED_OUT,
+];
 class CheckpointManager {
     durableExecutionArn;
     stepData;
@@ -3246,6 +3304,7 @@ class CheckpointManager {
     forceCheckpointPromises = [];
     queueCompletionResolver = null;
     MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
+    MAX_ITEMS_IN_BATCH = 250;
     isTerminating = false;
     static textEncoder = new TextEncoder();
     // Operation lifecycle tracking
@@ -3407,7 +3466,9 @@ class CheckpointManager {
         while (this.queue.length > 0) {
             const nextItem = this.queue[0];
             const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
-            if (currentSize + itemSize > this.MAX_PAYLOAD_SIZE && batch.length > 0) {
+            if ((currentSize + itemSize > this.MAX_PAYLOAD_SIZE ||
+                batch.length >= this.MAX_ITEMS_IN_BATCH) &&
+                batch.length > 0) {
                 break;
             }
             this.queue.shift();
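Taken together, these three hunks cap each checkpoint batch by item count (250) as well as payload size (750KB). The dispatch rule, extracted as a standalone sketch (constants are from the diff; the queue and batch shapes are illustrative):

```typescript
// Take items off the queue until the next item would push the batch over the
// size or count limit -- but always take at least one, so a single oversized
// item still ships on its own instead of stalling the queue.
const MAX_PAYLOAD_SIZE = 750 * 1024; // bytes
const MAX_ITEMS_IN_BATCH = 250;
const encoder = new TextEncoder();

function takeBatch<T>(queue: T[]): T[] {
  const batch: T[] = [];
  let currentSize = 0;
  while (queue.length > 0) {
    const nextItem = queue[0];
    const itemSize = encoder.encode(JSON.stringify(nextItem)).length;
    if (
      (currentSize + itemSize > MAX_PAYLOAD_SIZE ||
        batch.length >= MAX_ITEMS_IN_BATCH) &&
      batch.length > 0
    ) {
      break;
    }
    queue.shift();
    batch.push(nextItem);
    currentSize += itemSize;
  }
  return batch;
}
```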
@@ -3589,6 +3650,11 @@ class CheckpointManager {
         if (op.state !== OperationLifecycleState.RETRY_WAITING) {
             throw new Error(`Operation ${stepId} must be in RETRY_WAITING state, got ${op.state}`);
         }
+        // Resolve immediately if the step was completed already
+        const stepData = this.stepData[hashId(stepId)];
+        if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
+            return Promise.resolve();
+        }
         // Start timer with polling
         this.startTimerWithPolling(stepId, op.endTimestamp);
         // Return promise that resolves when status changes
@@ -3604,6 +3670,11 @@ class CheckpointManager {
         if (op.state !== OperationLifecycleState.IDLE_AWAITED) {
             throw new Error(`Operation ${stepId} must be in IDLE_AWAITED state, got ${op.state}`);
         }
+        // Resolve immediately if the step was completed already
+        const stepData = this.stepData[hashId(stepId)];
+        if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
+            return Promise.resolve();
+        }
         // Start timer with polling
         this.startTimerWithPolling(stepId, op.endTimestamp);
         // Return promise that resolves when status changes
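Both waiter paths now short-circuit when the step already reached a terminal status, rather than starting a poll timer for finished work. The guard in isolation, as a sketch (the five terminal statuses come from `TERMINAL_STATUSES` above; the rest is illustrative):

```typescript
// Illustrative status union; only the five terminal values come from the diff.
type Status = "SUCCEEDED" | "CANCELLED" | "FAILED" | "STOPPED" | "TIMED_OUT" | "RUNNING";

const TERMINAL: Status[] = ["SUCCEEDED", "CANCELLED", "FAILED", "STOPPED", "TIMED_OUT"];

function waitForStatusChange(
  currentStatus: Status | undefined,
  startTimerWithPolling: () => Promise<void>,
): Promise<void> {
  // Already finished: nothing to wait for, resolve immediately.
  if (currentStatus && TERMINAL.includes(currentStatus)) {
    return Promise.resolve();
  }
  // Otherwise fall back to the timer-plus-polling path.
  return startTimerWithPolling();
}
```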
@@ -3653,28 +3724,28 @@ class CheckpointManager {
             op.resolver = undefined;
         }
     }
-
+    /**
+     * Determines if the function should terminate.
+     * @returns TerminationReason if the function should terminate, or undefined if the function should not terminate
+     */
+    shouldTerminate() {
         // Rule 1: Can't terminate if checkpoint queue is not empty
         if (this.queue.length > 0) {
-
-            return;
+            return undefined;
         }
         // Rule 2: Can't terminate if checkpoint is currently processing
         if (this.isProcessing) {
-
-            return;
+            return undefined;
         }
         // Rule 3: Can't terminate if there are pending force checkpoint promises
         if (this.forceCheckpointPromises.length > 0) {
-
-            return;
+            return undefined;
         }
         const allOps = Array.from(this.operations.values());
         // Rule 4: Can't terminate if any operation is EXECUTING
         const hasExecuting = allOps.some((op) => op.state === OperationLifecycleState.EXECUTING);
         if (hasExecuting) {
-
-            return;
+            return undefined;
         }
         // Rule 5: Clean up operations whose ancestors are complete or pending completion
         for (const op of allOps) {
@@ -3697,12 +3768,17 @@ class CheckpointManager {
             op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
             op.state === OperationLifecycleState.IDLE_AWAITED);
         if (hasWaiting) {
-
-            this.scheduleTermination(reason);
+            return this.determineTerminationReason(remainingOps);
         }
-
-
+        return undefined;
+    }
+    checkAndTerminate() {
+        const terminationReason = this.shouldTerminate();
+        if (terminationReason) {
+            this.scheduleTermination(terminationReason);
+            return;
         }
+        this.abortTermination();
     }
     abortTermination() {
         if (this.terminationTimer) {
@@ -3726,6 +3802,11 @@ class CheckpointManager {
             cooldownMs: this.TERMINATION_COOLDOWN_MS,
         });
         this.terminationTimer = setTimeout(() => {
+            if (!this.shouldTerminate()) {
+                log("🔄", "Termination conditions no longer valid after cooldown, aborting termination");
+                this.abortTermination();
+                return;
+            }
             this.executeTermination(reason);
         }, this.TERMINATION_COOLDOWN_MS);
     }
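These termination hunks split the decision from its side effects: `shouldTerminate` returns a reason or `undefined`, `checkAndTerminate` acts on it, and the cooldown timer re-runs the decision before actually terminating. The pattern as a self-contained sketch (member names mirror the diff where they appear; the class itself is illustrative):

```typescript
type TerminationReason = string;

// Check/act split with a post-cooldown re-check, modeled on the diff.
class TerminationGate {
  private timer?: ReturnType<typeof setTimeout>;

  constructor(
    private readonly cooldownMs: number,
    private readonly shouldTerminate: () => TerminationReason | undefined,
    private readonly executeTermination: (reason: TerminationReason) => void,
  ) {}

  checkAndTerminate(): void {
    const reason = this.shouldTerminate();
    if (reason) {
      this.scheduleTermination(reason);
      return;
    }
    this.abortTermination();
  }

  private scheduleTermination(reason: TerminationReason): void {
    if (this.timer) {
      return; // already scheduled; let the pending cooldown run
    }
    this.timer = setTimeout(() => {
      this.timer = undefined;
      // Conditions may have changed during the cooldown; re-check before acting.
      if (!this.shouldTerminate()) {
        return;
      }
      this.executeTermination(reason);
    }, this.cooldownMs);
  }

  private abortTermination(): void {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = undefined;
    }
  }
}
```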
@@ -4477,16 +4558,10 @@ async function runHandler(event, context, executionContext, durableExecutionMode
  * Validates that the event is a proper durable execution input
  */
 function validateDurableExecutionEvent(event) {
-
-
-
-
-        }
-    }
-    catch {
-        const msg = `Unexpected payload provided to start the durable execution.
-Check your resource configurations to confirm the durability is set.`;
-        throw new Error(msg);
+    const eventObj = event;
+    if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
+        throw new Error("Unexpected payload provided to start the durable execution.\n" +
+            "Check your resource configurations to confirm the durability is set.");
     }
 }
 /**
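The rewritten validator drops the old try/catch in favor of a direct structural check on the two required fields. The same check as a TypeScript type guard, for callers that want narrowing (field names are from the diff; the interface name is illustrative):

```typescript
// Hypothetical typed view of the event; only the two field names are from the diff.
interface DurableExecutionEvent {
  DurableExecutionArn: string;
  CheckpointToken: string;
}

function isDurableExecutionEvent(event: unknown): event is DurableExecutionEvent {
  const e = event as Partial<DurableExecutionEvent> | null | undefined;
  return Boolean(e?.DurableExecutionArn && e?.CheckpointToken);
}
```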
@@ -4564,14 +4639,7 @@ const withDurableExecution = (handler, config) => {
     return async (event, context) => {
         validateDurableExecutionEvent(event);
         const { executionContext, durableExecutionMode, checkpointToken } = await initializeExecutionContext(event, context, config?.client);
-
-        try {
-            response = await runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
-            return response;
-        }
-        catch (err) {
-            throw err;
-        }
+        return runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
     };
 };
 
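The removed wrapper was a no-op: `try { return await f(); } catch (err) { throw err; }` resolves and rejects exactly as `return f();` does, which is why the hunk can return the promise directly. A sketch of the equivalence:

```typescript
// Both functions yield the same value and the same rejection for callers;
// the catch clause rethrows unchanged, adding no behavior.
async function wrapped(run: () => Promise<string>): Promise<string> {
  try {
    const response = await run();
    return response;
  } catch (err) {
    throw err; // no-op rethrow
  }
}

function direct(run: () => Promise<string>): Promise<string> {
  return run();
}
```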