@aws/durable-execution-sdk-js 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -285,17 +285,37 @@ Control execution guarantees:
 ```typescript
 import { StepSemantics } from "@aws/durable-execution-sdk-js";
 
-// At-most-once per retry (default)
+// At-least-once per retry (default)
+await context.step("retriable-operation", async () => sendNotification(), {
+  semantics: StepSemantics.AtLeastOncePerRetry,
+});
+
+// At-most-once per retry
 await context.step("idempotent-operation", async () => updateDatabase(), {
   semantics: StepSemantics.AtMostOncePerRetry,
 });
+```
 
-// At-least-once per retry
-await context.step("retriable-operation", async () => sendNotification(), {
-  semantics: StepSemantics.AtLeastOncePerRetry,
-});
+**Important**: These semantics apply _per retry_, not per overall execution:
+
+- **AtLeastOncePerRetry**: The step will execute at least once on each retry attempt. If the step succeeds but the checkpoint fails (e.g., a sandbox crash), the step will re-execute on replay.
+- **AtMostOncePerRetry**: The step will execute at most once per retry attempt. A checkpoint is created before execution, so if a failure occurs after the checkpoint but before step completion, the previous retry attempt is skipped on replay.
+
+**To achieve at-most-once semantics at the step level**, use a custom retry strategy:
+
+```typescript
+await context.step(
+  "truly-once-only",
+  async () => callThatCannotTolerateDuplicates(),
+  {
+    semantics: StepSemantics.AtMostOncePerRetry,
+    retryStrategy: () => ({ shouldRetry: false }), // No retries
+  },
+);
 ```
 
+Without this, a step using `AtMostOncePerRetry` with retries enabled could still execute multiple times across different retry attempts.
+
 ### Jitter Strategies
 
 Prevent thundering herd:
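
The new README examples only show the retry-disabling form of `retryStrategy`. As a hedged illustration of the same option (the zero-argument callback and the `{ shouldRetry }` return shape are taken from the example above; the attempt counter, bound, and step name are hypothetical):

```typescript
import { StepSemantics } from "@aws/durable-execution-sdk-js";

// Hypothetical bounded strategy: permit up to two retries, then stop.
// The closure-based counter is illustrative, not a documented SDK pattern.
let attempts = 0;
const retryAtMostTwice = () => ({ shouldRetry: ++attempts <= 2 });

await context.step("bounded-retries", async () => updateDatabase(), {
  semantics: StepSemantics.AtMostOncePerRetry,
  retryStrategy: retryAtMostTwice,
});
```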
package/dist/index.mjs CHANGED
@@ -165,11 +165,56 @@ var DurableLogLevel;
 })(DurableLogLevel || (DurableLogLevel = {}));
 
 /**
+ * Execution semantics for step operations.
+ *
+ * @remarks
+ * These semantics control how step execution is checkpointed and replayed. **Important**: The guarantees apply *per
+ * retry attempt*, not per overall workflow execution.
+ *
+ * With retries enabled (the default), a step could execute multiple times across different retry attempts even when
+ * using `AtMostOncePerRetry`. To achieve step-level at-most-once execution, combine `AtMostOncePerRetry` with a retry
+ * strategy that disables retries (`shouldRetry: false`).
+ *
+ * @example
+ * ```typescript
+ * // At-least-once per retry (default) - safe for idempotent operations
+ * await context.step("send-notification", async () => sendEmail(), {
+ *   semantics: StepSemantics.AtLeastOncePerRetry,
+ * });
+ *
+ * // At-most-once per retry - for non-idempotent operations
+ * await context.step("charge-payment", async () => processPayment(), {
+ *   semantics: StepSemantics.AtMostOncePerRetry,
+ *   retryStrategy: () => ({ shouldRetry: false }),
+ * });
+ * ```
+ *
  * @public
  */
 var StepSemantics;
 (function (StepSemantics) {
+    /**
+     * At-most-once execution per retry attempt.
+     *
+     * @remarks
+     * A checkpoint is created before step execution. If a failure occurs after the checkpoint
+     * but before step completion, the previous step retry attempt is skipped on replay.
+     *
+     * **Note**: This is "at-most-once *per retry*". With multiple retry attempts, the step
+     * could still execute multiple times across different retries. To guarantee the step
+     * executes at most once, disable retries by returning
+     * `{ shouldRetry: false }` from your retry strategy.
+     */
     StepSemantics["AtMostOncePerRetry"] = "AT_MOST_ONCE_PER_RETRY";
+    /**
+     * At-least-once execution per retry attempt (default).
+     *
+     * @remarks
+     * The step will execute at least once on each retry attempt. If the step succeeds
+     * but the checkpoint fails (e.g., due to a sandbox crash), the step will re-execute
+     * on replay. This is the safer default for operations that are idempotent or can
+     * tolerate duplicate execution.
+     */
     StepSemantics["AtLeastOncePerRetry"] = "AT_LEAST_ONCE_PER_RETRY";
 })(StepSemantics || (StepSemantics = {}));
 
 /**
@@ -2794,7 +2839,13 @@ class ConcurrencyController {
                 tryStartNext();
             }
         };
-        tryStartNext();
+        if (items.length === 0) {
+            log("🎉", `${this.operationName} completed with no items`);
+            resolve(new BatchResultImpl([], getCompletionReason(0)));
+        }
+        else {
+            tryStartNext();
+        }
     });
     }
 }
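
This hunk fixes a hang on empty input: `tryStartNext()` was called unconditionally, and with zero items no worker ever started, so the completion path never ran and the returned promise never settled. A minimal standalone sketch of the guard, with hypothetical names rather than the SDK's internals:

```typescript
// "Resolve when all workers finish" hangs for zero items, because no worker
// ever runs and the completion check never fires; the guard resolves up front.
function runAll<T, R>(items: T[], worker: (item: T) => Promise<R>): Promise<R[]> {
  return new Promise((resolve, reject) => {
    if (items.length === 0) {
      resolve([]); // without this, the promise below would never settle
      return;
    }
    const results = new Array<R>(items.length);
    let completed = 0;
    items.forEach((item, i) => {
      worker(item).then((result) => {
        results[i] = result;
        if (++completed === items.length) resolve(results);
      }, reject);
    });
  });
}
```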
@@ -3232,6 +3283,13 @@ class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError
 }
 
 const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
+const TERMINAL_STATUSES = [
+    OperationStatus.SUCCEEDED,
+    OperationStatus.CANCELLED,
+    OperationStatus.FAILED,
+    OperationStatus.STOPPED,
+    OperationStatus.TIMED_OUT,
+];
 class CheckpointManager {
     durableExecutionArn;
     stepData;
@@ -3246,6 +3304,7 @@ class CheckpointManager {
     forceCheckpointPromises = [];
     queueCompletionResolver = null;
     MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
+    MAX_ITEMS_IN_BATCH = 250;
     isTerminating = false;
     static textEncoder = new TextEncoder();
     // Operation lifecycle tracking
@@ -3407,7 +3466,9 @@ class CheckpointManager {
         while (this.queue.length > 0) {
             const nextItem = this.queue[0];
             const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
-            if (currentSize + itemSize > this.MAX_PAYLOAD_SIZE && batch.length > 0) {
+            if ((currentSize + itemSize > this.MAX_PAYLOAD_SIZE ||
+                batch.length >= this.MAX_ITEMS_IN_BATCH) &&
+                batch.length > 0) {
                 break;
             }
             this.queue.shift();
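
Previously a batch was cut only when the next item would exceed the 750KB payload budget; this hunk adds a 250-item cap. A free-standing sketch of the dual-limit rule (a hypothetical extraction of the method above; the `batch.length > 0` clause keeps an oversized first item shippable on its own):

```typescript
const MAX_PAYLOAD_SIZE = 750 * 1024; // bytes
const MAX_ITEMS_IN_BATCH = 250;
const textEncoder = new TextEncoder();

// Take items off the queue until the byte budget or the item cap is reached,
// but always take at least one item so a single oversized item still ships.
function takeBatch(queue: unknown[]): unknown[] {
  const batch: unknown[] = [];
  let currentSize = 0;
  while (queue.length > 0) {
    const itemSize = textEncoder.encode(JSON.stringify(queue[0])).length;
    if ((currentSize + itemSize > MAX_PAYLOAD_SIZE ||
        batch.length >= MAX_ITEMS_IN_BATCH) &&
        batch.length > 0) {
      break;
    }
    batch.push(queue.shift());
    currentSize += itemSize;
  }
  return batch;
}
```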
@@ -3589,6 +3650,11 @@ class CheckpointManager {
         if (op.state !== OperationLifecycleState.RETRY_WAITING) {
             throw new Error(`Operation ${stepId} must be in RETRY_WAITING state, got ${op.state}`);
         }
+        // Resolve immediately if the step was completed already
+        const stepData = this.stepData[hashId(stepId)];
+        if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
+            return Promise.resolve();
+        }
         // Start timer with polling
         this.startTimerWithPolling(stepId, op.endTimestamp);
         // Return promise that resolves when status changes
@@ -3604,6 +3670,11 @@ class CheckpointManager {
         if (op.state !== OperationLifecycleState.IDLE_AWAITED) {
             throw new Error(`Operation ${stepId} must be in IDLE_AWAITED state, got ${op.state}`);
         }
+        // Resolve immediately if the step was completed already
+        const stepData = this.stepData[hashId(stepId)];
+        if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
+            return Promise.resolve();
+        }
         // Start timer with polling
         this.startTimerWithPolling(stepId, op.endTimestamp);
         // Return promise that resolves when status changes
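
Both wait paths (`RETRY_WAITING` and `IDLE_AWAITED`) now short-circuit when the step already carries a terminal status, rather than arming a polling timer for work that is already finished. A reduced sketch of the guard; the status union and function shape are illustrative, only the terminal-status list comes from the diff:

```typescript
type Status = "SUCCEEDED" | "CANCELLED" | "FAILED" | "STOPPED" | "TIMED_OUT" | "STARTED";
const TERMINAL_STATUSES: Status[] = ["SUCCEEDED", "CANCELLED", "FAILED", "STOPPED", "TIMED_OUT"];

// If a previous attempt already drove the step to a terminal status, there is
// nothing to wait for: resolve immediately instead of starting a poll timer.
function waitForStep(status: Status | undefined, startTimer: () => Promise<void>): Promise<void> {
  if (status && TERMINAL_STATUSES.includes(status)) {
    return Promise.resolve();
  }
  return startTimer();
}
```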
@@ -3653,28 +3724,28 @@ class CheckpointManager {
             op.resolver = undefined;
         }
     }
-    checkAndTerminate() {
+    /**
+     * Determines if the function should terminate.
+     * @returns TerminationReason if the function should terminate, or undefined if the function should not terminate
+     */
+    shouldTerminate() {
         // Rule 1: Can't terminate if checkpoint queue is not empty
         if (this.queue.length > 0) {
-            this.abortTermination();
-            return;
+            return undefined;
         }
         // Rule 2: Can't terminate if checkpoint is currently processing
         if (this.isProcessing) {
-            this.abortTermination();
-            return;
+            return undefined;
         }
         // Rule 3: Can't terminate if there are pending force checkpoint promises
         if (this.forceCheckpointPromises.length > 0) {
-            this.abortTermination();
-            return;
+            return undefined;
         }
         const allOps = Array.from(this.operations.values());
         // Rule 4: Can't terminate if any operation is EXECUTING
         const hasExecuting = allOps.some((op) => op.state === OperationLifecycleState.EXECUTING);
         if (hasExecuting) {
-            this.abortTermination();
-            return;
+            return undefined;
         }
         // Rule 5: Clean up operations whose ancestors are complete or pending completion
         for (const op of allOps) {
@@ -3697,12 +3768,17 @@ class CheckpointManager {
                 op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
                 op.state === OperationLifecycleState.IDLE_AWAITED);
         if (hasWaiting) {
-            const reason = this.determineTerminationReason(remainingOps);
-            this.scheduleTermination(reason);
+            return this.determineTerminationReason(remainingOps);
         }
-        else {
-            this.abortTermination();
+        return undefined;
+    }
+    checkAndTerminate() {
+        const terminationReason = this.shouldTerminate();
+        if (terminationReason) {
+            this.scheduleTermination(terminationReason);
+            return;
         }
+        this.abortTermination();
     }
     abortTermination() {
         if (this.terminationTimer) {
@@ -3726,6 +3802,11 @@ class CheckpointManager {
             cooldownMs: this.TERMINATION_COOLDOWN_MS,
         });
         this.terminationTimer = setTimeout(() => {
+            if (!this.shouldTerminate()) {
+                log("🔄", "Termination conditions no longer valid after cooldown, aborting termination");
+                this.abortTermination();
+                return;
+            }
             this.executeTermination(reason);
         }, this.TERMINATION_COOLDOWN_MS);
     }
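
Taken together, the `shouldTerminate()` extraction and this cooldown re-check form a check / schedule / re-validate pattern: because the termination decision is now a side-effect-free query, it can be recomputed when the timer fires, and a decision that went stale during the cooldown is aborted instead of executed. A condensed, hypothetical sketch of the pattern (not the SDK's actual class):

```typescript
class Terminator {
  private timer?: ReturnType<typeof setTimeout>;

  constructor(
    private shouldTerminate: () => string | undefined, // pure: reason or undefined
    private execute: (reason: string) => void,
    private cooldownMs: number,
  ) {}

  checkAndTerminate(): void {
    const reason = this.shouldTerminate();
    if (!reason) {
      this.abort();
      return;
    }
    this.timer = setTimeout(() => {
      // Conditions may have changed while we waited; re-validate before acting.
      if (!this.shouldTerminate()) {
        this.abort();
        return;
      }
      this.execute(reason);
    }, this.cooldownMs);
  }

  abort(): void {
    if (this.timer) clearTimeout(this.timer);
    this.timer = undefined;
  }
}
```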
@@ -4477,16 +4558,10 @@ async function runHandler(event, context, executionContext, durableExecutionMode
  * Validates that the event is a proper durable execution input
  */
 function validateDurableExecutionEvent(event) {
-    try {
-        const eventObj = event;
-        if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
-            throw new Error("Missing required durable execution fields");
-        }
-    }
-    catch {
-        const msg = `Unexpected payload provided to start the durable execution.
-    Check your resource configurations to confirm the durability is set.`;
-        throw new Error(msg);
+    const eventObj = event;
+    if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
+        throw new Error("Unexpected payload provided to start the durable execution.\n" +
+            "Check your resource configurations to confirm the durability is set.");
     }
 }
 /**
@@ -4564,14 +4639,7 @@ const withDurableExecution = (handler, config) => {
     return async (event, context) => {
         validateDurableExecutionEvent(event);
         const { executionContext, durableExecutionMode, checkpointToken } = await initializeExecutionContext(event, context, config?.client);
-        let response = null;
-        try {
-            response = await runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
-            return response;
-        }
-        catch (err) {
-            throw err;
-        }
+        return runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
     };
 };