@aws/durable-execution-sdk-js 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +28 -55
  2. package/dist/index.mjs +1945 -2020
  3. package/dist/index.mjs.map +1 -1
  4. package/dist-cjs/index.js +1944 -2019
  5. package/dist-cjs/index.js.map +1 -1
  6. package/dist-types/context/durable-context/durable-context.d.ts +0 -6
  7. package/dist-types/context/durable-context/durable-context.d.ts.map +1 -1
  8. package/dist-types/errors/durable-error/durable-error.d.ts +6 -0
  9. package/dist-types/errors/durable-error/durable-error.d.ts.map +1 -1
  10. package/dist-types/errors/step-errors/step-errors.d.ts +1 -0
  11. package/dist-types/errors/step-errors/step-errors.d.ts.map +1 -1
  12. package/dist-types/handlers/callback-handler/callback-promise.d.ts +2 -2
  13. package/dist-types/handlers/callback-handler/callback-promise.d.ts.map +1 -1
  14. package/dist-types/handlers/callback-handler/callback.d.ts +1 -2
  15. package/dist-types/handlers/callback-handler/callback.d.ts.map +1 -1
  16. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts +1 -0
  17. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts.map +1 -1
  18. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts +1 -2
  19. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts.map +1 -1
  20. package/dist-types/handlers/run-in-child-context-handler/run-in-child-context-handler.d.ts.map +1 -1
  21. package/dist-types/handlers/step-handler/step-handler.d.ts +1 -9
  22. package/dist-types/handlers/step-handler/step-handler.d.ts.map +1 -1
  23. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts +1 -6
  24. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts.map +1 -1
  25. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts +2 -0
  26. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts.map +1 -0
  27. package/dist-types/handlers/wait-handler/wait-handler.d.ts +1 -2
  28. package/dist-types/handlers/wait-handler/wait-handler.d.ts.map +1 -1
  29. package/dist-types/index.d.ts +1 -1
  30. package/dist-types/index.d.ts.map +1 -1
  31. package/dist-types/testing/create-test-checkpoint-manager.d.ts.map +1 -1
  32. package/dist-types/testing/create-test-durable-context.d.ts.map +1 -1
  33. package/dist-types/testing/mock-checkpoint-manager.d.ts +0 -1
  34. package/dist-types/testing/mock-checkpoint-manager.d.ts.map +1 -1
  35. package/dist-types/testing/mock-checkpoint.d.ts +1 -0
  36. package/dist-types/testing/mock-checkpoint.d.ts.map +1 -1
  37. package/dist-types/types/batch.d.ts +8 -0
  38. package/dist-types/types/batch.d.ts.map +1 -1
  39. package/dist-types/types/core.d.ts +1 -3
  40. package/dist-types/types/core.d.ts.map +1 -1
  41. package/dist-types/types/durable-context.d.ts +8 -6
  42. package/dist-types/types/durable-context.d.ts.map +1 -1
  43. package/dist-types/types/index.d.ts +2 -0
  44. package/dist-types/types/index.d.ts.map +1 -1
  45. package/dist-types/types/operation-lifecycle-state.d.ts +27 -0
  46. package/dist-types/types/operation-lifecycle-state.d.ts.map +1 -0
  47. package/dist-types/types/operation-lifecycle.d.ts +27 -0
  48. package/dist-types/types/operation-lifecycle.d.ts.map +1 -0
  49. package/dist-types/types/step.d.ts +45 -0
  50. package/dist-types/types/step.d.ts.map +1 -1
  51. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts +2 -0
  52. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts.map +1 -0
  53. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts +2 -0
  54. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts.map +1 -0
  55. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts +37 -0
  56. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts.map +1 -1
  57. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts +43 -12
  58. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts.map +1 -1
  59. package/dist-types/utils/constants/constants.d.ts +0 -1
  60. package/dist-types/utils/constants/constants.d.ts.map +1 -1
  61. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts +3 -1
  62. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts.map +1 -1
  63. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts +2 -0
  64. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts.map +1 -0
  65. package/dist-types/utils/termination-helper/termination-helper.d.ts +0 -9
  66. package/dist-types/utils/termination-helper/termination-helper.d.ts.map +1 -1
  67. package/dist-types/with-durable-execution.d.ts.map +1 -1
  68. package/package.json +8 -4
  69. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts +0 -2
  70. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts.map +0 -1
  71. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts +0 -31
  72. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts.map +0 -1
  73. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts +0 -2
  74. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts.map +0 -1
  75. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts +0 -35
  76. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts.map +0 -1
  77. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts +0 -2
  78. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts.map +0 -1
package/dist-cjs/index.js CHANGED
@@ -167,11 +167,56 @@ var DurableLogLevel;
167
167
  })(DurableLogLevel || (DurableLogLevel = {}));
168
168
 
169
169
  /**
170
+ * Execution semantics for step operations.
171
+ *
172
+ * @remarks
173
+ * These semantics control how step execution is checkpointed and replayed. **Important**: The guarantees apply *per
174
+ * retry attempt*, not per overall workflow execution.
175
+ *
176
+ * With retries enabled (the default), a step could execute multiple times across different retry attempts even when
177
+ * using `AtMostOncePerRetry`. To achieve step-level at-most-once execution, combine `AtMostOncePerRetry` with a retry
178
+ * strategy that disables retries (`shouldRetry: false`).
179
+ *
180
+ * @example
181
+ * ```typescript
182
+ * // At-least-once per retry (default) - safe for idempotent operations
183
+ * await context.step("send-notification", async () => sendEmail(), {
184
+ * semantics: StepSemantics.AtLeastOncePerRetry,
185
+ * });
186
+ *
187
+ * // At-most-once per retry - for non-idempotent operations
188
+ * await context.step("charge-payment", async () => processPayment(), {
189
+ * semantics: StepSemantics.AtMostOncePerRetry,
190
+ * retryStrategy: () => ({ shouldRetry: false }),
191
+ * });
192
+ * ```
193
+ *
170
194
  * @public
171
195
  */
172
196
  exports.StepSemantics = void 0;
173
197
  (function (StepSemantics) {
198
+ /**
199
+ * At-most-once execution per retry attempt.
200
+ *
201
+ * @remarks
202
+ * A checkpoint is created before step execution. If a failure occurs after the checkpoint
203
+ * but before step completion, the previous step retry attempt is skipped on replay.
204
+ *
205
+ * **Note**: This is "at-most-once *per retry*". With multiple retry attempts, the step
206
+ * could still execute multiple times across different retries. To guarantee the step
207
+ * executes at most once, disable retries by returning
208
+ * `{ shouldRetry: false }` from your retry strategy.
209
+ */
174
210
  StepSemantics["AtMostOncePerRetry"] = "AT_MOST_ONCE_PER_RETRY";
211
+ /**
212
+ * At-least-once execution per retry attempt (default).
213
+ *
214
+ * @remarks
215
+ * The step will execute at least once on each retry attempt. If the step succeeds
216
+ * but the checkpoint fails (e.g., due to a sandbox crash), the step will re-execute
217
+ * on replay. This is the safer default for operations that are idempotent or can
218
+ * tolerate duplicate execution.
219
+ */
175
220
  StepSemantics["AtLeastOncePerRetry"] = "AT_LEAST_ONCE_PER_RETRY";
176
221
  })(exports.StepSemantics || (exports.StepSemantics = {}));
177
222
  /**
@@ -300,6 +345,34 @@ class DurablePromise {
300
345
  }
301
346
  }
302
347
 
348
+ /**
349
+ * Represents the lifecycle state of an operation in the durable execution system.
350
+ * This is distinct from AWS SDK's OperationStatus (PENDING, SUCCEEDED, FAILED).
351
+ */
352
+ var OperationLifecycleState;
353
+ (function (OperationLifecycleState) {
354
+ /**
355
+ * Operation is currently executing user code (step function, waitForCondition check)
356
+ */
357
+ OperationLifecycleState["EXECUTING"] = "EXECUTING";
358
+ /**
359
+ * Operation is waiting for retry timer to expire before re-executing user code
360
+ */
361
+ OperationLifecycleState["RETRY_WAITING"] = "RETRY_WAITING";
362
+ /**
363
+ * Operation is waiting for external event (timer, callback, invoke) but not awaited yet (phase 1)
364
+ */
365
+ OperationLifecycleState["IDLE_NOT_AWAITED"] = "IDLE_NOT_AWAITED";
366
+ /**
367
+ * Operation is waiting for external event and has been awaited (phase 2)
368
+ */
369
+ OperationLifecycleState["IDLE_AWAITED"] = "IDLE_AWAITED";
370
+ /**
371
+ * Operation has completed (success or permanent failure)
372
+ */
373
+ OperationLifecycleState["COMPLETED"] = "COMPLETED";
374
+ })(OperationLifecycleState || (OperationLifecycleState = {}));
375
+
303
376
  /**
304
377
  * Converts a Duration object to total seconds
305
378
  * @param duration - Duration object with at least one time unit specified
@@ -313,6 +386,21 @@ function durationToSeconds(duration) {
313
386
  return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 + seconds;
314
387
  }
315
388
 
389
+ /**
390
+ * Terminates execution for unrecoverable errors and returns a never-resolving promise
391
+ * @param context - The execution context containing the termination manager
392
+ * @param error - The unrecoverable error that caused termination
393
+ * @param stepIdentifier - The step name or ID for error messaging
394
+ * @returns A never-resolving promise
395
+ */
396
+ function terminateForUnrecoverableError(context, error, stepIdentifier) {
397
+ context.terminationManager.terminate({
398
+ reason: error.terminationReason,
399
+ message: `Unrecoverable error in step ${stepIdentifier}: ${error.message}`,
400
+ });
401
+ return new Promise(() => { }); // Never-resolving promise
402
+ }
403
+
316
404
  const safeStringify = (data) => {
317
405
  try {
318
406
  const seen = new WeakSet();
@@ -346,238 +434,6 @@ const log = (emoji, message, data) => {
346
434
  }
347
435
  };
348
436
 
349
- var TerminationReason;
350
- (function (TerminationReason) {
351
- // Default termination reason
352
- TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
353
- // Retry-related reasons
354
- TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
355
- TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
356
- // Wait-related reasons
357
- TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
358
- // Callback-related reasons
359
- TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
360
- // Error-related reasons
361
- TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
362
- TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
363
- TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
364
- // Custom reason
365
- TerminationReason["CUSTOM"] = "CUSTOM";
366
- })(TerminationReason || (TerminationReason = {}));
367
-
368
- const asyncLocalStorage = new async_hooks.AsyncLocalStorage();
369
- const getActiveContext = () => {
370
- return asyncLocalStorage.getStore();
371
- };
372
- const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
373
- return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
374
- };
375
- const validateContextUsage = (operationContextId, operationName, terminationManager) => {
376
- const contextId = operationContextId || "root";
377
- const activeContext = getActiveContext();
378
- if (!activeContext) {
379
- return;
380
- }
381
- if (activeContext.contextId !== contextId) {
382
- const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
383
- terminationManager.terminate({
384
- reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
385
- message: errorMessage,
386
- error: new Error(errorMessage),
387
- });
388
- // Only call termination manager, don't throw or return promise
389
- }
390
- };
391
-
392
- const HASH_LENGTH = 16;
393
- /**
394
- * Creates an MD5 hash of the input string for better performance than SHA-256
395
- * @param input - The string to hash
396
- * @returns The truncated hexadecimal hash string
397
- */
398
- const hashId = (input) => {
399
- return crypto.createHash("md5")
400
- .update(input)
401
- .digest("hex")
402
- .substring(0, HASH_LENGTH);
403
- };
404
- /**
405
- * Helper function to get step data using the original stepId
406
- * This function handles the hashing internally so callers don't need to worry about it
407
- * @param stepData - The stepData record from context
408
- * @param stepId - The original stepId (will be hashed internally)
409
- * @returns The operation data or undefined if not found
410
- */
411
- const getStepData = (stepData, stepId) => {
412
- const hashedId = hashId(stepId);
413
- return stepData[hashedId];
414
- };
415
-
416
- /**
417
- * Checks if any ancestor operation in the parent chain has finished (SUCCEEDED or FAILED)
418
- * or has a pending completion checkpoint
419
- */
420
- function hasFinishedAncestor(context, parentId) {
421
- if (!parentId) {
422
- log("🔍", "hasFinishedAncestor: No parentId provided");
423
- return false;
424
- }
425
- // First check if any ancestor has a pending completion checkpoint
426
- if (hasPendingAncestorCompletion(context, parentId)) {
427
- log("🔍", "hasFinishedAncestor: Found ancestor with pending completion!", {
428
- parentId,
429
- });
430
- return true;
431
- }
432
- let currentHashedId = hashId(parentId);
433
- log("🔍", "hasFinishedAncestor: Starting check", {
434
- parentId,
435
- initialHashedId: currentHashedId,
436
- });
437
- while (currentHashedId) {
438
- const parentOperation = context._stepData[currentHashedId];
439
- log("🔍", "hasFinishedAncestor: Checking operation", {
440
- hashedId: currentHashedId,
441
- hasOperation: !!parentOperation,
442
- status: parentOperation?.Status,
443
- type: parentOperation?.Type,
444
- });
445
- if (parentOperation?.Status === clientLambda.OperationStatus.SUCCEEDED ||
446
- parentOperation?.Status === clientLambda.OperationStatus.FAILED) {
447
- log("🔍", "hasFinishedAncestor: Found finished ancestor!", {
448
- hashedId: currentHashedId,
449
- status: parentOperation.Status,
450
- });
451
- return true;
452
- }
453
- currentHashedId = parentOperation?.ParentId;
454
- }
455
- log("🔍", "hasFinishedAncestor: No finished ancestor found");
456
- return false;
457
- }
458
- /**
459
- * Checks if any ancestor has a pending completion checkpoint
460
- */
461
- function hasPendingAncestorCompletion(context, stepId) {
462
- let currentHashedId = hashId(stepId);
463
- while (currentHashedId) {
464
- if (context.pendingCompletions.has(currentHashedId)) {
465
- return true;
466
- }
467
- const operation = context._stepData[currentHashedId];
468
- currentHashedId = operation?.ParentId;
469
- }
470
- return false;
471
- }
472
- /**
473
- * Terminates execution and returns a never-resolving promise to prevent code progression
474
- * @param context - The execution context containing the termination manager
475
- * @param reason - The termination reason
476
- * @param message - The termination message
477
- * @returns A never-resolving promise
478
- */
479
- function terminate(context, reason, message) {
480
- const activeContext = getActiveContext();
481
- // If we have a parent context, add delay to let checkpoints process
482
- if (activeContext?.parentId) {
483
- return new Promise(async (_resolve, _reject) => {
484
- // Wait a tick to let any pending checkpoints start processing
485
- await new Promise((resolve) => setImmediate(resolve));
486
- log("🔍", "Terminate called - checking context:", {
487
- hasActiveContext: !!activeContext,
488
- contextId: activeContext?.contextId,
489
- parentId: activeContext?.parentId,
490
- reason,
491
- message,
492
- });
493
- const ancestorFinished = hasFinishedAncestor(context, activeContext.parentId);
494
- log("🔍", "Ancestor check result:", {
495
- parentId: activeContext.parentId,
496
- ancestorFinished,
497
- });
498
- if (ancestorFinished) {
499
- log("🛑", "Skipping termination - ancestor already finished:", {
500
- contextId: activeContext.contextId,
501
- parentId: activeContext.parentId,
502
- reason,
503
- message,
504
- });
505
- // Return never-resolving promise without terminating
506
- return;
507
- }
508
- // Check if there are active operations before terminating
509
- const tracker = context.activeOperationsTracker;
510
- if (tracker && tracker.hasActive()) {
511
- log("⏳", "Deferring termination - active operations in progress:", {
512
- activeCount: tracker.getCount(),
513
- reason,
514
- message,
515
- });
516
- // Wait for operations to complete, then terminate
517
- const checkInterval = setInterval(() => {
518
- if (!tracker.hasActive()) {
519
- clearInterval(checkInterval);
520
- log("✅", "Active operations completed, proceeding with termination:", {
521
- reason,
522
- message,
523
- });
524
- context.terminationManager.terminate({
525
- reason,
526
- message,
527
- });
528
- }
529
- }, 10);
530
- return;
531
- }
532
- // No active operations, terminate immediately
533
- context.terminationManager.terminate({
534
- reason,
535
- message,
536
- });
537
- });
538
- }
539
- // No parent context - check active operations and terminate
540
- const tracker = context.activeOperationsTracker;
541
- if (tracker && tracker.hasActive()) {
542
- log("⏳", "Deferring termination - active operations in progress:", {
543
- activeCount: tracker.getCount(),
544
- reason,
545
- message,
546
- });
547
- return new Promise((_resolve, _reject) => {
548
- const checkInterval = setInterval(() => {
549
- if (!tracker.hasActive()) {
550
- clearInterval(checkInterval);
551
- log("✅", "Active operations completed, proceeding with termination:", {
552
- reason,
553
- message,
554
- });
555
- context.terminationManager.terminate({
556
- reason,
557
- message,
558
- });
559
- }
560
- }, 10);
561
- });
562
- }
563
- // No parent, no active operations - terminate immediately
564
- context.terminationManager.terminate({
565
- reason,
566
- message,
567
- });
568
- return new Promise(() => { });
569
- }
570
- /**
571
- * Terminates execution for unrecoverable errors and returns a never-resolving promise
572
- * @param context - The execution context containing the termination manager
573
- * @param error - The unrecoverable error that caused termination
574
- * @param stepIdentifier - The step name or ID for error messaging
575
- * @returns A never-resolving promise
576
- */
577
- function terminateForUnrecoverableError(context, error, stepIdentifier) {
578
- return terminate(context, error.terminationReason, `Unrecoverable error in step ${stepIdentifier}: ${error.message}`);
579
- }
580
-
581
437
  const DEFAULT_CONFIG$1 = {
582
438
  maxAttempts: 3,
583
439
  initialDelay: { seconds: 5 },
@@ -749,6 +605,7 @@ const retryPresets = {
749
605
  /**
750
606
  * Error thrown when a step with AT_MOST_ONCE_PER_RETRY semantics was started but interrupted
751
607
  * before completion.
608
+ * @public
752
609
  */
753
610
  class StepInterruptedError extends Error {
754
611
  constructor(_stepId, _stepName) {
@@ -757,13 +614,9 @@ class StepInterruptedError extends Error {
757
614
  }
758
615
  }
759
616
 
760
- /**
761
- * Shared constants to avoid circular dependencies
762
- */
763
- const OPERATIONS_COMPLETE_EVENT = "allOperationsComplete";
764
-
765
617
  /**
766
618
  * Base class for all durable operation errors
619
+ * @public
767
620
  */
768
621
  class DurableOperationError extends Error {
769
622
  cause;
@@ -812,6 +665,7 @@ class DurableOperationError extends Error {
812
665
  }
813
666
  /**
814
667
  * Error thrown when a step operation fails
668
+ * @public
815
669
  */
816
670
  class StepError extends DurableOperationError {
817
671
  errorType = "StepError";
@@ -821,6 +675,7 @@ class StepError extends DurableOperationError {
821
675
  }
822
676
  /**
823
677
  * Error thrown when a callback operation fails
678
+ * @public
824
679
  */
825
680
  class CallbackError extends DurableOperationError {
826
681
  errorType = "CallbackError";
@@ -830,6 +685,7 @@ class CallbackError extends DurableOperationError {
830
685
  }
831
686
  /**
832
687
  * Error thrown when an invoke operation fails
688
+ * @public
833
689
  */
834
690
  class InvokeError extends DurableOperationError {
835
691
  errorType = "InvokeError";
@@ -839,6 +695,7 @@ class InvokeError extends DurableOperationError {
839
695
  }
840
696
  /**
841
697
  * Error thrown when a child context operation fails
698
+ * @public
842
699
  */
843
700
  class ChildContextError extends DurableOperationError {
844
701
  errorType = "ChildContextError";
@@ -848,6 +705,7 @@ class ChildContextError extends DurableOperationError {
848
705
  }
849
706
  /**
850
707
  * Error thrown when a wait for condition operation fails
708
+ * @public
851
709
  */
852
710
  class WaitForConditionError extends DurableOperationError {
853
711
  errorType = "WaitForConditionError";
@@ -1015,6 +873,25 @@ function createClassSerdesWithDates(cls, dateProps) {
1015
873
  };
1016
874
  }
1017
875
 
876
+ var TerminationReason;
877
+ (function (TerminationReason) {
878
+ // Default termination reason
879
+ TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
880
+ // Retry-related reasons
881
+ TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
882
+ TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
883
+ // Wait-related reasons
884
+ TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
885
+ // Callback-related reasons
886
+ TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
887
+ // Error-related reasons
888
+ TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
889
+ TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
890
+ TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
891
+ // Custom reason
892
+ TerminationReason["CUSTOM"] = "CUSTOM";
893
+ })(TerminationReason || (TerminationReason = {}));
894
+
1018
895
  /**
1019
896
  * Base class for all unrecoverable errors
1020
897
  * Any error that inherits from this class indicates a fatal condition
@@ -1135,6 +1012,30 @@ async function safeDeserialize(serdes, data, stepId, stepName, terminationManage
1135
1012
  }
1136
1013
  }
1137
1014
 
1015
+ const asyncLocalStorage = new async_hooks.AsyncLocalStorage();
1016
+ const getActiveContext = () => {
1017
+ return asyncLocalStorage.getStore();
1018
+ };
1019
+ const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
1020
+ return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
1021
+ };
1022
+ const validateContextUsage = (operationContextId, operationName, terminationManager) => {
1023
+ const contextId = operationContextId || "root";
1024
+ const activeContext = getActiveContext();
1025
+ if (!activeContext) {
1026
+ return;
1027
+ }
1028
+ if (activeContext.contextId !== contextId) {
1029
+ const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
1030
+ terminationManager.terminate({
1031
+ reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
1032
+ message: errorMessage,
1033
+ error: new Error(errorMessage),
1034
+ });
1035
+ // Only call termination manager, don't throw or return promise
1036
+ }
1037
+ };
1038
+
1138
1039
  function isErrorLike(obj) {
1139
1040
  return (obj instanceof Error ||
1140
1041
  (obj != null &&
@@ -1163,525 +1064,426 @@ function createErrorObjectFromError(error, data) {
1163
1064
  }
1164
1065
 
1165
1066
/**
 * Error thrown when non-deterministic code is detected during replay.
 *
 * Extends UnrecoverableExecutionError and reports
 * `TerminationReason.CUSTOM` as its termination reason.
 */
class NonDeterministicExecutionError extends UnrecoverableExecutionError {
    terminationReason = TerminationReason.CUSTOM;
    /**
     * @param message - human-readable description of the detected mismatch
     */
    constructor(message) {
        super(message);
        // Fixed name so the error stays identifiable by `error.name`.
        this.name = "NonDeterministicExecutionError";
    }
}
1187
1076
 
1188
- const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
1189
- class CheckpointManager {
1190
- durableExecutionArn;
1191
- stepData;
1192
- storage;
1193
- terminationManager;
1194
- activeOperationsTracker;
1195
- stepDataEmitter;
1196
- logger;
1197
- pendingCompletions;
1198
- queue = [];
1199
- isProcessing = false;
1200
- currentTaskToken;
1201
- forceCheckpointPromises = [];
1202
- queueCompletionResolver = null;
1203
- queueCompletionTimeout = null;
1204
- MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
1205
- isTerminating = false;
1206
- static textEncoder = new TextEncoder();
1207
- constructor(durableExecutionArn, stepData, storage, terminationManager, activeOperationsTracker, initialTaskToken, stepDataEmitter, logger, pendingCompletions) {
1208
- this.durableExecutionArn = durableExecutionArn;
1209
- this.stepData = stepData;
1210
- this.storage = storage;
1211
- this.terminationManager = terminationManager;
1212
- this.activeOperationsTracker = activeOperationsTracker;
1213
- this.stepDataEmitter = stepDataEmitter;
1214
- this.logger = logger;
1215
- this.pendingCompletions = pendingCompletions;
1216
- this.currentTaskToken = initialTaskToken;
1217
- }
1218
- setTerminating() {
1219
- this.isTerminating = true;
1220
- log("🛑", "Checkpoint manager marked as terminating");
1221
- }
1222
- /**
1223
- * Checks if a step ID or any of its ancestors has a pending completion
1224
- */
1225
- hasPendingAncestorCompletion(stepId) {
1226
- let currentHashedId = hashId(stepId);
1227
- while (currentHashedId) {
1228
- if (this.pendingCompletions.has(currentHashedId)) {
1229
- return true;
1230
- }
1231
- const operation = this.stepData[currentHashedId];
1232
- currentHashedId = operation?.ParentId;
1233
- }
1234
- return false;
1235
- }
1236
- async forceCheckpoint() {
1237
- if (this.isTerminating) {
1238
- log("⚠️", "Force checkpoint skipped - termination in progress");
1239
- return new Promise(() => { }); // Never resolves during termination
1240
- }
1241
- return new Promise((resolve, reject) => {
1242
- this.forceCheckpointPromises.push({ resolve, reject });
1243
- if (!this.isProcessing) {
1244
- setImmediate(() => {
1245
- this.processQueue();
1246
- });
1247
- }
1248
- });
1077
/**
 * Compares the operation the workflow code is issuing now against the
 * checkpointed record for the same step id, and terminates with a
 * NonDeterministicExecutionError on the first difference found.
 *
 * Fields are compared in the order type, name, subtype. Validation is skipped
 * when there is no checkpoint record or the record has no `Type`
 * (first execution). A missing value is rendered as "undefined" in the
 * message for every field.
 *
 * @param stepId - id of the step being validated (also passed to termination)
 * @param currentOperation - `{ type, name, subType }` of the operation being issued
 * @param checkpointData - checkpointed record (`Type`, `Name`, `SubType`), if any
 * @param context - forwarded unchanged to terminateForUnrecoverableError
 * @returns nothing
 */
const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
    // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
    if (!checkpointData || !checkpointData.Type) {
        return;
    }
    const comparisons = [
        { label: "type", recorded: checkpointData.Type, current: currentOperation.type },
        // Name is compared even when undefined on either side.
        { label: "name", recorded: checkpointData.Name, current: currentOperation.name },
        { label: "subtype", recorded: checkpointData.SubType, current: currentOperation.subType },
    ];
    for (const { label, recorded, current } of comparisons) {
        if (recorded === current) {
            continue;
        }
        const message = [
            `Non-deterministic execution detected: Operation ${label} mismatch for step "${stepId}".`,
            `Expected ${label} "${recorded ?? "undefined"}", but got "${current ?? "undefined"}".`,
            `This indicates non-deterministic control flow in your workflow code.`,
        ].join(" ");
        terminateForUnrecoverableError(context, new NonDeterministicExecutionError(message), stepId);
        // One divergence is enough to terminate; do not report the same step again
        // for the remaining fields.
        return;
    }
};
1104
+
1105
/**
 * Creates the `step` operation for a durable context.
 *
 * The returned function accepts `(name, fn, options)` or `(fn, options)`.
 * Each call allocates a step id and immediately starts "phase 1" (replaying a
 * recorded outcome or executing `fn` with retries); it returns a
 * DurablePromise whose callback marks the operation as awaited and then
 * yields the phase 1 outcome.
 *
 * @param context - provides getStepData(), terminationManager, durableExecutionArn
 * @param checkpoint - provides checkpoint(), markOperationState(),
 *   markOperationAwaited(), waitForRetryTimer()
 * @param parentContext - not used by this handler
 * @param createStepId - returns the id for the next step
 * @param logger - exposed to the step function as `stepContext.logger`
 * @param parentId - id recorded as ParentId / metadata.parentId of every update
 * @returns the step function described above
 */
const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, parentId) => {
    return (nameOrFn, fnOrOptions, maybeOptions) => {
        // Overload resolution: a string or undefined first argument is the name slot.
        const hasNameSlot = typeof nameOrFn === "string" || nameOrFn === undefined;
        const name = hasNameSlot ? nameOrFn : undefined;
        const fn = hasNameSlot ? fnOrOptions : nameOrFn;
        const options = hasNameSlot ? maybeOptions : fnOrOptions;
        const stepId = createStepId();
        const semantics = options?.semantics || exports.StepSemantics.AtLeastOncePerRetry;
        const serdes = options?.serdes || defaultSerdes;
        // Fresh metadata object for every lifecycle-state notification.
        const buildMetadata = () => ({
            stepId,
            name,
            type: clientLambda.OperationType.STEP,
            subType: exports.OperationSubType.STEP,
            parentId,
        });
        // Checkpoint payload for this step; `details` precede Name and `extras`
        // follow it so the key order matches the hand-written literals.
        const buildUpdate = (action, details, extras) => ({
            Id: stepId,
            ParentId: parentId,
            Action: action,
            SubType: exports.OperationSubType.STEP,
            Type: clientLambda.OperationType.STEP,
            ...details,
            Name: name,
            ...extras,
        });
        // User retry strategy first; the default preset only when it yields null/undefined.
        const decideRetry = (error, attempt) => options?.retryStrategy?.(error, attempt) ??
            retryPresets.default(error, attempt);
        // Records the terminal failure, marks the step completed, then always throws.
        const failStep = async (error) => {
            await checkpoint.checkpoint(stepId, buildUpdate(clientLambda.OperationAction.FAIL, {
                Error: createErrorObjectFromError(error),
            }));
            checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
            throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
        };
        // Records a RETRY (1 second when the decision carries no delay) and marks
        // the step as waiting until the freshly read NextAttemptTimestamp.
        const scheduleRetry = async (error, retryDecision) => {
            await checkpoint.checkpoint(stepId, buildUpdate(clientLambda.OperationAction.RETRY, {
                Error: createErrorObjectFromError(error),
            }, {
                StepOptions: {
                    NextAttemptDelaySeconds: retryDecision.delay
                        ? durationToSeconds(retryDecision.delay)
                        : 1,
                },
            }));
            checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
                metadata: buildMetadata(),
                endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
            });
        };
        // Waits for the retry timer, then runs the step body again.
        const resumeAfterRetryTimer = async () => {
            await checkpoint.waitForRetryTimer(stepId);
            return executeStepLogic();
        };
        // Shared failure path: `error` is what gets checkpointed, `strategyError`
        // is what the retry strategy is consulted with.
        const retryOrFail = async (error, strategyError, attempt) => {
            const retryDecision = decideRetry(strategyError, attempt);
            if (!retryDecision.shouldRetry) {
                return failStep(error);
            }
            await scheduleRetry(error, retryDecision);
            return resumeAfterRetryTimer();
        };
        // Runs the step function once and checkpoints its outcome.
        async function executeStepLogic() {
            if (context.getStepData(stepId)?.Status !== clientLambda.OperationStatus.STARTED) {
                const startCheckpoint = checkpoint.checkpoint(stepId, buildUpdate(clientLambda.OperationAction.START));
                // At-most-once waits for START to be recorded before running the body.
                // Otherwise the START checkpoint is deliberately left un-awaited.
                // NOTE(review): presumably checkpoint() handles its own failures in
                // that case — confirm, since a rejection here would be unhandled.
                if (semantics === exports.StepSemantics.AtMostOncePerRetry) {
                    await startCheckpoint;
                }
            }
            try {
                const completedAttempts = context.getStepData(stepId)?.StepDetails?.Attempt || 0;
                const stepContext = { logger };
                // Mark operation as EXECUTING
                checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
                    metadata: buildMetadata(),
                });
                const result = await runWithContext(stepId, parentId, () => fn(stepContext), completedAttempts + 1, DurableExecutionMode.ExecutionMode);
                const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
                await checkpoint.checkpoint(stepId, buildUpdate(clientLambda.OperationAction.SUCCEED, {
                    Payload: serializedResult,
                }));
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                // Return the round-tripped value, i.e. what a later replay would produce.
                return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
            }
            catch (error) {
                if (isUnrecoverableError(error)) {
                    return terminateForUnrecoverableError(context, error, name || stepId);
                }
                const attempt = (context.getStepData(stepId)?.StepDetails?.Attempt || 0) + 1;
                // Retry strategies always receive an Error instance.
                const strategyError = error instanceof Error ? error : new Error("Unknown Error");
                return retryOrFail(error, strategyError, attempt);
            }
        }
        // Phase 1: Execute step
        const phase1Promise = (async () => {
            const stepData = context.getStepData(stepId);
            validateReplayConsistency(stepId, { type: clientLambda.OperationType.STEP, name, subType: exports.OperationSubType.STEP }, stepData, context);
            const status = stepData?.Status;
            // Check if already completed
            if (status === clientLambda.OperationStatus.SUCCEEDED) {
                log("⏭️", "Step already completed:", { stepId });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
            }
            // Check if already failed
            if (status === clientLambda.OperationStatus.FAILED) {
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                const recordedError = stepData.StepDetails?.Error;
                if (recordedError) {
                    throw DurableOperationError.fromErrorObject(recordedError);
                }
                throw new StepError("Unknown error");
            }
            // Check if pending retry
            if (status === clientLambda.OperationStatus.PENDING) {
                checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
                    metadata: buildMetadata(),
                    endTimestamp: stepData.StepDetails?.NextAttemptTimestamp,
                });
                return resumeAfterRetryTimer();
            }
            // Check for interrupted step with AT_MOST_ONCE_PER_RETRY: a step recorded
            // as STARTED is not re-run directly; it goes through the retry decision
            // with a StepInterruptedError.
            if (status === clientLambda.OperationStatus.STARTED &&
                semantics === exports.StepSemantics.AtMostOncePerRetry) {
                const interrupted = new StepInterruptedError(stepId, name);
                const attempt = (stepData.StepDetails?.Attempt || 0) + 1;
                return retryOrFail(interrupted, interrupted, attempt);
            }
            return executeStepLogic();
        })();
        // Phase 1 starts before the caller awaits anything; the no-op handler keeps
        // an early rejection from being reported as unhandled. The rejection still
        // reaches the caller through `await phase1Promise` below.
        phase1Promise.catch(() => { });
        return new DurablePromise(async () => {
            checkpoint.markOperationAwaited(stepId);
            return await phase1Promise;
        });
    };
};
1335
+
1336
/**
 * Creates the `invoke` (chained invoke) operation for a durable context.
 *
 * The returned function accepts `(name, funcId, input, config)` or
 * `(funcId, input, config)`. Each call allocates a step id and immediately
 * starts "phase 1" (replay check, and a START checkpoint when no record
 * exists); it returns a DurablePromise whose callback performs "phase 2"
 * (wait for a status change, then resolve with the result or throw
 * InvokeError).
 *
 * @param context - provides getStepData(), terminationManager, durableExecutionArn
 * @param checkpoint - provides checkpoint(), markOperationState(),
 *   markOperationAwaited(), waitForStatusChange()
 * @param createStepId - returns the id for the next operation
 * @param parentId - id recorded as ParentId / metadata.parentId
 * @param checkAndUpdateReplayMode - optional hook called when a SUCCEEDED
 *   record is observed
 * @returns the invoke function described above
 */
const createInvokeHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
    function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
        // The name-first overload is detected solely by a string second argument,
        // so invoke(funcId, "string input") is also read as (name, funcId).
        const isNameFirst = typeof funcIdOrInput === "string";
        const [name, funcId, input, config] = isNameFirst
            ? [nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig]
            : [undefined, nameOrFuncId, funcIdOrInput, inputOrConfig];
        const stepId = createStepId();
        // Fresh metadata object for every lifecycle-state notification.
        const buildMetadata = () => ({
            stepId,
            name,
            type: clientLambda.OperationType.CHAINED_INVOKE,
            subType: exports.OperationSubType.CHAINED_INVOKE,
            parentId,
        });
        // Builds the error thrown for any non-successful outcome.
        const buildInvokeError = (stepData) => {
            const recordedError = stepData?.ChainedInvokeDetails?.Error;
            if (!recordedError) {
                return new InvokeError("Invoke failed");
            }
            return new InvokeError(recordedError.ErrorMessage || "Invoke failed", recordedError.ErrorMessage
                ? new Error(recordedError.ErrorMessage)
                : undefined, recordedError.ErrorData);
        };
        // Resolves with the deserialized result on SUCCEEDED, otherwise rejects
        // with an InvokeError.
        const settleFromRecord = async (stepData) => {
            if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
                return await safeDeserialize(config?.resultSerdes || defaultSerdes, stepData.ChainedInvokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
            }
            throw buildInvokeError(stepData);
        };
        // Phase 1: Start invoke operation
        // Set when phase 1 finds a terminal record; phase 2 then skips waiting.
        let isCompleted = false;
        const phase1Promise = (async () => {
            log("🔗", "Invoke phase 1:", { stepId, name: name || funcId });
            const stepData = context.getStepData(stepId);
            // Validate replay consistency
            validateReplayConsistency(stepId, {
                type: clientLambda.OperationType.CHAINED_INVOKE,
                name,
                subType: exports.OperationSubType.CHAINED_INVOKE,
            }, stepData, context);
            const status = stepData?.Status;
            // Check if already completed
            if (status === clientLambda.OperationStatus.SUCCEEDED) {
                log("⏭️", "Invoke already completed:", { stepId });
                checkAndUpdateReplayMode?.();
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                isCompleted = true;
                return;
            }
            // Check if already failed
            if (status === clientLambda.OperationStatus.FAILED ||
                status === clientLambda.OperationStatus.TIMED_OUT ||
                status === clientLambda.OperationStatus.STOPPED) {
                log("❌", "Invoke already failed:", { stepId });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                isCompleted = true;
                return;
            }
            // Start invoke if not already started
            if (!stepData) {
                const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
                await checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: clientLambda.OperationAction.START,
                    SubType: exports.OperationSubType.CHAINED_INVOKE,
                    Type: clientLambda.OperationType.CHAINED_INVOKE,
                    Name: name,
                    Payload: serializedPayload,
                    ChainedInvokeOptions: {
                        FunctionName: funcId,
                    },
                });
            }
            // Mark as IDLE_NOT_AWAITED
            checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
                metadata: buildMetadata(),
            });
            log("✅", "Invoke phase 1 complete:", { stepId });
        })();
        // Phase 1 starts before the caller awaits anything; the no-op handler keeps
        // an early rejection from being reported as unhandled. The rejection still
        // reaches the caller through `await phase1Promise` below.
        phase1Promise.catch(() => { });
        // Phase 2: Wait for completion
        return new DurablePromise(async () => {
            await phase1Promise;
            if (isCompleted) {
                // Terminal record found during phase 1: settle straight from it,
                // without marking the operation as awaited.
                return settleFromRecord(context.getStepData(stepId));
            }
            log("🔗", "Invoke phase 2:", { stepId });
            checkpoint.markOperationAwaited(stepId);
            await checkpoint.waitForStatusChange(stepId);
            const stepData = context.getStepData(stepId);
            if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
                log("✅", "Invoke completed:", { stepId });
                checkAndUpdateReplayMode?.();
            }
            else {
                // Any status other than SUCCEEDED is treated as a failure here.
                log("❌", "Invoke failed:", { stepId, status: stepData?.Status });
            }
            checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
            return settleFromRecord(stepData);
        });
    }
    return invokeHandler;
};
1628
1468
 
1629
- const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
1630
- // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
1631
- if (!checkpointData || !checkpointData.Type) {
1632
- return;
1633
- }
1634
- // Validate operation type
1635
- if (checkpointData.Type !== currentOperation.type) {
1636
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation type mismatch for step "${stepId}". ` +
1637
- `Expected type "${checkpointData.Type}", but got "${currentOperation.type}". ` +
1638
- `This indicates non-deterministic control flow in your workflow code.`);
1639
- terminateForUnrecoverableError(context, error, stepId);
1469
+ // Checkpoint size limit in bytes (256KB)
1470
+ const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
1471
+ const determineChildReplayMode = (context, stepId) => {
1472
+ const stepData = context.getStepData(stepId);
1473
+ if (!stepData) {
1474
+ return DurableExecutionMode.ExecutionMode;
1640
1475
  }
1641
- // Validate operation name (including undefined)
1642
- if (checkpointData.Name !== currentOperation.name) {
1643
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation name mismatch for step "${stepId}". ` +
1644
- `Expected name "${checkpointData.Name ?? "undefined"}", but got "${currentOperation.name ?? "undefined"}". ` +
1645
- `This indicates non-deterministic control flow in your workflow code.`);
1646
- terminateForUnrecoverableError(context, error, stepId);
1476
+ if (stepData.Status === clientLambda.OperationStatus.SUCCEEDED &&
1477
+ stepData.ContextDetails?.ReplayChildren) {
1478
+ return DurableExecutionMode.ReplaySucceededContext;
1647
1479
  }
1648
- // Validate operation subtype
1649
- if (checkpointData.SubType !== currentOperation.subType) {
1650
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation subtype mismatch for step "${stepId}". ` +
1651
- `Expected subtype "${checkpointData.SubType}", but got "${currentOperation.subType}". ` +
1652
- `This indicates non-deterministic control flow in your workflow code.`);
1653
- terminateForUnrecoverableError(context, error, stepId);
1480
+ if (stepData.Status === clientLambda.OperationStatus.SUCCEEDED ||
1481
+ stepData.Status === clientLambda.OperationStatus.FAILED) {
1482
+ return DurableExecutionMode.ReplayMode;
1654
1483
  }
1484
+ return DurableExecutionMode.ExecutionMode;
1655
1485
  };
1656
-
1657
- // Special symbol to indicate that the main loop should continue
1658
- const CONTINUE_MAIN_LOOP$1 = Symbol("CONTINUE_MAIN_LOOP");
1659
- const waitForContinuation$1 = async (context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange) => {
1660
- const stepData = context.getStepData(stepId);
1661
- // Check if there are any ongoing operations
1662
- if (!hasRunningOperations()) {
1663
- // No ongoing operations - safe to terminate
1664
- return terminate(context, TerminationReason.RETRY_SCHEDULED, `Retry scheduled for ${name || stepId}`);
1665
- }
1666
- // There are ongoing operations - wait before continuing
1667
- await waitBeforeContinue({
1668
- checkHasRunningOperations: true,
1669
- checkStepStatus: true,
1670
- checkTimer: true,
1671
- scheduledEndTimestamp: stepData?.StepDetails?.NextAttemptTimestamp,
1672
- stepId,
1673
- context,
1674
- hasRunningOperations,
1675
- operationsEmitter: getOperationsEmitter(),
1676
- checkpoint,
1677
- onAwaitedChange,
1678
- });
1679
- // Return to let the main loop re-evaluate step status
1680
- };
1681
- /**
1682
- * Creates a step handler for executing durable steps with two-phase execution.
1683
- */
1684
- const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId) => {
1486
+ const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
1685
1487
  return (nameOrFn, fnOrOptions, maybeOptions) => {
1686
1488
  let name;
1687
1489
  let fn;
@@ -1695,479 +1497,30 @@ const createStepHandler = (context, checkpoint, parentContext, createStepId, log
1695
1497
  fn = nameOrFn;
1696
1498
  options = fnOrOptions;
1697
1499
  }
1698
- const stepId = createStepId();
1699
- log("▶️", "Running step:", { stepId, name, options });
1500
+ const entityId = createStepId();
1501
+ log("🔄", "Running child context:", {
1502
+ entityId,
1503
+ name,
1504
+ });
1505
+ const stepData = context.getStepData(entityId);
1506
+ // Validate replay consistency
1507
+ validateReplayConsistency(entityId, {
1508
+ type: clientLambda.OperationType.CONTEXT,
1509
+ name,
1510
+ subType: options?.subType ||
1511
+ exports.OperationSubType.RUN_IN_CHILD_CONTEXT,
1512
+ }, stepData, context);
1700
1513
  // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
1701
- let isAwaited = false;
1702
- let waitingCallback;
1703
- const setWaitingCallback = (cb) => {
1704
- waitingCallback = cb;
1705
- };
1706
- // Phase 1: Start execution immediately and capture result/error
1707
- const phase1Promise = (async () => {
1708
- // Main step logic - can be re-executed if step status changes
1709
- while (true) {
1710
- try {
1711
- const stepData = context.getStepData(stepId);
1712
- // Validate replay consistency
1713
- validateReplayConsistency(stepId, {
1714
- type: clientLambda.OperationType.STEP,
1715
- name,
1716
- subType: exports.OperationSubType.STEP,
1717
- }, stepData, context);
1718
- if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
1719
- return await handleCompletedStep(context, stepId, name, options?.serdes);
1720
- }
1721
- if (stepData?.Status === clientLambda.OperationStatus.FAILED) {
1722
- // Return an async rejected promise to ensure it's handled asynchronously
1723
- return (async () => {
1724
- // Reconstruct the original error from stored ErrorObject
1725
- if (stepData.StepDetails?.Error) {
1726
- throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1727
- }
1728
- else {
1729
- // Fallback for legacy data without Error field
1730
- const errorMessage = stepData?.StepDetails?.Result;
1731
- throw new StepError(errorMessage || "Unknown error");
1732
- }
1733
- })();
1734
- }
1735
- // If PENDING, wait for timer to complete
1736
- if (stepData?.Status === clientLambda.OperationStatus.PENDING) {
1737
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1738
- continue; // Re-evaluate step status after waiting
1739
- }
1740
- // Check for interrupted step with AT_MOST_ONCE_PER_RETRY semantics
1741
- if (stepData?.Status === clientLambda.OperationStatus.STARTED) {
1742
- const semantics = options?.semantics || exports.StepSemantics.AtLeastOncePerRetry;
1743
- if (semantics === exports.StepSemantics.AtMostOncePerRetry) {
1744
- log("⚠️", "Step was interrupted during execution:", {
1745
- stepId,
1746
- name,
1747
- });
1748
- const error = new StepInterruptedError(stepId, name);
1749
- // Handle the interrupted step as a failure
1750
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1751
- let retryDecision;
1752
- if (options?.retryStrategy !== undefined) {
1753
- retryDecision = options.retryStrategy(error, currentAttempt);
1754
- }
1755
- else {
1756
- retryDecision = retryPresets.default(error, currentAttempt);
1757
- }
1758
- log("⚠️", "Should Retry Interrupted Step:", {
1759
- stepId,
1760
- name,
1761
- currentAttempt,
1762
- shouldRetry: retryDecision.shouldRetry,
1763
- delayInSeconds: retryDecision.shouldRetry
1764
- ? retryDecision.delay
1765
- ? durationToSeconds(retryDecision.delay)
1766
- : undefined
1767
- : undefined,
1768
- });
1769
- if (!retryDecision.shouldRetry) {
1770
- // No retry, mark as failed
1771
- await checkpoint.checkpoint(stepId, {
1772
- Id: stepId,
1773
- ParentId: parentId,
1774
- Action: clientLambda.OperationAction.FAIL,
1775
- SubType: exports.OperationSubType.STEP,
1776
- Type: clientLambda.OperationType.STEP,
1777
- Error: createErrorObjectFromError(error),
1778
- Name: name,
1779
- });
1780
- // Reconstruct error from ErrorObject for deterministic behavior
1781
- const errorObject = createErrorObjectFromError(error);
1782
- throw DurableOperationError.fromErrorObject(errorObject);
1783
- }
1784
- else {
1785
- // Retry
1786
- await checkpoint.checkpoint(stepId, {
1787
- Id: stepId,
1788
- ParentId: parentId,
1789
- Action: clientLambda.OperationAction.RETRY,
1790
- SubType: exports.OperationSubType.STEP,
1791
- Type: clientLambda.OperationType.STEP,
1792
- Error: createErrorObjectFromError(error),
1793
- Name: name,
1794
- StepOptions: {
1795
- NextAttemptDelaySeconds: retryDecision.delay
1796
- ? durationToSeconds(retryDecision.delay)
1797
- : 1,
1798
- },
1799
- });
1800
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1801
- continue; // Re-evaluate step status after waiting
1802
- }
1803
- }
1804
- }
1805
- // Execute step function for READY, STARTED (AtLeastOncePerRetry), or first time (undefined)
1806
- const result = await executeStep(context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, isAwaited ? undefined : setWaitingCallback);
1807
- // If executeStep signals to continue the main loop, do so
1808
- if (result === CONTINUE_MAIN_LOOP$1) {
1809
- continue;
1810
- }
1811
- return result;
1812
- }
1813
- catch (error) {
1814
- // Preserve DurableOperationError instances (StepInterruptedError is handled specifically where it's thrown)
1815
- if (error instanceof DurableOperationError) {
1816
- throw error;
1817
- }
1818
- // For any other error from executeStep, wrap it in StepError for consistency
1819
- throw new StepError(error instanceof Error ? error.message : "Step failed", error instanceof Error ? error : undefined);
1820
- }
1821
- }
1822
- })();
1823
- // Attach catch handler to prevent unhandled promise rejections
1824
- // The error will still be thrown when the DurablePromise is awaited
1825
- phase1Promise.catch(() => { });
1826
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
1827
- return new DurablePromise(async () => {
1828
- // When promise is awaited, mark as awaited and invoke waiting callback
1829
- isAwaited = true;
1830
- if (waitingCallback) {
1831
- waitingCallback();
1832
- }
1833
- return await phase1Promise;
1834
- });
1835
- };
1836
- };
1837
- const handleCompletedStep = async (context, stepId, stepName, serdes = defaultSerdes) => {
1838
- log("⏭️", "Step already finished, returning cached result:", { stepId });
1839
- const stepData = context.getStepData(stepId);
1840
- const result = stepData?.StepDetails?.Result;
1841
- return await safeDeserialize(serdes, result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
1842
- };
1843
- const executeStep = async (context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, onAwaitedChange) => {
1844
- // Determine step semantics (default to AT_LEAST_ONCE_PER_RETRY if not specified)
1845
- const semantics = options?.semantics || exports.StepSemantics.AtLeastOncePerRetry;
1846
- const serdes = options?.serdes || defaultSerdes;
1847
- // Checkpoint at start for both semantics (only if not already started)
1848
- const stepData = context.getStepData(stepId);
1849
- if (stepData?.Status !== clientLambda.OperationStatus.STARTED) {
1850
- if (semantics === exports.StepSemantics.AtMostOncePerRetry) {
1851
- // Wait for checkpoint to complete
1852
- await checkpoint.checkpoint(stepId, {
1853
- Id: stepId,
1854
- ParentId: parentId,
1855
- Action: clientLambda.OperationAction.START,
1856
- SubType: exports.OperationSubType.STEP,
1857
- Type: clientLambda.OperationType.STEP,
1858
- Name: name,
1859
- });
1860
- }
1861
- else {
1862
- // Fire and forget for AtLeastOncePerRetry
1863
- checkpoint.checkpoint(stepId, {
1864
- Id: stepId,
1865
- ParentId: parentId,
1866
- Action: clientLambda.OperationAction.START,
1867
- SubType: exports.OperationSubType.STEP,
1868
- Type: clientLambda.OperationType.STEP,
1869
- Name: name,
1870
- });
1871
- }
1872
- }
1873
- try {
1874
- // Get current attempt number for logger enrichment
1875
- const stepData = context.getStepData(stepId);
1876
- const currentAttempt = stepData?.StepDetails?.Attempt || 0;
1877
- // Create step context with enriched logger
1878
- const stepContext = {
1879
- logger,
1880
- };
1881
- // Execute the step function with stepContext
1882
- addRunningOperation(stepId);
1883
- let result;
1884
- try {
1885
- result = await runWithContext(stepId, parentId, () => fn(stepContext),
1886
- // The attempt that is running is the attempt from the step data (previous step attempt) + 1
1887
- currentAttempt + 1,
1888
- // Always in execution mode when running step operations
1889
- DurableExecutionMode.ExecutionMode);
1890
- }
1891
- finally {
1892
- removeRunningOperation(stepId);
1893
- }
1894
- // Serialize the result for consistency
1895
- const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
1896
- // Always checkpoint on completion
1897
- await checkpoint.checkpoint(stepId, {
1898
- Id: stepId,
1899
- ParentId: parentId,
1900
- Action: clientLambda.OperationAction.SUCCEED,
1901
- SubType: exports.OperationSubType.STEP,
1902
- Type: clientLambda.OperationType.STEP,
1903
- Payload: serializedResult,
1904
- Name: name,
1905
- });
1906
- log("✅", "Step completed successfully:", {
1907
- stepId,
1908
- name,
1909
- result,
1910
- semantics,
1911
- });
1912
- // Deserialize the result for consistency with replay behavior
1913
- return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
1914
- }
1915
- catch (error) {
1916
- log("❌", "Step failed:", {
1917
- stepId,
1918
- name,
1919
- error,
1920
- semantics,
1921
- });
1922
- // Handle unrecoverable errors - these should not go through retry logic
1923
- if (isUnrecoverableError(error)) {
1924
- log("💥", "Unrecoverable error detected:", {
1925
- stepId,
1926
- name,
1927
- error: error.message,
1928
- });
1929
- return terminateForUnrecoverableError(context, error, name || stepId);
1930
- }
1931
- const stepData = context.getStepData(stepId);
1932
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1933
- let retryDecision;
1934
- if (options?.retryStrategy !== undefined) {
1935
- // Use provided retry configuration
1936
- retryDecision = options.retryStrategy(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1937
- }
1938
- else {
1939
- // Use default retry preset if no config provided
1940
- retryDecision = retryPresets.default(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1941
- }
1942
- log("⚠️", "Should Retry:", {
1943
- stepId,
1944
- name,
1945
- currentAttempt,
1946
- shouldRetry: retryDecision.shouldRetry,
1947
- delayInSeconds: retryDecision.shouldRetry
1948
- ? retryDecision.delay
1949
- ? durationToSeconds(retryDecision.delay)
1950
- : undefined
1951
- : undefined,
1952
- semantics,
1953
- });
1954
- if (!retryDecision.shouldRetry) {
1955
- // No retry
1956
- await checkpoint.checkpoint(stepId, {
1957
- Id: stepId,
1958
- ParentId: parentId,
1959
- Action: clientLambda.OperationAction.FAIL,
1960
- SubType: exports.OperationSubType.STEP,
1961
- Type: clientLambda.OperationType.STEP,
1962
- Error: createErrorObjectFromError(error),
1963
- Name: name,
1964
- });
1965
- // Reconstruct error from ErrorObject for deterministic behavior
1966
- const errorObject = createErrorObjectFromError(error);
1967
- throw DurableOperationError.fromErrorObject(errorObject);
1968
- }
1969
- else {
1970
- // Retry
1971
- await checkpoint.checkpoint(stepId, {
1972
- Id: stepId,
1973
- ParentId: parentId,
1974
- Action: clientLambda.OperationAction.RETRY,
1975
- SubType: exports.OperationSubType.STEP,
1976
- Type: clientLambda.OperationType.STEP,
1977
- Error: createErrorObjectFromError(error),
1978
- Name: name,
1979
- StepOptions: {
1980
- NextAttemptDelaySeconds: retryDecision.delay
1981
- ? durationToSeconds(retryDecision.delay)
1982
- : 1,
1983
- },
1984
- });
1985
- // Wait for continuation and signal main loop to continue
1986
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange);
1987
- return CONTINUE_MAIN_LOOP$1;
1988
- }
1989
- }
1990
- };
1991
-
1992
- const createInvokeHandler = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, parentId, checkAndUpdateReplayMode) => {
1993
- function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
1994
- const isNameFirst = typeof funcIdOrInput === "string";
1995
- const name = isNameFirst ? nameOrFuncId : undefined;
1996
- const funcId = isNameFirst ? funcIdOrInput : nameOrFuncId;
1997
- const input = isNameFirst
1998
- ? inputOrConfig
1999
- : funcIdOrInput;
2000
- const config = isNameFirst
2001
- ? maybeConfig
2002
- : inputOrConfig;
2003
- const stepId = createStepId();
2004
- // Phase 1: Only checkpoint if needed, don't execute full logic
2005
- const startInvokeOperation = async () => {
2006
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 1`);
2007
- // Check initial step data for replay consistency validation
2008
- const initialStepData = context.getStepData(stepId);
2009
- // Validate replay consistency once before any execution
2010
- validateReplayConsistency(stepId, {
2011
- type: clientLambda.OperationType.CHAINED_INVOKE,
2012
- name,
2013
- subType: exports.OperationSubType.CHAINED_INVOKE,
2014
- }, initialStepData, context);
2015
- // If stepData already exists, phase 1 has nothing to do
2016
- if (initialStepData) {
2017
- log("⏸️", `Invoke ${name || funcId} already exists (phase 1)`);
2018
- return;
2019
- }
2020
- // No stepData exists - need to start the invoke operation
2021
- // Serialize the input payload
2022
- const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
2023
- // Create checkpoint for the invoke operation
2024
- await checkpoint.checkpoint(stepId, {
2025
- Id: stepId,
2026
- ParentId: parentId,
2027
- Action: clientLambda.OperationAction.START,
2028
- SubType: exports.OperationSubType.CHAINED_INVOKE,
2029
- Type: clientLambda.OperationType.CHAINED_INVOKE,
2030
- Name: name,
2031
- Payload: serializedPayload,
2032
- ChainedInvokeOptions: {
2033
- FunctionName: funcId,
2034
- },
2035
- });
2036
- log("🚀", `Invoke ${name || funcId} started (phase 1)`);
2037
- };
2038
- // Phase 2: Execute full logic including waiting and termination
2039
- const continueInvokeOperation = async () => {
2040
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 2`);
2041
- // Main invoke logic - can be re-executed if step status changes
2042
- while (true) {
2043
- // Check if we have existing step data
2044
- const stepData = context.getStepData(stepId);
2045
- if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2046
- // Return cached result - no need to check for errors in successful operations
2047
- const invokeDetails = stepData.ChainedInvokeDetails;
2048
- checkAndUpdateReplayMode?.();
2049
- return await safeDeserialize(config?.resultSerdes || defaultSerdes, invokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2050
- }
2051
- if (stepData?.Status === clientLambda.OperationStatus.FAILED ||
2052
- stepData?.Status === clientLambda.OperationStatus.TIMED_OUT ||
2053
- stepData?.Status === clientLambda.OperationStatus.STOPPED) {
2054
- // Operation failed, return async rejected promise
2055
- const invokeDetails = stepData.ChainedInvokeDetails;
2056
- return (async () => {
2057
- if (invokeDetails?.Error) {
2058
- throw new InvokeError(invokeDetails.Error.ErrorMessage || "Invoke failed", invokeDetails.Error.ErrorMessage
2059
- ? new Error(invokeDetails.Error.ErrorMessage)
2060
- : undefined, invokeDetails.Error.ErrorData);
2061
- }
2062
- else {
2063
- throw new InvokeError("Invoke failed");
2064
- }
2065
- })();
2066
- }
2067
- if (stepData?.Status === clientLambda.OperationStatus.STARTED) {
2068
- // Operation is still running
2069
- if (hasRunningOperations()) {
2070
- // Phase 2: Wait for other operations
2071
- log("⏳", `Invoke ${name || funcId} still in progress, waiting for other operations`);
2072
- await waitBeforeContinue({
2073
- checkHasRunningOperations: true,
2074
- checkStepStatus: true,
2075
- checkTimer: false,
2076
- stepId,
2077
- context,
2078
- hasRunningOperations,
2079
- operationsEmitter: getOperationsEmitter(),
2080
- });
2081
- continue; // Re-evaluate status after waiting
2082
- }
2083
- // No other operations running - terminate
2084
- log("⏳", `Invoke ${name || funcId} still in progress, terminating`);
2085
- return terminate(context, TerminationReason.OPERATION_TERMINATED, stepId);
2086
- }
2087
- // If stepData exists but has an unexpected status, break to avoid infinite loop
2088
- if (stepData && stepData.Status !== undefined) {
2089
- throw new InvokeError(`Unexpected operation status: ${stepData.Status}`);
2090
- }
2091
- // This should not happen in phase 2 since phase 1 creates stepData
2092
- throw new InvokeError("No step data found in phase 2 - this should not happen");
2093
- }
2094
- };
2095
- // Create a promise that tracks phase 1 completion
2096
- const startInvokePromise = startInvokeOperation()
2097
- .then(() => {
2098
- log("✅", "Invoke phase 1 complete:", { stepId, name: name || funcId });
2099
- })
2100
- .catch((error) => {
2101
- log("❌", "Invoke phase 1 error:", { stepId, error: error.message });
2102
- throw error; // Re-throw to fail phase 1
2103
- });
2104
- // Attach catch handler to prevent unhandled promise rejections
2105
- // The error will still be thrown when the DurablePromise is awaited
2106
- startInvokePromise.catch(() => { });
2107
- // Return DurablePromise that will execute phase 2 when awaited
2108
- return new DurablePromise(async () => {
2109
- // Wait for phase 1 to complete first
2110
- await startInvokePromise;
2111
- // Then execute phase 2
2112
- return await continueInvokeOperation();
2113
- });
2114
- }
2115
- return invokeHandler;
2116
- };
2117
-
2118
- // Checkpoint size limit in bytes (256KB)
2119
- const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
2120
- const determineChildReplayMode = (context, stepId) => {
2121
- const stepData = context.getStepData(stepId);
2122
- if (!stepData) {
2123
- return DurableExecutionMode.ExecutionMode;
2124
- }
2125
- if (stepData.Status === clientLambda.OperationStatus.SUCCEEDED &&
2126
- stepData.ContextDetails?.ReplayChildren) {
2127
- return DurableExecutionMode.ReplaySucceededContext;
2128
- }
2129
- if (stepData.Status === clientLambda.OperationStatus.SUCCEEDED ||
2130
- stepData.Status === clientLambda.OperationStatus.FAILED) {
2131
- return DurableExecutionMode.ReplayMode;
2132
- }
2133
- return DurableExecutionMode.ExecutionMode;
2134
- };
2135
- const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
2136
- return (nameOrFn, fnOrOptions, maybeOptions) => {
2137
- let name;
2138
- let fn;
2139
- let options;
2140
- if (typeof nameOrFn === "string" || nameOrFn === undefined) {
2141
- name = nameOrFn;
2142
- fn = fnOrOptions;
2143
- options = maybeOptions;
2144
- }
2145
- else {
2146
- fn = nameOrFn;
2147
- options = fnOrOptions;
2148
- }
2149
- const entityId = createStepId();
2150
- log("🔄", "Running child context:", {
2151
- entityId,
2152
- name,
2153
- });
2154
- const stepData = context.getStepData(entityId);
2155
- // Validate replay consistency
2156
- validateReplayConsistency(entityId, {
2157
- type: clientLambda.OperationType.CONTEXT,
2158
- name,
2159
- subType: options?.subType ||
2160
- exports.OperationSubType.RUN_IN_CHILD_CONTEXT,
2161
- }, stepData, context);
2162
- // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
2163
- let phase1Result;
2164
- let phase1Error;
1514
+ let phase1Result;
1515
+ let phase1Error;
2165
1516
  // Phase 1: Start execution immediately and capture result/error
2166
1517
  const phase1Promise = (async () => {
2167
1518
  const currentStepData = context.getStepData(entityId);
2168
1519
  // If already completed, return cached result
2169
1520
  if (currentStepData?.Status === clientLambda.OperationStatus.SUCCEEDED ||
2170
1521
  currentStepData?.Status === clientLambda.OperationStatus.FAILED) {
1522
+ // Mark this run-in-child-context as finished to prevent descendant operations
1523
+ checkpoint.markAncestorFinished(entityId);
2171
1524
  return handleCompletedChildContext(context, parentContext, entityId, name, fn, options, getParentLogger, createChildContext);
2172
1525
  }
2173
1526
  // Execute if not completed
@@ -2257,8 +1610,10 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2257
1610
  limit: CHECKPOINT_SIZE_LIMIT,
2258
1611
  });
2259
1612
  }
1613
+ // Mark this run-in-child-context as finished to prevent descendant operations
1614
+ checkpoint.markAncestorFinished(entityId);
2260
1615
  const subType = options?.subType || exports.OperationSubType.RUN_IN_CHILD_CONTEXT;
2261
- await checkpoint.checkpoint(entityId, {
1616
+ checkpoint.checkpoint(entityId, {
2262
1617
  Id: entityId,
2263
1618
  ParentId: parentId,
2264
1619
  Action: clientLambda.OperationAction.SUCCEED,
@@ -2280,9 +1635,11 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2280
1635
  name,
2281
1636
  error,
2282
1637
  });
1638
+ // Mark this run-in-child-context as finished to prevent descendant operations
1639
+ checkpoint.markAncestorFinished(entityId);
2283
1640
  // Always checkpoint failures
2284
1641
  const subType = options?.subType || exports.OperationSubType.RUN_IN_CHILD_CONTEXT;
2285
- await checkpoint.checkpoint(entityId, {
1642
+ checkpoint.checkpoint(entityId, {
2286
1643
  Id: entityId,
2287
1644
  ParentId: parentId,
2288
1645
  Action: clientLambda.OperationAction.FAIL,
@@ -2298,433 +1655,324 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2298
1655
  }
2299
1656
  };
2300
1657
 
2301
- const createWaitHandler = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, parentId, checkAndUpdateReplayMode) => {
1658
+ const createWaitHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
2302
1659
  function waitHandler(nameOrDuration, duration) {
2303
1660
  const isNameFirst = typeof nameOrDuration === "string";
2304
1661
  const actualName = isNameFirst ? nameOrDuration : undefined;
2305
1662
  const actualDuration = isNameFirst ? duration : nameOrDuration;
2306
1663
  const actualSeconds = durationToSeconds(actualDuration);
2307
1664
  const stepId = createStepId();
2308
- // Shared wait logic for both phases
2309
- const executeWaitLogic = async (canTerminate) => {
2310
- log("⏲️", `Wait executing (${canTerminate ? "phase 2" : "phase 1"}):`, {
1665
+ // Phase 1: Start wait operation
1666
+ let isCompleted = false;
1667
+ const phase1Promise = (async () => {
1668
+ log("⏲️", "Wait phase 1:", {
2311
1669
  stepId,
2312
1670
  name: actualName,
2313
- duration: actualDuration,
2314
1671
  seconds: actualSeconds,
2315
1672
  });
2316
1673
  let stepData = context.getStepData(stepId);
2317
- // Validate replay consistency once before loop
1674
+ // Validate replay consistency
2318
1675
  validateReplayConsistency(stepId, {
2319
1676
  type: clientLambda.OperationType.WAIT,
2320
1677
  name: actualName,
2321
1678
  subType: exports.OperationSubType.WAIT,
2322
1679
  }, stepData, context);
2323
- // Main wait logic - can be re-executed if step data changes
2324
- while (true) {
2325
- stepData = context.getStepData(stepId);
2326
- if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2327
- log("⏭️", "Wait already completed:", { stepId });
2328
- checkAndUpdateReplayMode?.();
2329
- return;
2330
- }
2331
- // Only checkpoint START if we haven't started this wait before
2332
- if (!stepData) {
2333
- await checkpoint.checkpoint(stepId, {
2334
- Id: stepId,
2335
- ParentId: parentId,
2336
- Action: clientLambda.OperationAction.START,
2337
- SubType: exports.OperationSubType.WAIT,
2338
- Type: clientLambda.OperationType.WAIT,
2339
- Name: actualName,
2340
- WaitOptions: {
2341
- WaitSeconds: actualSeconds,
2342
- },
2343
- });
2344
- }
2345
- // Always refresh stepData to ensure it's up-to-date before proceeding
2346
- stepData = context.getStepData(stepId);
2347
- // Check if there are any ongoing operations
2348
- if (!hasRunningOperations()) {
2349
- // Phase 1: Just return without terminating
2350
- // Phase 2: Terminate
2351
- if (canTerminate) {
2352
- return terminate(context, TerminationReason.WAIT_SCHEDULED, `Operation ${actualName || stepId} scheduled to wait`);
2353
- }
2354
- else {
2355
- log("⏸️", "Wait ready but not terminating (phase 1):", { stepId });
2356
- return;
2357
- }
2358
- }
2359
- // There are ongoing operations - wait before continuing
2360
- await waitBeforeContinue({
2361
- checkHasRunningOperations: true,
2362
- checkStepStatus: true,
2363
- checkTimer: true,
2364
- scheduledEndTimestamp: stepData?.WaitDetails?.ScheduledEndTimestamp,
2365
- stepId,
2366
- context,
2367
- hasRunningOperations,
2368
- operationsEmitter: getOperationsEmitter(),
2369
- checkpoint,
1680
+ // Check if already completed
1681
+ if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
1682
+ log("⏭️", "Wait already completed:", { stepId });
1683
+ checkAndUpdateReplayMode?.();
1684
+ // Mark as completed
1685
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1686
+ metadata: {
1687
+ stepId,
1688
+ name: actualName,
1689
+ type: clientLambda.OperationType.WAIT,
1690
+ subType: exports.OperationSubType.WAIT,
1691
+ parentId,
1692
+ },
2370
1693
  });
2371
- // Continue the loop to re-evaluate all conditions from the beginning
1694
+ isCompleted = true;
1695
+ return;
2372
1696
  }
2373
- };
2374
- // Create a promise that tracks phase 1 completion
2375
- const phase1Promise = executeWaitLogic(false).then(() => {
2376
- log("✅", "Wait phase 1 complete:", { stepId, name: actualName });
2377
- });
2378
- // Attach catch handler to prevent unhandled promise rejections
2379
- // The error will still be thrown when the DurablePromise is awaited
1697
+ // Start wait if not already started
1698
+ if (!stepData) {
1699
+ await checkpoint.checkpoint(stepId, {
1700
+ Id: stepId,
1701
+ ParentId: parentId,
1702
+ Action: clientLambda.OperationAction.START,
1703
+ SubType: exports.OperationSubType.WAIT,
1704
+ Type: clientLambda.OperationType.WAIT,
1705
+ Name: actualName,
1706
+ WaitOptions: {
1707
+ WaitSeconds: actualSeconds,
1708
+ },
1709
+ });
1710
+ }
1711
+ // Refresh stepData after checkpoint
1712
+ stepData = context.getStepData(stepId);
1713
+ // Mark as IDLE_NOT_AWAITED (phase 1 complete, not awaited yet)
1714
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
1715
+ metadata: {
1716
+ stepId,
1717
+ name: actualName,
1718
+ type: clientLambda.OperationType.WAIT,
1719
+ subType: exports.OperationSubType.WAIT,
1720
+ parentId,
1721
+ },
1722
+ endTimestamp: stepData?.WaitDetails?.ScheduledEndTimestamp,
1723
+ });
1724
+ log("✅", "Wait phase 1 complete:", { stepId });
1725
+ })();
1726
+ // Prevent unhandled rejection
2380
1727
  phase1Promise.catch(() => { });
2381
- // Return DurablePromise that will execute phase 2 when awaited
1728
+ // Phase 2: Wait for completion
2382
1729
  return new DurablePromise(async () => {
2383
- // Wait for phase 1 to complete first
1730
+ // Wait for phase 1
2384
1731
  await phase1Promise;
2385
- // Then execute phase 2
2386
- await executeWaitLogic(true);
1732
+ // If already completed in phase 1, skip phase 2
1733
+ if (isCompleted) {
1734
+ return;
1735
+ }
1736
+ log("⏲️", "Wait phase 2:", { stepId });
1737
+ // Mark as awaited
1738
+ checkpoint.markOperationAwaited(stepId);
1739
+ // Wait for status change
1740
+ await checkpoint.waitForStatusChange(stepId);
1741
+ // Check final status
1742
+ const stepData = context.getStepData(stepId);
1743
+ if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
1744
+ log("✅", "Wait completed:", { stepId });
1745
+ checkAndUpdateReplayMode?.();
1746
+ // Mark as completed
1747
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1748
+ return;
1749
+ }
1750
+ // Should not reach here, but handle gracefully
1751
+ log("⚠️", "Wait ended with unexpected status:", {
1752
+ stepId,
1753
+ status: stepData?.Status,
1754
+ });
2387
1755
  });
2388
1756
  }
2389
1757
  return waitHandler;
2390
1758
  };
2391
1759
 
2392
- // Special symbol to indicate that the main loop should continue
2393
- const CONTINUE_MAIN_LOOP = Symbol("CONTINUE_MAIN_LOOP");
2394
- const waitForContinuation = async (context, stepId, name, hasRunningOperations, checkpoint, operationsEmitter, onAwaitedChange) => {
2395
- const stepData = context.getStepData(stepId);
2396
- // Check if there are any ongoing operations
2397
- if (!hasRunningOperations()) {
2398
- // No ongoing operations - safe to terminate
2399
- return terminate(context, TerminationReason.RETRY_SCHEDULED, `Retry scheduled for ${name || stepId}`);
2400
- }
2401
- // There are ongoing operations - wait before continuing
2402
- await waitBeforeContinue({
2403
- checkHasRunningOperations: true,
2404
- checkStepStatus: true,
2405
- checkTimer: true,
2406
- scheduledEndTimestamp: stepData?.StepDetails?.NextAttemptTimestamp,
2407
- stepId,
2408
- context,
2409
- hasRunningOperations,
2410
- operationsEmitter,
2411
- checkpoint,
2412
- onAwaitedChange,
2413
- });
2414
- // Return to let the main loop re-evaluate step status
2415
- };
2416
- const createWaitForConditionHandler = (context, checkpoint, createStepId, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId) => {
1760
+ const createWaitForConditionHandler = (context, checkpoint, createStepId, logger, parentId) => {
2417
1761
  return (nameOrCheck, checkOrConfig, maybeConfig) => {
2418
- // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
2419
- let isAwaited = false;
2420
- let waitingCallback;
2421
- const setWaitingCallback = (cb) => {
2422
- waitingCallback = cb;
2423
- };
2424
- // Phase 1: Start execution immediately and capture result/error
1762
+ let name;
1763
+ let check;
1764
+ let config;
1765
+ if (typeof nameOrCheck === "string" || nameOrCheck === undefined) {
1766
+ name = nameOrCheck;
1767
+ check = checkOrConfig;
1768
+ config = maybeConfig;
1769
+ }
1770
+ else {
1771
+ check = nameOrCheck;
1772
+ config = checkOrConfig;
1773
+ }
1774
+ if (!config?.waitStrategy || config.initialState === undefined) {
1775
+ throw new Error("waitForCondition requires config with waitStrategy and initialState");
1776
+ }
1777
+ const stepId = createStepId();
1778
+ const serdes = config.serdes || defaultSerdes;
2425
1779
  const phase1Promise = (async () => {
2426
- let name;
2427
- let check;
2428
- let config;
2429
- // Parse overloaded parameters - validation errors thrown here are async
2430
- if (typeof nameOrCheck === "string" || nameOrCheck === undefined) {
2431
- name = nameOrCheck;
2432
- check = checkOrConfig;
2433
- config = maybeConfig;
1780
+ let stepData = context.getStepData(stepId);
1781
+ // Check if already completed
1782
+ if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
1783
+ log("⏭️", "WaitForCondition already completed:", { stepId });
1784
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1785
+ metadata: {
1786
+ stepId,
1787
+ name,
1788
+ type: clientLambda.OperationType.STEP,
1789
+ subType: exports.OperationSubType.WAIT_FOR_CONDITION,
1790
+ parentId,
1791
+ },
1792
+ });
1793
+ return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2434
1794
  }
2435
- else {
2436
- check = nameOrCheck;
2437
- config = checkOrConfig;
1795
+ // Check if already failed
1796
+ if (stepData?.Status === clientLambda.OperationStatus.FAILED) {
1797
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1798
+ metadata: {
1799
+ stepId,
1800
+ name,
1801
+ type: clientLambda.OperationType.STEP,
1802
+ subType: exports.OperationSubType.WAIT_FOR_CONDITION,
1803
+ parentId,
1804
+ },
1805
+ });
1806
+ if (stepData.StepDetails?.Error) {
1807
+ throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1808
+ }
1809
+ throw new WaitForConditionError("waitForCondition failed");
2438
1810
  }
2439
- if (!config ||
2440
- !config.waitStrategy ||
2441
- config.initialState === undefined) {
2442
- throw new Error("waitForCondition requires config with waitStrategy and initialState");
1811
+ // Check if pending retry
1812
+ if (stepData?.Status === clientLambda.OperationStatus.PENDING) {
1813
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1814
+ metadata: {
1815
+ stepId,
1816
+ name,
1817
+ type: clientLambda.OperationType.STEP,
1818
+ subType: exports.OperationSubType.WAIT_FOR_CONDITION,
1819
+ parentId,
1820
+ },
1821
+ endTimestamp: stepData.StepDetails?.NextAttemptTimestamp,
1822
+ });
1823
+ return (async () => {
1824
+ await checkpoint.waitForRetryTimer(stepId);
1825
+ stepData = context.getStepData(stepId);
1826
+ return await executeCheckLogic();
1827
+ })();
2443
1828
  }
2444
- const stepId = createStepId();
2445
- log("🔄", "Running waitForCondition:", {
2446
- stepId,
2447
- name,
2448
- config,
2449
- });
2450
- // Main waitForCondition logic - can be re-executed if step status changes
2451
- while (true) {
2452
- try {
2453
- const stepData = context.getStepData(stepId);
2454
- // Check if already completed
2455
- if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2456
- return await handleCompletedWaitForCondition(context, stepId, name, config.serdes);
2457
- }
2458
- if (stepData?.Status === clientLambda.OperationStatus.FAILED) {
2459
- // Return an async rejected promise to ensure it's handled asynchronously
2460
- return (async () => {
2461
- // Reconstruct the original error from stored ErrorObject
2462
- if (stepData.StepDetails?.Error) {
2463
- throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
2464
- }
2465
- else {
2466
- // Fallback for legacy data without Error field
2467
- const errorMessage = stepData?.StepDetails?.Result;
2468
- throw new WaitForConditionError(errorMessage || "waitForCondition failed");
2469
- }
2470
- })();
2471
- }
2472
- // If PENDING, wait for timer to complete
2473
- if (stepData?.Status === clientLambda.OperationStatus.PENDING) {
2474
- await waitForContinuation(context, stepId, name, hasRunningOperations, checkpoint, getOperationsEmitter(), isAwaited ? undefined : setWaitingCallback);
2475
- continue; // Re-evaluate step status after waiting
1829
+ return await executeCheckLogic();
1830
+ async function executeCheckLogic() {
1831
+ stepData = context.getStepData(stepId);
1832
+ // Get current state
1833
+ let currentState;
1834
+ if (stepData?.Status === clientLambda.OperationStatus.STARTED ||
1835
+ stepData?.Status === clientLambda.OperationStatus.READY) {
1836
+ const checkpointData = stepData.StepDetails?.Result;
1837
+ if (checkpointData) {
1838
+ try {
1839
+ const serdesContext = {
1840
+ entityId: stepId,
1841
+ durableExecutionArn: context.durableExecutionArn,
1842
+ };
1843
+ currentState = await serdes.deserialize(checkpointData, serdesContext);
1844
+ }
1845
+ catch {
1846
+ currentState = config.initialState;
1847
+ }
2476
1848
  }
2477
- // Execute check function for READY, STARTED, or first time (undefined)
2478
- const result = await executeWaitForCondition(context, checkpoint, stepId, name, check, config, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, isAwaited ? undefined : setWaitingCallback);
2479
- // If executeWaitForCondition signals to continue the main loop, do so
2480
- if (result === CONTINUE_MAIN_LOOP) {
2481
- continue;
1849
+ else {
1850
+ currentState = config.initialState;
2482
1851
  }
2483
- return result;
2484
1852
  }
2485
- catch (error) {
2486
- // For any error from executeWaitForCondition, re-throw it
2487
- throw error;
1853
+ else {
1854
+ currentState = config.initialState;
1855
+ }
1856
+ const currentAttempt = (stepData?.StepDetails?.Attempt ?? 0) + 1;
1857
+ // Checkpoint START if not already started
1858
+ if (stepData?.Status !== clientLambda.OperationStatus.STARTED) {
1859
+ checkpoint.checkpoint(stepId, {
1860
+ Id: stepId,
1861
+ ParentId: parentId,
1862
+ Action: clientLambda.OperationAction.START,
1863
+ SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
1864
+ Type: clientLambda.OperationType.STEP,
1865
+ Name: name,
1866
+ });
1867
+ }
1868
+ try {
1869
+ const waitForConditionContext = {
1870
+ logger,
1871
+ };
1872
+ // Mark operation as EXECUTING
1873
+ checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
1874
+ metadata: {
1875
+ stepId,
1876
+ name,
1877
+ type: clientLambda.OperationType.STEP,
1878
+ subType: exports.OperationSubType.WAIT_FOR_CONDITION,
1879
+ parentId,
1880
+ },
1881
+ });
1882
+ const newState = await runWithContext(stepId, parentId, () => check(currentState, waitForConditionContext), currentAttempt, DurableExecutionMode.ExecutionMode);
1883
+ const serializedState = await safeSerialize(serdes, newState, stepId, name, context.terminationManager, context.durableExecutionArn);
1884
+ const deserializedState = await safeDeserialize(serdes, serializedState, stepId, name, context.terminationManager, context.durableExecutionArn);
1885
+ const decision = config.waitStrategy(deserializedState, currentAttempt);
1886
+ if (!decision.shouldContinue) {
1887
+ await checkpoint.checkpoint(stepId, {
1888
+ Id: stepId,
1889
+ ParentId: parentId,
1890
+ Action: clientLambda.OperationAction.SUCCEED,
1891
+ SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
1892
+ Type: clientLambda.OperationType.STEP,
1893
+ Payload: serializedState,
1894
+ Name: name,
1895
+ });
1896
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1897
+ return deserializedState;
1898
+ }
1899
+ await checkpoint.checkpoint(stepId, {
1900
+ Id: stepId,
1901
+ ParentId: parentId,
1902
+ Action: clientLambda.OperationAction.RETRY,
1903
+ SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
1904
+ Type: clientLambda.OperationType.STEP,
1905
+ Payload: serializedState,
1906
+ Name: name,
1907
+ StepOptions: {
1908
+ NextAttemptDelaySeconds: durationToSeconds(decision.delay),
1909
+ },
1910
+ });
1911
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1912
+ metadata: {
1913
+ stepId,
1914
+ name,
1915
+ type: clientLambda.OperationType.STEP,
1916
+ subType: exports.OperationSubType.WAIT_FOR_CONDITION,
1917
+ parentId,
1918
+ },
1919
+ endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
1920
+ });
1921
+ await checkpoint.waitForRetryTimer(stepId);
1922
+ return await executeCheckLogic();
1923
+ }
1924
+ catch (error) {
1925
+ await checkpoint.checkpoint(stepId, {
1926
+ Id: stepId,
1927
+ ParentId: parentId,
1928
+ Action: clientLambda.OperationAction.FAIL,
1929
+ SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
1930
+ Type: clientLambda.OperationType.STEP,
1931
+ Error: createErrorObjectFromError(error),
1932
+ Name: name,
1933
+ });
1934
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1935
+ throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
2488
1936
  }
2489
1937
  }
2490
- })();
2491
- // Attach catch handler to prevent unhandled promise rejections
2492
- // The error will still be thrown when the DurablePromise is awaited
2493
- phase1Promise.catch(() => { });
2494
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
2495
- return new DurablePromise(async () => {
2496
- // When promise is awaited, mark as awaited and invoke waiting callback
2497
- isAwaited = true;
2498
- if (waitingCallback) {
2499
- waitingCallback();
2500
- }
1938
+ })();
1939
+ phase1Promise.catch(() => { });
1940
+ return new DurablePromise(async () => {
1941
+ checkpoint.markOperationAwaited(stepId);
2501
1942
  return await phase1Promise;
2502
1943
  });
2503
1944
  };
2504
1945
  };
2505
- const handleCompletedWaitForCondition = async (context, stepId, stepName, serdes = defaultSerdes) => {
2506
- log("⏭️", "waitForCondition already finished, returning cached result:", {
2507
- stepId,
2508
- });
2509
- const stepData = context.getStepData(stepId);
2510
- const result = stepData?.StepDetails?.Result;
2511
- return await safeDeserialize(serdes, result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
2512
- };
2513
- const executeWaitForCondition = async (context, checkpoint, stepId, name, check, config, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, onAwaitedChange) => {
2514
- const serdes = config.serdes || defaultSerdes;
2515
- // Get current state from previous checkpoint or use initial state
2516
- let currentState;
2517
- const existingOperation = context.getStepData(stepId);
2518
- if (existingOperation?.Status === clientLambda.OperationStatus.STARTED ||
2519
- existingOperation?.Status === clientLambda.OperationStatus.READY) {
2520
- // This is a retry - get state from previous checkpoint
2521
- const checkpointData = existingOperation.StepDetails?.Result;
2522
- if (checkpointData) {
2523
- try {
2524
- // Try to deserialize the checkpoint data directly
2525
- const serdesContext = {
2526
- entityId: stepId,
2527
- durableExecutionArn: context.durableExecutionArn,
2528
- };
2529
- currentState = await serdes.deserialize(checkpointData, serdesContext);
2530
- }
2531
- catch (error) {
2532
- log("⚠️", "Failed to deserialize checkpoint data, using initial state:", {
2533
- stepId,
2534
- name,
2535
- error,
2536
- });
2537
- currentState = config.initialState;
2538
- }
2539
- }
2540
- else {
2541
- currentState = config.initialState;
2542
- }
2543
- }
2544
- else {
2545
- // First execution
2546
- currentState = config.initialState;
2547
- }
2548
- // Get the current attempt number (1-based for wait strategy consistency)
2549
- const currentAttempt = existingOperation?.StepDetails?.Attempt || 1;
2550
- // Checkpoint START for observability (fire and forget) - only if not already started
2551
- const stepData = context.getStepData(stepId);
2552
- if (stepData?.Status !== clientLambda.OperationStatus.STARTED) {
2553
- checkpoint.checkpoint(stepId, {
2554
- Id: stepId,
2555
- ParentId: parentId,
2556
- Action: clientLambda.OperationAction.START,
2557
- SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
2558
- Type: clientLambda.OperationType.STEP,
2559
- Name: name,
2560
- });
2561
- }
2562
- try {
2563
- // Create WaitForConditionContext with enriched logger for the check function
2564
- const waitForConditionContext = {
2565
- logger,
2566
- };
2567
- // Execute the check function
2568
- addRunningOperation(stepId);
2569
- let newState;
2570
- try {
2571
- newState = await runWithContext(stepId, parentId, () => check(currentState, waitForConditionContext), currentAttempt + 1, DurableExecutionMode.ExecutionMode);
2572
- }
2573
- finally {
2574
- removeRunningOperation(stepId);
2575
- }
2576
- // Serialize the new state for consistency
2577
- const serializedState = await safeSerialize(serdes, newState, stepId, name, context.terminationManager, context.durableExecutionArn);
2578
- // Deserialize for consistency with replay behavior
2579
- const deserializedState = await safeDeserialize(serdes, serializedState, stepId, name, context.terminationManager, context.durableExecutionArn);
2580
- // Check if condition is met using the wait strategy
2581
- const decision = config.waitStrategy(deserializedState, currentAttempt);
2582
- log("🔍", "waitForCondition check completed:", {
2583
- stepId,
2584
- name,
2585
- currentAttempt: currentAttempt,
2586
- shouldContinue: decision.shouldContinue,
2587
- delayInSeconds: decision.shouldContinue
2588
- ? durationToSeconds(decision.delay)
2589
- : undefined,
2590
- });
2591
- if (!decision.shouldContinue) {
2592
- // Condition is met - complete successfully
2593
- await checkpoint.checkpoint(stepId, {
2594
- Id: stepId,
2595
- ParentId: parentId,
2596
- Action: clientLambda.OperationAction.SUCCEED,
2597
- SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
2598
- Type: clientLambda.OperationType.STEP,
2599
- Payload: serializedState,
2600
- Name: name,
2601
- });
2602
- log("✅", "waitForCondition completed successfully:", {
2603
- stepId,
2604
- name,
2605
- result: deserializedState,
2606
- totalAttempts: currentAttempt,
2607
- });
2608
- return deserializedState;
2609
- }
2610
- else {
2611
- // Condition not met - schedule retry
2612
- // Only checkpoint the state, not the attempt number (system handles that)
2613
- await checkpoint.checkpoint(stepId, {
2614
- Id: stepId,
2615
- ParentId: parentId,
2616
- Action: clientLambda.OperationAction.RETRY,
2617
- SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
2618
- Type: clientLambda.OperationType.STEP,
2619
- Payload: serializedState, // Just the state, not wrapped in an object
2620
- Name: name,
2621
- StepOptions: {
2622
- NextAttemptDelaySeconds: durationToSeconds(decision.delay),
2623
- },
2624
- });
2625
- // Wait for continuation and signal main loop to continue
2626
- await waitForContinuation(context, stepId, name, hasRunningOperations, checkpoint, getOperationsEmitter(), onAwaitedChange);
2627
- return CONTINUE_MAIN_LOOP;
2628
- }
2629
- }
2630
- catch (error) {
2631
- log("❌", "waitForCondition check function failed:", {
2632
- stepId,
2633
- name,
2634
- error,
2635
- currentAttempt: currentAttempt,
2636
- });
2637
- // Mark as failed - waitForCondition doesn't have its own retry logic for errors
2638
- // If the check function throws, it's considered a failure
2639
- await checkpoint.checkpoint(stepId, {
2640
- Id: stepId,
2641
- ParentId: parentId,
2642
- Action: clientLambda.OperationAction.FAIL,
2643
- SubType: exports.OperationSubType.WAIT_FOR_CONDITION,
2644
- Type: clientLambda.OperationType.STEP,
2645
- Error: createErrorObjectFromError(error),
2646
- Name: name,
2647
- });
2648
- // Reconstruct error from ErrorObject for deterministic behavior
2649
- const errorObject = createErrorObjectFromError(error);
2650
- throw DurableOperationError.fromErrorObject(errorObject);
2651
- }
2652
- };
2653
1946
 
2654
- const createCallbackPromise = (context, stepId, stepName, serdes, hasRunningOperations, operationsEmitter, terminationMessage, checkAndUpdateReplayMode) => {
1947
+ const createCallbackPromise = (context, checkpoint, stepId, stepName, serdes, checkAndUpdateReplayMode) => {
2655
1948
  return new DurablePromise(async () => {
2656
- log("🔄", "Callback promise phase 2 executing:", { stepId, stepName });
2657
- // Main callback logic - can be re-executed if step status changes
2658
- while (true) {
2659
- const stepData = context.getStepData(stepId);
2660
- // Handle case where stepData doesn't exist yet
2661
- // While Phase 1 should create stepData via checkpoint before Phase 2 starts,
2662
- // this can be undefined in test scenarios
2663
- if (!stepData) {
2664
- log("⚠️", "Step data not found, waiting for callback creation:", {
2665
- stepId,
2666
- });
2667
- if (hasRunningOperations()) {
2668
- await waitBeforeContinue({
2669
- checkHasRunningOperations: true,
2670
- checkStepStatus: true,
2671
- checkTimer: false,
2672
- stepId,
2673
- context,
2674
- hasRunningOperations,
2675
- operationsEmitter,
2676
- });
2677
- continue; // Re-evaluate after waiting
2678
- }
2679
- // No other operations and no step data - terminate gracefully
2680
- log("⏳", "No step data found and no running operations, terminating");
2681
- return terminate(context, TerminationReason.CALLBACK_PENDING, terminationMessage);
2682
- }
2683
- if (stepData.Status === clientLambda.OperationStatus.SUCCEEDED) {
2684
- const callbackData = stepData.CallbackDetails;
2685
- if (!callbackData?.CallbackId) {
2686
- throw new CallbackError(`No callback ID found for completed callback: ${stepId}`);
2687
- }
2688
- const result = await safeDeserialize(serdes, callbackData.Result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
2689
- // Check and update replay mode after callback completion
2690
- checkAndUpdateReplayMode();
2691
- return result;
2692
- }
2693
- if (stepData.Status === clientLambda.OperationStatus.FAILED ||
2694
- stepData.Status === clientLambda.OperationStatus.TIMED_OUT) {
2695
- const callbackData = stepData.CallbackDetails;
2696
- const error = callbackData?.Error;
2697
- if (error) {
2698
- const cause = new Error(error.ErrorMessage);
2699
- cause.name = error.ErrorType || "Error";
2700
- cause.stack = error.StackTrace?.join("\n");
2701
- throw new CallbackError(error.ErrorMessage || "Callback failed", cause, error.ErrorData);
2702
- }
2703
- throw new CallbackError("Callback failed");
2704
- }
2705
- if (stepData.Status === clientLambda.OperationStatus.STARTED) {
2706
- // Callback is still pending
2707
- if (hasRunningOperations()) {
2708
- // Wait for other operations or callback completion
2709
- log("⏳", "Callback still pending, waiting for other operations");
2710
- await waitBeforeContinue({
2711
- checkHasRunningOperations: true,
2712
- checkStepStatus: true,
2713
- checkTimer: false,
2714
- stepId,
2715
- context,
2716
- hasRunningOperations,
2717
- operationsEmitter,
2718
- });
2719
- continue; // Re-evaluate status after waiting
2720
- }
2721
- // No other operations running - terminate
2722
- log("⏳", "Callback still pending, terminating");
2723
- return terminate(context, TerminationReason.CALLBACK_PENDING, terminationMessage);
1949
+ log("🔄", "Callback promise phase 2:", { stepId, stepName });
1950
+ checkpoint.markOperationAwaited(stepId);
1951
+ await checkpoint.waitForStatusChange(stepId);
1952
+ const stepData = context.getStepData(stepId);
1953
+ if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
1954
+ log("✅", "Callback completed:", { stepId });
1955
+ checkAndUpdateReplayMode();
1956
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1957
+ const callbackData = stepData.CallbackDetails;
1958
+ if (!callbackData) {
1959
+ throw new CallbackError(`No callback data found for completed callback: ${stepId}`);
2724
1960
  }
2725
- // Should not reach here, but handle unexpected status
2726
- throw new CallbackError(`Unexpected callback status: ${stepData.Status}`);
1961
+ const result = await safeDeserialize(serdes, callbackData.Result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
1962
+ return result;
2727
1963
  }
1964
+ // Handle failure
1965
+ log("❌", "Callback failed:", { stepId, status: stepData?.Status });
1966
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1967
+ const callbackData = stepData?.CallbackDetails;
1968
+ const error = callbackData?.Error;
1969
+ if (error) {
1970
+ const cause = new Error(error.ErrorMessage);
1971
+ cause.name = error.ErrorType || "Error";
1972
+ cause.stack = error.StackTrace?.join("\n");
1973
+ throw new CallbackError(error.ErrorMessage || "Callback failed", cause, error.ErrorData);
1974
+ }
1975
+ throw new CallbackError("Callback failed");
2728
1976
  });
2729
1977
  };
2730
1978
 
@@ -2732,7 +1980,7 @@ const createPassThroughSerdes = () => ({
2732
1980
  serialize: async (value) => value,
2733
1981
  deserialize: async (data) => data,
2734
1982
  });
2735
- const createCallback = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, checkAndUpdateReplayMode, parentId) => {
1983
+ const createCallback = (context, checkpoint, createStepId, checkAndUpdateReplayMode, parentId) => {
2736
1984
  return (nameOrConfig, maybeConfig) => {
2737
1985
  let name;
2738
1986
  let config;
@@ -2745,82 +1993,99 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2745
1993
  }
2746
1994
  const stepId = createStepId();
2747
1995
  const serdes = config?.serdes || createPassThroughSerdes();
2748
- // Validate replay consistency first
2749
- const stepData = context.getStepData(stepId);
2750
- validateReplayConsistency(stepId, {
2751
- type: clientLambda.OperationType.CALLBACK,
2752
- name,
2753
- subType: exports.OperationSubType.CALLBACK,
2754
- }, stepData, context);
2755
- // Phase 1: Setup and checkpoint (immediate execution)
2756
- const setupPromise = (async () => {
2757
- log("📞", "Creating callback phase 1:", { stepId, name, config });
2758
- // Handle already completed callbacks
1996
+ // Phase 1: Setup and checkpoint
1997
+ let isCompleted = false;
1998
+ const phase1Promise = (async () => {
1999
+ log("📞", "Callback phase 1:", { stepId, name });
2000
+ let stepData = context.getStepData(stepId);
2001
+ // Validate replay consistency
2002
+ validateReplayConsistency(stepId, {
2003
+ type: clientLambda.OperationType.CALLBACK,
2004
+ name,
2005
+ subType: exports.OperationSubType.CALLBACK,
2006
+ }, stepData, context);
2007
+ // Check if already completed
2759
2008
  if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2760
- log("⏭️", "Callback already completed in phase 1:", { stepId });
2761
- return { wasNewCallback: false };
2009
+ log("⏭️", "Callback already completed:", { stepId });
2010
+ checkAndUpdateReplayMode();
2011
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
2012
+ metadata: {
2013
+ stepId,
2014
+ name,
2015
+ type: clientLambda.OperationType.CALLBACK,
2016
+ subType: exports.OperationSubType.CALLBACK,
2017
+ parentId,
2018
+ },
2019
+ });
2020
+ isCompleted = true;
2021
+ return;
2762
2022
  }
2023
+ // Check if already failed
2763
2024
  if (stepData?.Status === clientLambda.OperationStatus.FAILED ||
2764
2025
  stepData?.Status === clientLambda.OperationStatus.TIMED_OUT) {
2765
- log("❌", "Callback already failed in phase 1:", { stepId });
2766
- return { wasNewCallback: false };
2026
+ log("❌", "Callback already failed:", { stepId });
2027
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
2028
+ metadata: {
2029
+ stepId,
2030
+ name,
2031
+ type: clientLambda.OperationType.CALLBACK,
2032
+ subType: exports.OperationSubType.CALLBACK,
2033
+ parentId,
2034
+ },
2035
+ });
2036
+ isCompleted = true;
2037
+ return;
2767
2038
  }
2768
- // Handle already started callbacks
2769
- if (stepData?.Status === clientLambda.OperationStatus.STARTED) {
2770
- log("⏳", "Callback already started in phase 1:", { stepId });
2771
- return { wasNewCallback: false };
2039
+ // Start callback if not already started
2040
+ if (!stepData) {
2041
+ await checkpoint.checkpoint(stepId, {
2042
+ Id: stepId,
2043
+ ParentId: parentId,
2044
+ Action: "START",
2045
+ SubType: exports.OperationSubType.CALLBACK,
2046
+ Type: clientLambda.OperationType.CALLBACK,
2047
+ Name: name,
2048
+ CallbackOptions: {
2049
+ TimeoutSeconds: config?.timeout
2050
+ ? durationToSeconds(config.timeout)
2051
+ : undefined,
2052
+ HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2053
+ ? durationToSeconds(config.heartbeatTimeout)
2054
+ : undefined,
2055
+ },
2056
+ });
2057
+ // Refresh stepData after checkpoint
2058
+ stepData = context.getStepData(stepId);
2772
2059
  }
2773
- // Create new callback - checkpoint START operation
2774
- log("🆕", "Creating new callback in phase 1:", { stepId, name });
2775
- await checkpoint.checkpoint(stepId, {
2776
- Id: stepId,
2777
- ParentId: parentId,
2778
- Action: "START",
2779
- SubType: exports.OperationSubType.CALLBACK,
2780
- Type: clientLambda.OperationType.CALLBACK,
2781
- Name: name,
2782
- CallbackOptions: {
2783
- TimeoutSeconds: config?.timeout
2784
- ? durationToSeconds(config.timeout)
2785
- : undefined,
2786
- HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2787
- ? durationToSeconds(config.heartbeatTimeout)
2788
- : undefined,
2060
+ // Mark as IDLE_NOT_AWAITED
2061
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
2062
+ metadata: {
2063
+ stepId,
2064
+ name,
2065
+ type: clientLambda.OperationType.CALLBACK,
2066
+ subType: exports.OperationSubType.CALLBACK,
2067
+ parentId,
2789
2068
  },
2790
2069
  });
2791
- log("✅", "Callback checkpoint completed in phase 1:", { stepId });
2792
- return { wasNewCallback: true };
2793
- })().catch((error) => {
2794
- log("❌", "Callback phase 1 error:", { stepId, error: error.message });
2795
- throw error;
2796
- });
2797
- // Return DurablePromise that executes phase 2 when awaited
2070
+ log("✅", "Callback phase 1 complete:", { stepId });
2071
+ })();
2072
+ phase1Promise.catch(() => { });
2073
+ // Phase 2: Handle results and create callback promise
2798
2074
  return new DurablePromise(async () => {
2799
- // Wait for phase 1 to complete
2800
- const { wasNewCallback } = await setupPromise;
2801
- // Phase 2: Handle results and create callback promise
2802
- log("🔄", "Callback phase 2 executing:", { stepId, name });
2803
- const stepData = context.getStepData(stepId);
2804
- // Handle completed callbacks
2805
- if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2806
- const callbackData = stepData.CallbackDetails;
2075
+ await phase1Promise;
2076
+ if (isCompleted) {
2077
+ const stepData = context.getStepData(stepId);
2078
+ const callbackData = stepData?.CallbackDetails;
2807
2079
  if (!callbackData?.CallbackId) {
2808
- throw new CallbackError(`No callback ID found for completed callback: ${stepId}`);
2080
+ throw new CallbackError(`No callback ID found for callback: ${stepId}`);
2809
2081
  }
2810
- const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2811
- const resolvedPromise = new DurablePromise(async () => deserializedResult);
2812
- // Check and update replay mode after callback completion
2813
- checkAndUpdateReplayMode();
2814
- return [resolvedPromise, callbackData.CallbackId];
2815
- }
2816
- // Handle failed callbacks
2817
- if (stepData?.Status === clientLambda.OperationStatus.FAILED ||
2818
- stepData?.Status === clientLambda.OperationStatus.TIMED_OUT) {
2819
- const callbackData = stepData.CallbackDetails;
2820
- if (!callbackData?.CallbackId) {
2821
- throw new CallbackError(`No callback ID found for failed callback: ${stepId}`);
2082
+ if (stepData?.Status === clientLambda.OperationStatus.SUCCEEDED) {
2083
+ const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2084
+ const resolvedPromise = new DurablePromise(async () => deserializedResult);
2085
+ return [resolvedPromise, callbackData.CallbackId];
2822
2086
  }
2823
- const error = stepData.CallbackDetails?.Error;
2087
+ // Handle failure
2088
+ const error = stepData?.CallbackDetails?.Error;
2824
2089
  const callbackError = error
2825
2090
  ? (() => {
2826
2091
  const cause = new Error(error.ErrorMessage);
@@ -2834,25 +2099,15 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2834
2099
  });
2835
2100
  return [rejectedPromise, callbackData.CallbackId];
2836
2101
  }
2837
- // Handle started or new callbacks
2102
+ log("📞", "Callback phase 2:", { stepId });
2103
+ const stepData = context.getStepData(stepId);
2838
2104
  const callbackData = stepData?.CallbackDetails;
2839
2105
  if (!callbackData?.CallbackId) {
2840
- const errorMessage = wasNewCallback
2841
- ? `Callback ID not found in stepData after checkpoint: ${stepId}`
2842
- : `No callback ID found for started callback: ${stepId}`;
2843
- throw new CallbackError(errorMessage);
2106
+ throw new CallbackError(`No callback ID found for started callback: ${stepId}`);
2844
2107
  }
2845
2108
  const callbackId = callbackData.CallbackId;
2846
- // Create callback promise that handles completion
2847
- const terminationMessage = wasNewCallback
2848
- ? `Callback ${name || stepId} created and pending external completion`
2849
- : `Callback ${name || stepId} is pending external completion`;
2850
- const callbackPromise = createCallbackPromise(context, stepId, name, serdes, hasRunningOperations, getOperationsEmitter(), terminationMessage, checkAndUpdateReplayMode);
2851
- log("✅", "Callback created successfully in phase 2:", {
2852
- stepId,
2853
- name,
2854
- callbackId,
2855
- });
2109
+ const callbackPromise = createCallbackPromise(context, checkpoint, stepId, name, serdes, checkAndUpdateReplayMode);
2110
+ log("✅", "Callback created:", { stepId, name, callbackId });
2856
2111
  return [callbackPromise, callbackId];
2857
2112
  });
2858
2113
  };
@@ -3309,6 +2564,42 @@ class ConcurrencyController {
3309
2564
  (childStepData.Status === clientLambda.OperationStatus.SUCCEEDED ||
3310
2565
  childStepData.Status === clientLambda.OperationStatus.FAILED));
3311
2566
  }
2567
+ getCompletionReason(failureCount, successCount, completedCount, items, config) {
2568
+ // Check tolerance first, before checking if all completed
2569
+ const completion = config.completionConfig;
2570
+ // Handle fail-fast behavior (no completion config or empty completion config)
2571
+ if (!completion) {
2572
+ if (failureCount > 0)
2573
+ return "FAILURE_TOLERANCE_EXCEEDED";
2574
+ }
2575
+ else {
2576
+ const hasAnyCompletionCriteria = Object.values(completion).some((value) => value !== undefined);
2577
+ if (!hasAnyCompletionCriteria) {
2578
+ if (failureCount > 0)
2579
+ return "FAILURE_TOLERANCE_EXCEEDED";
2580
+ }
2581
+ else {
2582
+ // Check specific tolerance thresholds
2583
+ if (completion.toleratedFailureCount !== undefined &&
2584
+ failureCount > completion.toleratedFailureCount) {
2585
+ return "FAILURE_TOLERANCE_EXCEEDED";
2586
+ }
2587
+ if (completion.toleratedFailurePercentage !== undefined) {
2588
+ const failurePercentage = (failureCount / items.length) * 100;
2589
+ if (failurePercentage > completion.toleratedFailurePercentage) {
2590
+ return "FAILURE_TOLERANCE_EXCEEDED";
2591
+ }
2592
+ }
2593
+ }
2594
+ }
2595
+ // Check other completion reasons
2596
+ if (completedCount === items.length)
2597
+ return "ALL_COMPLETED";
2598
+ if (config.completionConfig?.minSuccessful !== undefined &&
2599
+ successCount >= config.completionConfig.minSuccessful)
2600
+ return "MIN_SUCCESSFUL_REACHED";
2601
+ return "ALL_COMPLETED";
2602
+ }
3312
2603
  async executeItems(items, executor, parentContext, config, durableExecutionMode = DurableExecutionMode.ExecutionMode, entityId, executionContext) {
3313
2604
  // In replay mode, we're reconstructing the result from child contexts
3314
2605
  if (durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
@@ -3422,17 +2713,9 @@ class ConcurrencyController {
3422
2713
  completedCount,
3423
2714
  totalCount: resultItems.length,
3424
2715
  });
3425
- // Reconstruct the completion reason based on replay results
3426
2716
  const successCount = resultItems.filter((item) => item.status === exports.BatchItemStatus.SUCCEEDED).length;
3427
- const getCompletionReason = () => {
3428
- if (completedCount === items.length)
3429
- return "ALL_COMPLETED";
3430
- if (config.completionConfig?.minSuccessful !== undefined &&
3431
- successCount >= config.completionConfig.minSuccessful)
3432
- return "MIN_SUCCESSFUL_REACHED";
3433
- return "FAILURE_TOLERANCE_EXCEEDED";
3434
- };
3435
- return new BatchResultImpl(resultItems, getCompletionReason());
2717
+ const failureCount = completedCount - successCount;
2718
+ return new BatchResultImpl(resultItems, this.getCompletionReason(failureCount, successCount, completedCount, items, config));
3436
2719
  }
3437
2720
  async executeItemsConcurrently(items, executor, parentContext, config) {
3438
2721
  const maxConcurrency = config.maxConcurrency || Infinity;
@@ -3479,13 +2762,8 @@ class ConcurrencyController {
3479
2762
  }
3480
2763
  return false;
3481
2764
  };
3482
- const getCompletionReason = () => {
3483
- if (completedCount === items.length)
3484
- return "ALL_COMPLETED";
3485
- if (config.completionConfig?.minSuccessful !== undefined &&
3486
- successCount >= config.completionConfig.minSuccessful)
3487
- return "MIN_SUCCESSFUL_REACHED";
3488
- return "FAILURE_TOLERANCE_EXCEEDED";
2765
+ const getCompletionReason = (failureCount) => {
2766
+ return this.getCompletionReason(failureCount, successCount, completedCount, items, config);
3489
2767
  };
3490
2768
  const tryStartNext = () => {
3491
2769
  while (activeCount < maxConcurrency &&
@@ -3556,14 +2834,20 @@ class ConcurrencyController {
3556
2834
  startedCount: finalBatchItems.filter((item) => item.status === exports.BatchItemStatus.STARTED).length,
3557
2835
  totalCount: finalBatchItems.length,
3558
2836
  });
3559
- const result = new BatchResultImpl(finalBatchItems, getCompletionReason());
2837
+ const result = new BatchResultImpl(finalBatchItems, getCompletionReason(failureCount));
3560
2838
  resolve(result);
3561
2839
  }
3562
2840
  else {
3563
2841
  tryStartNext();
3564
2842
  }
3565
2843
  };
3566
- tryStartNext();
2844
+ if (items.length === 0) {
2845
+ log("🎉", `${this.operationName} completed with no items`);
2846
+ resolve(new BatchResultImpl([], getCompletionReason(0)));
2847
+ }
2848
+ else {
2849
+ tryStartNext();
2850
+ }
3567
2851
  });
3568
2852
  }
3569
2853
  }
@@ -3637,338 +2921,992 @@ const createConcurrentExecutionHandler = (context, runInChildContext, skipNextOp
3637
2921
  return new DurablePromise(async () => {
3638
2922
  return await phase1Promise;
3639
2923
  });
3640
- };
3641
- };
3642
-
3643
- class ModeManagement {
3644
- captureExecutionState;
3645
- checkAndUpdateReplayMode;
3646
- checkForNonResolvingPromise;
3647
- getDurableExecutionMode;
3648
- setDurableExecutionMode;
3649
- constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
3650
- this.captureExecutionState = captureExecutionState;
3651
- this.checkAndUpdateReplayMode = checkAndUpdateReplayMode;
3652
- this.checkForNonResolvingPromise = checkForNonResolvingPromise;
3653
- this.getDurableExecutionMode = getDurableExecutionMode;
3654
- this.setDurableExecutionMode = setDurableExecutionMode;
2924
+ };
2925
+ };
2926
+
2927
+ class ModeManagement {
2928
+ captureExecutionState;
2929
+ checkAndUpdateReplayMode;
2930
+ checkForNonResolvingPromise;
2931
+ getDurableExecutionMode;
2932
+ setDurableExecutionMode;
2933
+ constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
2934
+ this.captureExecutionState = captureExecutionState;
2935
+ this.checkAndUpdateReplayMode = checkAndUpdateReplayMode;
2936
+ this.checkForNonResolvingPromise = checkForNonResolvingPromise;
2937
+ this.getDurableExecutionMode = getDurableExecutionMode;
2938
+ this.setDurableExecutionMode = setDurableExecutionMode;
2939
+ }
2940
+ withModeManagement(operation) {
2941
+ const shouldSwitchToExecutionMode = this.captureExecutionState();
2942
+ this.checkAndUpdateReplayMode();
2943
+ const nonResolvingPromise = this.checkForNonResolvingPromise();
2944
+ if (nonResolvingPromise)
2945
+ return nonResolvingPromise;
2946
+ try {
2947
+ return operation();
2948
+ }
2949
+ finally {
2950
+ if (shouldSwitchToExecutionMode) {
2951
+ this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
2952
+ }
2953
+ }
2954
+ }
2955
+ withDurableModeManagement(operation) {
2956
+ const shouldSwitchToExecutionMode = this.captureExecutionState();
2957
+ this.checkAndUpdateReplayMode();
2958
+ const nonResolvingPromise = this.checkForNonResolvingPromise();
2959
+ if (nonResolvingPromise) {
2960
+ return new DurablePromise(async () => {
2961
+ await nonResolvingPromise;
2962
+ // This will never be reached
2963
+ throw new Error("Unreachable code");
2964
+ });
2965
+ }
2966
+ try {
2967
+ return operation();
2968
+ }
2969
+ finally {
2970
+ if (shouldSwitchToExecutionMode) {
2971
+ this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
2972
+ }
2973
+ }
2974
+ }
2975
+ }
2976
+
2977
+ const HASH_LENGTH = 16;
2978
+ /**
2979
+ * Creates an MD5 hash of the input string for better performance than SHA-256
2980
+ * @param input - The string to hash
2981
+ * @returns The truncated hexadecimal hash string
2982
+ */
2983
+ const hashId = (input) => {
2984
+ return crypto.createHash("md5")
2985
+ .update(input)
2986
+ .digest("hex")
2987
+ .substring(0, HASH_LENGTH);
2988
+ };
2989
+ /**
2990
+ * Helper function to get step data using the original stepId
2991
+ * This function handles the hashing internally so callers don't need to worry about it
2992
+ * @param stepData - The stepData record from context
2993
+ * @param stepId - The original stepId (will be hashed internally)
2994
+ * @returns The operation data or undefined if not found
2995
+ */
2996
+ const getStepData = (stepData, stepId) => {
2997
+ const hashedId = hashId(stepId);
2998
+ return stepData[hashedId];
2999
+ };
3000
+
3001
+ class DurableContextImpl {
3002
+ executionContext;
3003
+ lambdaContext;
3004
+ _stepPrefix;
3005
+ _stepCounter = 0;
3006
+ durableLogger;
3007
+ modeAwareLoggingEnabled = true;
3008
+ checkpoint;
3009
+ durableExecutionMode;
3010
+ _parentId;
3011
+ modeManagement;
3012
+ durableExecution;
3013
+ logger;
3014
+ constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
3015
+ this.executionContext = executionContext;
3016
+ this.lambdaContext = lambdaContext;
3017
+ this._stepPrefix = stepPrefix;
3018
+ this._parentId = parentId;
3019
+ this.durableExecution = durableExecution;
3020
+ this.durableLogger = inheritedLogger;
3021
+ this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3022
+ this.logger = this.createModeAwareLogger(inheritedLogger);
3023
+ this.durableExecutionMode = durableExecutionMode;
3024
+ this.checkpoint = durableExecution.checkpointManager;
3025
+ this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), () => this.durableExecutionMode, (mode) => {
3026
+ this.durableExecutionMode = mode;
3027
+ });
3028
+ }
3029
+ getDurableLoggingContext() {
3030
+ return {
3031
+ getDurableLogData: () => {
3032
+ const activeContext = getActiveContext();
3033
+ const result = {
3034
+ executionArn: this.executionContext.durableExecutionArn,
3035
+ requestId: this.executionContext.requestId,
3036
+ tenantId: this.executionContext.tenantId,
3037
+ operationId: !activeContext || activeContext?.contextId === "root"
3038
+ ? undefined
3039
+ : hashId(activeContext.contextId),
3040
+ };
3041
+ if (activeContext?.attempt !== undefined) {
3042
+ result.attempt = activeContext.attempt;
3043
+ }
3044
+ return result;
3045
+ },
3046
+ };
3047
+ }
3048
+ shouldLog() {
3049
+ const activeContext = getActiveContext();
3050
+ if (!this.modeAwareLoggingEnabled || !activeContext) {
3051
+ return true;
3052
+ }
3053
+ if (activeContext.contextId === "root") {
3054
+ return this.durableExecutionMode === DurableExecutionMode.ExecutionMode;
3055
+ }
3056
+ return (activeContext.durableExecutionMode === DurableExecutionMode.ExecutionMode);
3057
+ }
3058
+ createModeAwareLogger(logger) {
3059
+ const durableContextLogger = {
3060
+ warn: (...args) => {
3061
+ if (this.shouldLog()) {
3062
+ return logger.warn(...args);
3063
+ }
3064
+ },
3065
+ debug: (...args) => {
3066
+ if (this.shouldLog()) {
3067
+ return logger.debug(...args);
3068
+ }
3069
+ },
3070
+ info: (...args) => {
3071
+ if (this.shouldLog()) {
3072
+ return logger.info(...args);
3073
+ }
3074
+ },
3075
+ error: (...args) => {
3076
+ if (this.shouldLog()) {
3077
+ return logger.error(...args);
3078
+ }
3079
+ },
3080
+ };
3081
+ if ("log" in logger) {
3082
+ durableContextLogger.log = (level, ...args) => {
3083
+ if (this.shouldLog()) {
3084
+ return logger.log?.(level, ...args);
3085
+ }
3086
+ };
3087
+ }
3088
+ return durableContextLogger;
3089
+ }
3090
+ createStepId() {
3091
+ this._stepCounter++;
3092
+ return this._stepPrefix
3093
+ ? `${this._stepPrefix}-${this._stepCounter}`
3094
+ : `${this._stepCounter}`;
3095
+ }
3096
+ getNextStepId() {
3097
+ const nextCounter = this._stepCounter + 1;
3098
+ return this._stepPrefix
3099
+ ? `${this._stepPrefix}-${nextCounter}`
3100
+ : `${nextCounter}`;
3101
+ }
3102
+ /**
3103
+ * Skips the next operation by incrementing the step counter.
3104
+ * Used internally by concurrent execution handler during replay to skip incomplete items.
3105
+ * @internal
3106
+ */
3107
+ skipNextOperation() {
3108
+ this._stepCounter++;
3109
+ }
3110
+ checkAndUpdateReplayMode() {
3111
+ if (this.durableExecutionMode === DurableExecutionMode.ReplayMode) {
3112
+ const nextStepId = this.getNextStepId();
3113
+ const nextStepData = this.executionContext.getStepData(nextStepId);
3114
+ if (!nextStepData) {
3115
+ this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
3116
+ }
3117
+ }
3118
+ }
3119
+ captureExecutionState() {
3120
+ const wasInReplayMode = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
3121
+ const nextStepId = this.getNextStepId();
3122
+ const stepData = this.executionContext.getStepData(nextStepId);
3123
+ const wasNotFinished = !!(stepData &&
3124
+ stepData.Status !== clientLambda.OperationStatus.SUCCEEDED &&
3125
+ stepData.Status !== clientLambda.OperationStatus.FAILED);
3126
+ return wasInReplayMode && wasNotFinished;
3127
+ }
3128
+ checkForNonResolvingPromise() {
3129
+ if (this.durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
3130
+ const nextStepId = this.getNextStepId();
3131
+ const nextStepData = this.executionContext.getStepData(nextStepId);
3132
+ if (nextStepData &&
3133
+ nextStepData.Status !== clientLambda.OperationStatus.SUCCEEDED &&
3134
+ nextStepData.Status !== clientLambda.OperationStatus.FAILED) {
3135
+ return new Promise(() => { }); // Non-resolving promise
3136
+ }
3137
+ }
3138
+ return null;
3139
+ }
3140
+ withModeManagement(operation) {
3141
+ return this.modeManagement.withModeManagement(operation);
3142
+ }
3143
+ withDurableModeManagement(operation) {
3144
+ return this.modeManagement.withDurableModeManagement(operation);
3145
+ }
3146
+ step(nameOrFn, fnOrOptions, maybeOptions) {
3147
+ validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
3148
+ return this.withDurableModeManagement(() => {
3149
+ const stepHandler = createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this._parentId);
3150
+ return stepHandler(nameOrFn, fnOrOptions, maybeOptions);
3151
+ });
3152
+ }
3153
+ invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
3154
+ validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
3155
+ return this.withDurableModeManagement(() => {
3156
+ const invokeHandler = createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3157
+ return invokeHandler(...[
3158
+ nameOrFuncId,
3159
+ funcIdOrInput,
3160
+ inputOrConfig,
3161
+ maybeConfig,
3162
+ ]);
3163
+ });
3164
+ }
3165
+ runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
3166
+ validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
3167
+ return this.withDurableModeManagement(() => {
3168
+ const blockHandler = createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger,
3169
+ // Adapter function to maintain compatibility
3170
+ (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, this.durableExecution, parentId), this._parentId);
3171
+ return blockHandler(nameOrFn, fnOrOptions, maybeOptions);
3172
+ });
3173
+ }
3174
+ wait(nameOrDuration, maybeDuration) {
3175
+ validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
3176
+ return this.withDurableModeManagement(() => {
3177
+ const waitHandler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3178
+ return typeof nameOrDuration === "string"
3179
+ ? waitHandler(nameOrDuration, maybeDuration)
3180
+ : waitHandler(nameOrDuration);
3181
+ });
3182
+ }
3183
+ /**
3184
+ * Configure logger behavior for this context
3185
+ *
3186
+ * This method allows partial configuration - only the properties provided will be updated.
3187
+ * For example, calling configureLogger(\{ modeAware: false \}) will only change the modeAware
3188
+ * setting without affecting any previously configured custom logger.
3189
+ *
3190
+ * @param config - Logger configuration options including customLogger and modeAware settings (default: modeAware=true)
3191
+ * @example
3192
+ * // Set custom logger and enable mode-aware logging
3193
+ * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
3194
+ *
3195
+ * // Later, disable mode-aware logging without changing the custom logger
3196
+ * context.configureLogger(\{ modeAware: false \});
3197
+ */
3198
+ configureLogger(config) {
3199
+ if (config.customLogger !== undefined) {
3200
+ this.durableLogger = config.customLogger;
3201
+ this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3202
+ this.logger = this.createModeAwareLogger(this.durableLogger);
3203
+ }
3204
+ if (config.modeAware !== undefined) {
3205
+ this.modeAwareLoggingEnabled = config.modeAware;
3206
+ }
3207
+ }
3208
+ createCallback(nameOrConfig, maybeConfig) {
3209
+ validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
3210
+ return this.withDurableModeManagement(() => {
3211
+ const callbackFactory = createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId);
3212
+ return callbackFactory(nameOrConfig, maybeConfig);
3213
+ });
3214
+ }
3215
+ waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
3216
+ validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
3217
+ return this.withDurableModeManagement(() => {
3218
+ const waitForCallbackHandler = createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this));
3219
+ return waitForCallbackHandler(nameOrSubmitter, submitterOrConfig, maybeConfig);
3220
+ });
3221
+ }
3222
+ waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
3223
+ validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
3224
+ return this.withDurableModeManagement(() => {
3225
+ const waitForConditionHandler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this._parentId);
3226
+ return typeof nameOrCheckFunc === "string" ||
3227
+ nameOrCheckFunc === undefined
3228
+ ? waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig)
3229
+ : waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig);
3230
+ });
3231
+ }
3232
+ map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
3233
+ validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
3234
+ return this.withDurableModeManagement(() => {
3235
+ const mapHandler = createMapHandler(this.executionContext, this._executeConcurrently.bind(this));
3236
+ return mapHandler(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
3237
+ });
3238
+ }
3239
+ parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
3240
+ validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
3241
+ return this.withDurableModeManagement(() => {
3242
+ const parallelHandler = createParallelHandler(this.executionContext, this._executeConcurrently.bind(this));
3243
+ return parallelHandler(nameOrBranches, branchesOrConfig, maybeConfig);
3244
+ });
3245
+ }
3246
+ _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
3247
+ validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
3248
+ return this.withDurableModeManagement(() => {
3249
+ const concurrentExecutionHandler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
3250
+ const promise = concurrentExecutionHandler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
3251
+ // Prevent unhandled promise rejections
3252
+ promise?.catch(() => { });
3253
+ return promise;
3254
+ });
3255
+ }
3256
+ get promise() {
3257
+ return createPromiseHandler(this.step.bind(this));
3258
+ }
3259
+ }
3260
+ const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => {
3261
+ return new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3262
+ };
3263
+
3264
+ /**
3265
+ * Error thrown when a checkpoint operation fails due to invocation-level issues
3266
+ * (e.g., 5xx errors, invalid checkpoint token)
3267
+ * This will terminate the current Lambda invocation, but the execution can continue with a new invocation
3268
+ */
3269
+ class CheckpointUnrecoverableInvocationError extends UnrecoverableInvocationError {
3270
+ terminationReason = TerminationReason.CHECKPOINT_FAILED;
3271
+ constructor(message, originalError) {
3272
+ super(message || "Checkpoint operation failed", originalError);
3273
+ }
3274
+ }
3275
+ /**
3276
+ * Error thrown when a checkpoint operation fails due to execution-level issues
3277
+ * (e.g., 4xx errors other than invalid checkpoint token)
3278
+ * This will terminate the entire execution and cannot be recovered
3279
+ */
3280
+ class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError {
3281
+ terminationReason = TerminationReason.CHECKPOINT_FAILED;
3282
+ constructor(message, originalError) {
3283
+ super(message || "Checkpoint operation failed", originalError);
3284
+ }
3285
+ }
3286
+
3287
+ const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
3288
+ const TERMINAL_STATUSES = [
3289
+ clientLambda.OperationStatus.SUCCEEDED,
3290
+ clientLambda.OperationStatus.CANCELLED,
3291
+ clientLambda.OperationStatus.FAILED,
3292
+ clientLambda.OperationStatus.STOPPED,
3293
+ clientLambda.OperationStatus.TIMED_OUT,
3294
+ ];
3295
+ class CheckpointManager {
3296
+ durableExecutionArn;
3297
+ stepData;
3298
+ storage;
3299
+ terminationManager;
3300
+ stepDataEmitter;
3301
+ logger;
3302
+ finishedAncestors;
3303
+ queue = [];
3304
+ isProcessing = false;
3305
+ currentTaskToken;
3306
+ forceCheckpointPromises = [];
3307
+ queueCompletionResolver = null;
3308
+ MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
3309
+ MAX_ITEMS_IN_BATCH = 250;
3310
+ isTerminating = false;
3311
+ static textEncoder = new TextEncoder();
3312
+ // Operation lifecycle tracking
3313
+ operations = new Map();
3314
+ // Termination cooldown
3315
+ terminationTimer = null;
3316
+ terminationReason = null;
3317
+ TERMINATION_COOLDOWN_MS = 50;
3318
+ constructor(durableExecutionArn, stepData, storage, terminationManager, initialTaskToken, stepDataEmitter, logger, finishedAncestors) {
3319
+ this.durableExecutionArn = durableExecutionArn;
3320
+ this.stepData = stepData;
3321
+ this.storage = storage;
3322
+ this.terminationManager = terminationManager;
3323
+ this.stepDataEmitter = stepDataEmitter;
3324
+ this.logger = logger;
3325
+ this.finishedAncestors = finishedAncestors;
3326
+ this.currentTaskToken = initialTaskToken;
3327
+ }
3328
+ setTerminating() {
3329
+ this.isTerminating = true;
3330
+ log("🛑", "Checkpoint manager marked as terminating");
3331
+ }
3332
+ /**
3333
+ * Mark an ancestor as finished (for run-in-child-context operations)
3334
+ */
3335
+ markAncestorFinished(stepId) {
3336
+ this.finishedAncestors.add(stepId);
3337
+ }
3338
+ /**
3339
+ * Extract parent ID from hierarchical stepId (e.g., "1-2-3" -\> "1-2")
3340
+ */
3341
+ getParentId(stepId) {
3342
+ const lastDashIndex = stepId.lastIndexOf("-");
3343
+ return lastDashIndex > 0 ? stepId.substring(0, lastDashIndex) : undefined;
3344
+ }
3345
+ /**
3346
+ * Checks if any ancestor of the given stepId is finished
3347
+ * Only applies to operations that are descendants of run-in-child-context operations
3348
+ */
3349
+ hasFinishedAncestor(stepId) {
3350
+ // Only use getParentId to avoid mixing hashed and original stepIds
3351
+ let currentParentId = this.getParentId(stepId);
3352
+ while (currentParentId) {
3353
+ // Check if this ancestor is finished
3354
+ if (this.finishedAncestors.has(currentParentId)) {
3355
+ return true;
3356
+ }
3357
+ // Move up to the next ancestor using hierarchical stepId
3358
+ currentParentId = this.getParentId(currentParentId);
3359
+ }
3360
+ return false;
3361
+ }
3362
+ async forceCheckpoint() {
3363
+ if (this.isTerminating) {
3364
+ log("⚠️", "Force checkpoint skipped - termination in progress");
3365
+ return new Promise(() => { }); // Never resolves during termination
3366
+ }
3367
+ return new Promise((resolve, reject) => {
3368
+ this.forceCheckpointPromises.push({ resolve, reject });
3369
+ if (!this.isProcessing) {
3370
+ setImmediate(() => {
3371
+ this.processQueue();
3372
+ });
3373
+ }
3374
+ });
3375
+ }
3376
+ async waitForQueueCompletion() {
3377
+ if (this.queue.length === 0 && !this.isProcessing) {
3378
+ return;
3379
+ }
3380
+ return new Promise((resolve) => {
3381
+ this.queueCompletionResolver = resolve;
3382
+ });
3383
+ }
3384
+ clearQueue() {
3385
+ // Silently clear queue - we're terminating so no need to reject promises
3386
+ this.queue = [];
3387
+ this.forceCheckpointPromises = [];
3388
+ // Resolve any waiting queue completion promises since we're clearing
3389
+ this.notifyQueueCompletion();
3390
+ }
3391
+ // Alias for backward compatibility with Checkpoint interface
3392
+ async force() {
3393
+ return this.forceCheckpoint();
3394
+ }
3395
+ async checkpoint(stepId, data) {
3396
+ if (this.isTerminating) {
3397
+ log("⚠️", "Checkpoint skipped - termination in progress:", { stepId });
3398
+ return new Promise(() => { }); // Never resolves during termination
3399
+ }
3400
+ // Check if any ancestor is finished - if so, don't queue and don't resolve
3401
+ if (this.hasFinishedAncestor(stepId)) {
3402
+ log("⚠️", "Checkpoint skipped - ancestor already finished:", { stepId });
3403
+ return new Promise(() => { }); // Never resolves when ancestor is finished
3404
+ }
3405
+ return new Promise((resolve, reject) => {
3406
+ const queuedItem = {
3407
+ stepId,
3408
+ data,
3409
+ resolve: () => {
3410
+ resolve();
3411
+ },
3412
+ reject: (error) => {
3413
+ reject(error);
3414
+ },
3415
+ };
3416
+ this.queue.push(queuedItem);
3417
+ log("📥", "Checkpoint queued:", {
3418
+ stepId,
3419
+ queueLength: this.queue.length,
3420
+ isProcessing: this.isProcessing,
3421
+ });
3422
+ if (!this.isProcessing) {
3423
+ setImmediate(() => {
3424
+ this.processQueue();
3425
+ });
3426
+ }
3427
+ });
3428
+ }
3429
+ classifyCheckpointError(error) {
3430
+ const originalError = error instanceof Error ? error : new Error(String(error));
3431
+ const awsError = error;
3432
+ const statusCode = awsError.$metadata?.httpStatusCode;
3433
+ const errorName = awsError.name;
3434
+ const errorMessage = awsError.message || originalError.message;
3435
+ log("🔍", "Classifying checkpoint error:", {
3436
+ statusCode,
3437
+ errorName,
3438
+ errorMessage,
3439
+ });
3440
+ if (statusCode &&
3441
+ statusCode >= 400 &&
3442
+ statusCode < 500 &&
3443
+ errorName === "InvalidParameterValueException" &&
3444
+ errorMessage.startsWith("Invalid Checkpoint Token")) {
3445
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3446
+ }
3447
+ if (statusCode &&
3448
+ statusCode >= 400 &&
3449
+ statusCode < 500 &&
3450
+ statusCode !== 429) {
3451
+ return new CheckpointUnrecoverableExecutionError(`Checkpoint failed: ${errorMessage}`, originalError);
3452
+ }
3453
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3655
3454
  }
3656
- withModeManagement(operation) {
3657
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3658
- this.checkAndUpdateReplayMode();
3659
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3660
- if (nonResolvingPromise)
3661
- return nonResolvingPromise;
3662
- try {
3663
- return operation();
3455
+ async processQueue() {
3456
+ if (this.isProcessing) {
3457
+ return;
3664
3458
  }
3665
- finally {
3666
- if (shouldSwitchToExecutionMode) {
3667
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3459
+ const hasQueuedItems = this.queue.length > 0;
3460
+ const hasForceRequests = this.forceCheckpointPromises.length > 0;
3461
+ if (!hasQueuedItems && !hasForceRequests) {
3462
+ return;
3463
+ }
3464
+ this.isProcessing = true;
3465
+ const batch = [];
3466
+ const baseSize = this.currentTaskToken.length + 100;
3467
+ let currentSize = baseSize;
3468
+ while (this.queue.length > 0) {
3469
+ const nextItem = this.queue[0];
3470
+ const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
3471
+ if ((currentSize + itemSize > this.MAX_PAYLOAD_SIZE ||
3472
+ batch.length >= this.MAX_ITEMS_IN_BATCH) &&
3473
+ batch.length > 0) {
3474
+ break;
3668
3475
  }
3476
+ this.queue.shift();
3477
+ batch.push(nextItem);
3478
+ currentSize += itemSize;
3669
3479
  }
3670
- }
3671
- withDurableModeManagement(operation) {
3672
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3673
- this.checkAndUpdateReplayMode();
3674
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3675
- if (nonResolvingPromise) {
3676
- return new DurablePromise(async () => {
3677
- await nonResolvingPromise;
3678
- // This will never be reached
3679
- throw new Error("Unreachable code");
3480
+ log("🔄", "Processing checkpoint batch:", {
3481
+ batchSize: batch.length,
3482
+ remainingInQueue: this.queue.length,
3483
+ estimatedSize: currentSize,
3484
+ maxSize: this.MAX_PAYLOAD_SIZE,
3485
+ });
3486
+ try {
3487
+ if (batch.length > 0 || this.forceCheckpointPromises.length > 0) {
3488
+ await this.processBatch(batch);
3489
+ }
3490
+ batch.forEach((item) => {
3491
+ item.resolve();
3492
+ });
3493
+ const forcePromises = this.forceCheckpointPromises.splice(0);
3494
+ forcePromises.forEach((promise) => {
3495
+ promise.resolve();
3496
+ });
3497
+ log("✅", "Checkpoint batch processed successfully:", {
3498
+ batchSize: batch.length,
3499
+ forceRequests: forcePromises.length,
3500
+ newTaskToken: this.currentTaskToken,
3680
3501
  });
3681
3502
  }
3682
- try {
3683
- return operation();
3503
+ catch (error) {
3504
+ log("❌", "Checkpoint batch failed:", {
3505
+ batchSize: batch.length,
3506
+ error,
3507
+ });
3508
+ const checkpointError = this.classifyCheckpointError(error);
3509
+ // Clear remaining queue silently - we're terminating
3510
+ this.clearQueue();
3511
+ this.terminationManager.terminate({
3512
+ reason: TerminationReason.CHECKPOINT_FAILED,
3513
+ message: checkpointError.message,
3514
+ error: checkpointError,
3515
+ });
3684
3516
  }
3685
3517
  finally {
3686
- if (shouldSwitchToExecutionMode) {
3687
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3518
+ this.isProcessing = false;
3519
+ if (this.queue.length > 0) {
3520
+ setImmediate(() => {
3521
+ this.processQueue();
3522
+ });
3523
+ }
3524
+ else {
3525
+ // Queue is empty and processing is done - notify all waiting promises
3526
+ this.notifyQueueCompletion();
3688
3527
  }
3689
3528
  }
3690
3529
  }
3691
- }
3692
-
3693
- class DurableContextImpl {
3694
- executionContext;
3695
- lambdaContext;
3696
- _stepPrefix;
3697
- _stepCounter = 0;
3698
- durableLogger;
3699
- modeAwareLoggingEnabled = true;
3700
- runningOperations = new Set();
3701
- operationsEmitter = new events.EventEmitter();
3702
- checkpoint;
3703
- durableExecutionMode;
3704
- _parentId;
3705
- modeManagement;
3706
- durableExecution;
3707
- logger;
3708
- constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
3709
- this.executionContext = executionContext;
3710
- this.lambdaContext = lambdaContext;
3711
- this._stepPrefix = stepPrefix;
3712
- this._parentId = parentId;
3713
- this.durableExecution = durableExecution;
3714
- this.durableLogger = inheritedLogger;
3715
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3716
- this.logger = this.createModeAwareLogger(inheritedLogger);
3717
- this.durableExecutionMode = durableExecutionMode;
3718
- this.checkpoint = durableExecution.checkpointManager;
3719
- this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), () => this.durableExecutionMode, (mode) => {
3720
- this.durableExecutionMode = mode;
3721
- });
3722
- }
3723
- getDurableLoggingContext() {
3724
- return {
3725
- getDurableLogData: () => {
3726
- const activeContext = getActiveContext();
3727
- const result = {
3728
- executionArn: this.executionContext.durableExecutionArn,
3729
- requestId: this.executionContext.requestId,
3730
- tenantId: this.executionContext.tenantId,
3731
- operationId: !activeContext || activeContext?.contextId === "root"
3732
- ? undefined
3733
- : hashId(activeContext.contextId),
3734
- };
3735
- if (activeContext?.attempt !== undefined) {
3736
- result.attempt = activeContext.attempt;
3737
- }
3738
- return result;
3739
- },
3740
- };
3741
- }
3742
- shouldLog() {
3743
- const activeContext = getActiveContext();
3744
- if (!this.modeAwareLoggingEnabled || !activeContext) {
3745
- return true;
3746
- }
3747
- if (activeContext.contextId === "root") {
3748
- return this.durableExecutionMode === DurableExecutionMode.ExecutionMode;
3530
+ notifyQueueCompletion() {
3531
+ if (this.queueCompletionResolver) {
3532
+ this.queueCompletionResolver();
3533
+ this.queueCompletionResolver = null;
3749
3534
  }
3750
- return (activeContext.durableExecutionMode === DurableExecutionMode.ExecutionMode);
3751
3535
  }
3752
- createModeAwareLogger(logger) {
3753
- const durableContextLogger = {
3754
- warn: (...args) => {
3755
- if (this.shouldLog()) {
3756
- return logger.warn(...args);
3757
- }
3758
- },
3759
- debug: (...args) => {
3760
- if (this.shouldLog()) {
3761
- return logger.debug(...args);
3762
- }
3763
- },
3764
- info: (...args) => {
3765
- if (this.shouldLog()) {
3766
- return logger.info(...args);
3767
- }
3768
- },
3769
- error: (...args) => {
3770
- if (this.shouldLog()) {
3771
- return logger.error(...args);
3772
- }
3773
- },
3774
- };
3775
- if ("log" in logger) {
3776
- durableContextLogger.log = (level, ...args) => {
3777
- if (this.shouldLog()) {
3778
- return logger.log?.(level, ...args);
3779
- }
3536
+ async processBatch(batch) {
3537
+ const updates = batch.map((item) => {
3538
+ const hashedStepId = hashId(item.stepId);
3539
+ const update = {
3540
+ Type: item.data.Type || "STEP",
3541
+ Action: item.data.Action || "START",
3542
+ ...item.data,
3543
+ Id: hashedStepId,
3544
+ ...(item.data.ParentId && { ParentId: hashId(item.data.ParentId) }),
3780
3545
  };
3546
+ return update;
3547
+ });
3548
+ const checkpointData = {
3549
+ DurableExecutionArn: this.durableExecutionArn,
3550
+ CheckpointToken: this.currentTaskToken,
3551
+ Updates: updates,
3552
+ };
3553
+ log("⏺️", "Creating checkpoint batch:", {
3554
+ batchSize: updates.length,
3555
+ checkpointToken: this.currentTaskToken,
3556
+ updates: updates.map((u) => ({
3557
+ Id: u.Id,
3558
+ Action: u.Action,
3559
+ Type: u.Type,
3560
+ })),
3561
+ });
3562
+ const response = await this.storage.checkpoint(checkpointData, this.logger);
3563
+ if (response.CheckpointToken) {
3564
+ this.currentTaskToken = response.CheckpointToken;
3565
+ }
3566
+ if (response.NewExecutionState?.Operations) {
3567
+ this.updateStepDataFromCheckpointResponse(response.NewExecutionState.Operations);
3781
3568
  }
3782
- return durableContextLogger;
3783
- }
3784
- createStepId() {
3785
- this._stepCounter++;
3786
- return this._stepPrefix
3787
- ? `${this._stepPrefix}-${this._stepCounter}`
3788
- : `${this._stepCounter}`;
3789
- }
3790
- getNextStepId() {
3791
- const nextCounter = this._stepCounter + 1;
3792
- return this._stepPrefix
3793
- ? `${this._stepPrefix}-${nextCounter}`
3794
- : `${nextCounter}`;
3795
3569
  }
3796
- /**
3797
- * Skips the next operation by incrementing the step counter.
3798
- * Used internally by concurrent execution handler during replay to skip incomplete items.
3799
- * @internal
3800
- */
3801
- skipNextOperation() {
3802
- this._stepCounter++;
3570
+ updateStepDataFromCheckpointResponse(operations) {
3571
+ log("🔄", "Updating stepData from checkpoint response:", {
3572
+ operationCount: operations.length,
3573
+ operationIds: operations.map((op) => op.Id).filter(Boolean),
3574
+ });
3575
+ operations.forEach((operation) => {
3576
+ if (operation.Id) {
3577
+ // Check if status changed
3578
+ const oldStatus = this.stepData[operation.Id]?.Status;
3579
+ const newStatus = operation.Status;
3580
+ this.stepData[operation.Id] = operation;
3581
+ log("📝", "Updated stepData entry:", operation);
3582
+ this.stepDataEmitter.emit(STEP_DATA_UPDATED_EVENT, operation.Id);
3583
+ // If status changed and we have a waiting promise, resolve it
3584
+ if (oldStatus !== newStatus) {
3585
+ this.resolveWaitingOperation(operation.Id);
3586
+ }
3587
+ }
3588
+ });
3589
+ log("✅", "StepData update completed:", {
3590
+ totalStepDataEntries: Object.keys(this.stepData).length,
3591
+ });
3803
3592
  }
3804
- checkAndUpdateReplayMode() {
3805
- if (this.durableExecutionMode === DurableExecutionMode.ReplayMode) {
3806
- const nextStepId = this.getNextStepId();
3807
- const nextStepData = this.executionContext.getStepData(nextStepId);
3808
- if (!nextStepData) {
3809
- this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
3593
+ resolveWaitingOperation(hashedStepId) {
3594
+ // Find operation by hashed ID in our operations map
3595
+ for (const [stepId, op] of this.operations.entries()) {
3596
+ if (hashId(stepId) === hashedStepId && op.resolver) {
3597
+ log("✅", `Resolving waiting operation ${stepId} due to status change`);
3598
+ op.resolver();
3599
+ op.resolver = undefined;
3600
+ if (op.timer) {
3601
+ clearTimeout(op.timer);
3602
+ op.timer = undefined;
3603
+ }
3604
+ break;
3810
3605
  }
3811
3606
  }
3812
3607
  }
3813
- captureExecutionState() {
3814
- const wasInReplayMode = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
3815
- const nextStepId = this.getNextStepId();
3816
- const stepData = this.executionContext.getStepData(nextStepId);
3817
- const wasNotFinished = !!(stepData &&
3818
- stepData.Status !== clientLambda.OperationStatus.SUCCEEDED &&
3819
- stepData.Status !== clientLambda.OperationStatus.FAILED);
3820
- return wasInReplayMode && wasNotFinished;
3608
+ getQueueStatus() {
3609
+ return {
3610
+ queueLength: this.queue.length,
3611
+ isProcessing: this.isProcessing,
3612
+ };
3821
3613
  }
3822
- checkForNonResolvingPromise() {
3823
- if (this.durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
3824
- const nextStepId = this.getNextStepId();
3825
- const nextStepData = this.executionContext.getStepData(nextStepId);
3826
- if (nextStepData &&
3827
- nextStepData.Status !== clientLambda.OperationStatus.SUCCEEDED &&
3828
- nextStepData.Status !== clientLambda.OperationStatus.FAILED) {
3829
- return new Promise(() => { }); // Non-resolving promise
3614
+ // ===== New Lifecycle & Termination Methods =====
3615
+ markOperationState(stepId, state, options) {
3616
+ let op = this.operations.get(stepId);
3617
+ if (!op) {
3618
+ // First call - create operation
3619
+ if (!options?.metadata) {
3620
+ throw new Error(`metadata required on first call for ${stepId}`);
3830
3621
  }
3622
+ op = {
3623
+ stepId,
3624
+ state,
3625
+ metadata: options.metadata,
3626
+ endTimestamp: options.endTimestamp,
3627
+ };
3628
+ this.operations.set(stepId, op);
3831
3629
  }
3832
- return null;
3833
- }
3834
- addRunningOperation(stepId) {
3835
- this.runningOperations.add(stepId);
3836
- }
3837
- removeRunningOperation(stepId) {
3838
- this.runningOperations.delete(stepId);
3839
- if (this.runningOperations.size === 0) {
3840
- this.operationsEmitter.emit(OPERATIONS_COMPLETE_EVENT);
3630
+ else {
3631
+ // Update existing operation
3632
+ op.state = state;
3633
+ if (options?.endTimestamp !== undefined) {
3634
+ op.endTimestamp = options.endTimestamp;
3635
+ }
3636
+ }
3637
+ // Cleanup if transitioning to COMPLETED
3638
+ if (state === OperationLifecycleState.COMPLETED) {
3639
+ this.cleanupOperation(stepId);
3640
+ }
3641
+ // Check if we should terminate
3642
+ // Don't check for IDLE_NOT_AWAITED - operation might be awaited later or intentionally not awaited
3643
+ if (state !== OperationLifecycleState.IDLE_NOT_AWAITED) {
3644
+ this.checkAndTerminate();
3841
3645
  }
3842
3646
  }
3843
- hasRunningOperations() {
3844
- return this.runningOperations.size > 0;
3845
- }
3846
- getOperationsEmitter() {
3847
- return this.operationsEmitter;
3848
- }
3849
- withModeManagement(operation) {
3850
- return this.modeManagement.withModeManagement(operation);
3851
- }
3852
- withDurableModeManagement(operation) {
3853
- return this.modeManagement.withDurableModeManagement(operation);
3854
- }
3855
- step(nameOrFn, fnOrOptions, maybeOptions) {
3856
- validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
3857
- return this.withDurableModeManagement(() => {
3858
- const stepHandler = createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3859
- return stepHandler(nameOrFn, fnOrOptions, maybeOptions);
3647
+ waitForRetryTimer(stepId) {
3648
+ const op = this.operations.get(stepId);
3649
+ if (!op) {
3650
+ throw new Error(`Operation ${stepId} not found`);
3651
+ }
3652
+ if (op.state !== OperationLifecycleState.RETRY_WAITING) {
3653
+ throw new Error(`Operation ${stepId} must be in RETRY_WAITING state, got ${op.state}`);
3654
+ }
3655
+ // Resolve immediately if the step was completed already
3656
+ const stepData = this.stepData[hashId(stepId)];
3657
+ if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
3658
+ return Promise.resolve();
3659
+ }
3660
+ // Start timer with polling
3661
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3662
+ // Return promise that resolves when status changes
3663
+ return new Promise((resolve) => {
3664
+ op.resolver = resolve;
3860
3665
  });
3861
3666
  }
3862
- invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
3863
- validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
3864
- return this.withDurableModeManagement(() => {
3865
- const invokeHandler = createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3866
- return invokeHandler(...[
3867
- nameOrFuncId,
3868
- funcIdOrInput,
3869
- inputOrConfig,
3870
- maybeConfig,
3871
- ]);
3667
+ waitForStatusChange(stepId) {
3668
+ const op = this.operations.get(stepId);
3669
+ if (!op) {
3670
+ throw new Error(`Operation ${stepId} not found`);
3671
+ }
3672
+ if (op.state !== OperationLifecycleState.IDLE_AWAITED) {
3673
+ throw new Error(`Operation ${stepId} must be in IDLE_AWAITED state, got ${op.state}`);
3674
+ }
3675
+ // Resolve immediately if the step was completed already
3676
+ const stepData = this.stepData[hashId(stepId)];
3677
+ if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
3678
+ return Promise.resolve();
3679
+ }
3680
+ // Start timer with polling
3681
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3682
+ // Return promise that resolves when status changes
3683
+ return new Promise((resolve) => {
3684
+ op.resolver = resolve;
3872
3685
  });
3873
3686
  }
3874
- runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
3875
- validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
3876
- return this.withDurableModeManagement(() => {
3877
- const blockHandler = createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger,
3878
- // Adapter function to maintain compatibility
3879
- (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, this.durableExecution, parentId), this._parentId);
3880
- return blockHandler(nameOrFn, fnOrOptions, maybeOptions);
3881
- });
3687
+ markOperationAwaited(stepId) {
3688
+ const op = this.operations.get(stepId);
3689
+ if (!op) {
3690
+ log("⚠️", `Cannot mark operation as awaited: ${stepId} not found`);
3691
+ return;
3692
+ }
3693
+ // Transition IDLE_NOT_AWAITED → IDLE_AWAITED
3694
+ if (op.state === OperationLifecycleState.IDLE_NOT_AWAITED) {
3695
+ op.state = OperationLifecycleState.IDLE_AWAITED;
3696
+ log("📍", `Operation marked as awaited: ${stepId}`);
3697
+ // Check if we should terminate now that operation is awaited
3698
+ this.checkAndTerminate();
3699
+ }
3882
3700
  }
3883
- wait(nameOrDuration, maybeDuration) {
3884
- validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
3885
- return this.withDurableModeManagement(() => {
3886
- const waitHandler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3887
- return typeof nameOrDuration === "string"
3888
- ? waitHandler(nameOrDuration, maybeDuration)
3889
- : waitHandler(nameOrDuration);
3890
- });
3701
+ getOperationState(stepId) {
3702
+ return this.operations.get(stepId)?.state;
3703
+ }
3704
+ getAllOperations() {
3705
+ return new Map(this.operations);
3706
+ }
3707
+ // ===== Private Helper Methods =====
3708
+ cleanupOperation(stepId) {
3709
+ const op = this.operations.get(stepId);
3710
+ if (!op)
3711
+ return;
3712
+ // Clear timer
3713
+ if (op.timer) {
3714
+ clearTimeout(op.timer);
3715
+ op.timer = undefined;
3716
+ }
3717
+ // Clear resolver
3718
+ op.resolver = undefined;
3719
+ }
3720
+ cleanupAllOperations() {
3721
+ for (const op of this.operations.values()) {
3722
+ if (op.timer) {
3723
+ clearTimeout(op.timer);
3724
+ op.timer = undefined;
3725
+ }
3726
+ op.resolver = undefined;
3727
+ }
3891
3728
  }
3892
3729
  /**
3893
- * Configure logger behavior for this context
3894
- *
3895
- * This method allows partial configuration - only the properties provided will be updated.
3896
- * For example, calling configureLogger(\{ modeAware: false \}) will only change the modeAware
3897
- * setting without affecting any previously configured custom logger.
3898
- *
3899
- * @param config - Logger configuration options including customLogger and modeAware settings (default: modeAware=true)
3900
- * @example
3901
- * // Set custom logger and enable mode-aware logging
3902
- * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
3903
- *
3904
- * // Later, disable mode-aware logging without changing the custom logger
3905
- * context.configureLogger(\{ modeAware: false \});
3730
+ * Determines if the function should terminate.
3731
+ * @returns TerminationReason if the function should terminate, or undefined if the function should not terminate
3906
3732
  */
3907
- configureLogger(config) {
3908
- if (config.customLogger !== undefined) {
3909
- this.durableLogger = config.customLogger;
3910
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3911
- this.logger = this.createModeAwareLogger(this.durableLogger);
3733
+ shouldTerminate() {
3734
+ // Rule 1: Can't terminate if checkpoint queue is not empty
3735
+ if (this.queue.length > 0) {
3736
+ return undefined;
3912
3737
  }
3913
- if (config.modeAware !== undefined) {
3914
- this.modeAwareLoggingEnabled = config.modeAware;
3738
+ // Rule 2: Can't terminate if checkpoint is currently processing
3739
+ if (this.isProcessing) {
3740
+ return undefined;
3915
3741
  }
3742
+ // Rule 3: Can't terminate if there are pending force checkpoint promises
3743
+ if (this.forceCheckpointPromises.length > 0) {
3744
+ return undefined;
3745
+ }
3746
+ const allOps = Array.from(this.operations.values());
3747
+ // Rule 4: Can't terminate if any operation is EXECUTING
3748
+ const hasExecuting = allOps.some((op) => op.state === OperationLifecycleState.EXECUTING);
3749
+ if (hasExecuting) {
3750
+ return undefined;
3751
+ }
3752
+ // Rule 5: Clean up operations whose ancestors are complete or pending completion
3753
+ for (const op of allOps) {
3754
+ if (op.state === OperationLifecycleState.RETRY_WAITING ||
3755
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3756
+ op.state === OperationLifecycleState.IDLE_AWAITED) {
3757
+ // Use the original stepId from metadata, not the potentially hashed op.stepId
3758
+ const originalStepId = op.metadata.stepId;
3759
+ if (this.hasFinishedAncestor(originalStepId)) {
3760
+ log("🧹", `Cleaning up operation with completed ancestor: ${originalStepId}`);
3761
+ this.cleanupOperation(op.stepId);
3762
+ this.operations.delete(op.stepId);
3763
+ }
3764
+ }
3765
+ }
3766
+ // Re-check operations after cleanup
3767
+ const remainingOps = Array.from(this.operations.values());
3768
+ // Determine if we should terminate
3769
+ const hasWaiting = remainingOps.some((op) => op.state === OperationLifecycleState.RETRY_WAITING ||
3770
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3771
+ op.state === OperationLifecycleState.IDLE_AWAITED);
3772
+ if (hasWaiting) {
3773
+ return this.determineTerminationReason(remainingOps);
3774
+ }
3775
+ return undefined;
3916
3776
  }
3917
- createCallback(nameOrConfig, maybeConfig) {
3918
- validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
3919
- return this.withDurableModeManagement(() => {
3920
- const callbackFactory = createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId);
3921
- return callbackFactory(nameOrConfig, maybeConfig);
3922
- });
3923
- }
3924
- waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
3925
- validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
3926
- return this.withDurableModeManagement(() => {
3927
- const waitForCallbackHandler = createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this));
3928
- return waitForCallbackHandler(nameOrSubmitter, submitterOrConfig, maybeConfig);
3929
- });
3930
- }
3931
- waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
3932
- validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
3933
- return this.withDurableModeManagement(() => {
3934
- const waitForConditionHandler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3935
- return typeof nameOrCheckFunc === "string" ||
3936
- nameOrCheckFunc === undefined
3937
- ? waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig)
3938
- : waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig);
3939
- });
3940
- }
3941
- map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
3942
- validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
3943
- return this.withDurableModeManagement(() => {
3944
- const mapHandler = createMapHandler(this.executionContext, this._executeConcurrently.bind(this));
3945
- return mapHandler(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
3946
- });
3777
+ checkAndTerminate() {
3778
+ const terminationReason = this.shouldTerminate();
3779
+ if (terminationReason) {
3780
+ this.scheduleTermination(terminationReason);
3781
+ return;
3782
+ }
3783
+ this.abortTermination();
3784
+ }
3785
+ abortTermination() {
3786
+ if (this.terminationTimer) {
3787
+ clearTimeout(this.terminationTimer);
3788
+ this.terminationTimer = null;
3789
+ this.terminationReason = null;
3790
+ log("🔄", "Termination aborted - conditions changed");
3791
+ }
3947
3792
  }
3948
- parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
3949
- validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
3950
- return this.withDurableModeManagement(() => {
3951
- const parallelHandler = createParallelHandler(this.executionContext, this._executeConcurrently.bind(this));
3952
- return parallelHandler(nameOrBranches, branchesOrConfig, maybeConfig);
3793
+ scheduleTermination(reason) {
3794
+ // If already scheduled with same reason, don't reschedule
3795
+ if (this.terminationTimer && this.terminationReason === reason) {
3796
+ return;
3797
+ }
3798
+ // Clear any existing timer
3799
+ this.abortTermination();
3800
+ // Schedule new termination
3801
+ this.terminationReason = reason;
3802
+ log("⏱️", "Scheduling termination", {
3803
+ reason,
3804
+ cooldownMs: this.TERMINATION_COOLDOWN_MS,
3953
3805
  });
3806
+ this.terminationTimer = setTimeout(() => {
3807
+ if (!this.shouldTerminate()) {
3808
+ log("🔄", "Termination conditions no longer valid after cooldown, aborting termination");
3809
+ this.abortTermination();
3810
+ return;
3811
+ }
3812
+ this.executeTermination(reason);
3813
+ }, this.TERMINATION_COOLDOWN_MS);
3814
+ }
3815
+ executeTermination(reason) {
3816
+ log("🛑", "Executing termination after cooldown", { reason });
3817
+ // Clear timer
3818
+ this.terminationTimer = null;
3819
+ this.terminationReason = null;
3820
+ // Cleanup all operations before terminating
3821
+ this.cleanupAllOperations();
3822
+ // Call termination manager directly
3823
+ this.terminationManager.terminate({ reason });
3824
+ }
3825
+ determineTerminationReason(ops) {
3826
+ // Priority: RETRY_SCHEDULED > WAIT_SCHEDULED > CALLBACK_PENDING
3827
+ if (ops.some((op) => op.state === OperationLifecycleState.RETRY_WAITING &&
3828
+ op.metadata.subType === "Step")) {
3829
+ return TerminationReason.RETRY_SCHEDULED;
3830
+ }
3831
+ if (ops.some((op) => (op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3832
+ op.state === OperationLifecycleState.IDLE_AWAITED) &&
3833
+ op.metadata.subType === "Wait")) {
3834
+ return TerminationReason.WAIT_SCHEDULED;
3835
+ }
3836
+ return TerminationReason.CALLBACK_PENDING;
3954
3837
  }
3955
- _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
3956
- validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
3957
- return this.withDurableModeManagement(() => {
3958
- const concurrentExecutionHandler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
3959
- const promise = concurrentExecutionHandler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
3960
- // Prevent unhandled promise rejections
3961
- promise?.catch(() => { });
3962
- return promise;
3963
- });
3838
+ startTimerWithPolling(stepId, endTimestamp) {
3839
+ const op = this.operations.get(stepId);
3840
+ if (!op)
3841
+ return;
3842
+ let delay;
3843
+ if (endTimestamp) {
3844
+ // Ensure endTimestamp is a Date object
3845
+ const timestamp = endTimestamp instanceof Date ? endTimestamp : new Date(endTimestamp);
3846
+ // Wait until endTimestamp
3847
+ delay = Math.max(0, timestamp.getTime() - Date.now());
3848
+ }
3849
+ else {
3850
+ // No timestamp, start polling immediately (1 second delay)
3851
+ delay = 1000;
3852
+ }
3853
+ // Initialize poll count and start time for this operation
3854
+ if (!op.pollCount) {
3855
+ op.pollCount = 0;
3856
+ op.pollStartTime = Date.now();
3857
+ }
3858
+ op.timer = setTimeout(() => {
3859
+ this.forceRefreshAndCheckStatus(stepId);
3860
+ }, delay);
3964
3861
  }
3965
- get promise() {
3966
- return createPromiseHandler(this.step.bind(this));
3862
+ async forceRefreshAndCheckStatus(stepId) {
3863
+ const op = this.operations.get(stepId);
3864
+ if (!op)
3865
+ return;
3866
+ // Check if we've exceeded max polling duration (15 minutes)
3867
+ const MAX_POLL_DURATION_MS = 15 * 60 * 1000; // 15 minutes
3868
+ if (op.pollStartTime &&
3869
+ Date.now() - op.pollStartTime > MAX_POLL_DURATION_MS) {
3870
+ // Stop polling after 15 minutes to prevent indefinite resource consumption.
3871
+ // We don't resolve or reject the promise because the handler cannot continue
3872
+ // without a status change. The execution will remain suspended until the
3873
+ // operation completes or the Lambda times out.
3874
+ log("⏱️", `Max polling duration (15 min) exceeded for ${stepId}, stopping poll`);
3875
+ if (op.timer) {
3876
+ clearTimeout(op.timer);
3877
+ op.timer = undefined;
3878
+ }
3879
+ return;
3880
+ }
3881
+ // Get old status before refresh
3882
+ const oldStatus = this.stepData[hashId(stepId)]?.Status;
3883
+ // Force checkpoint to refresh state from backend
3884
+ await this.forceCheckpoint();
3885
+ // Get new status after refresh
3886
+ const newStatus = this.stepData[hashId(stepId)]?.Status;
3887
+ // Check if status changed
3888
+ if (newStatus !== oldStatus) {
3889
+ // Status changed, resolve the waiting promise
3890
+ log("✅", `Status changed for ${stepId}: ${oldStatus} → ${newStatus}`);
3891
+ op.resolver?.();
3892
+ op.resolver = undefined;
3893
+ // Clear timer
3894
+ if (op.timer) {
3895
+ clearTimeout(op.timer);
3896
+ op.timer = undefined;
3897
+ }
3898
+ }
3899
+ else {
3900
+ // Status not changed yet, poll again with incremental backoff
3901
+ // Start at 1s, increase by 1s each poll, max 10s
3902
+ op.pollCount = (op.pollCount || 0) + 1;
3903
+ const nextDelay = Math.min(op.pollCount * 1000, 10000);
3904
+ op.timer = setTimeout(() => {
3905
+ this.forceRefreshAndCheckStatus(stepId);
3906
+ }, nextDelay);
3907
+ }
3967
3908
  }
3968
3909
  }
3969
- const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => {
3970
- return new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3971
- };
3972
3910
 
3973
3911
  /*
3974
3912
  Second Approach (Promise-based):
@@ -4262,43 +4200,6 @@ const createDefaultLogger = (executionContext) => {
4262
4200
  return new DefaultLogger(executionContext);
4263
4201
  };
4264
4202
 
4265
- /**
4266
- * Tracks active async operations to prevent premature termination
4267
- */
4268
- class ActiveOperationsTracker {
4269
- activeCount = 0;
4270
- /**
4271
- * Increment the counter when starting an async operation
4272
- */
4273
- increment() {
4274
- this.activeCount++;
4275
- }
4276
- /**
4277
- * Decrement the counter when an async operation completes
4278
- */
4279
- decrement() {
4280
- this.activeCount = Math.max(0, this.activeCount - 1);
4281
- }
4282
- /**
4283
- * Check if there are any active operations
4284
- */
4285
- hasActive() {
4286
- return this.activeCount > 0;
4287
- }
4288
- /**
4289
- * Get the current count of active operations
4290
- */
4291
- getCount() {
4292
- return this.activeCount;
4293
- }
4294
- /**
4295
- * Reset the counter (useful for testing)
4296
- */
4297
- reset() {
4298
- this.activeCount = 0;
4299
- }
4300
- }
4301
-
4302
4203
  let defaultLambdaClient;
4303
4204
  /**
4304
4205
  * Durable execution client which uses an API-based LambdaClient
@@ -4418,6 +4319,20 @@ class DurableExecutionInvocationInputWithClient {
4418
4319
  this.DurableExecutionArn = params.DurableExecutionArn;
4419
4320
  this.CheckpointToken = params.CheckpointToken;
4420
4321
  }
4322
+ static isInstance(event) {
4323
+ if (event instanceof DurableExecutionInvocationInputWithClient) {
4324
+ return true;
4325
+ }
4326
+ return !!(typeof event === "object" &&
4327
+ event &&
4328
+ event.toString() ===
4329
+ "[object DurableExecutionInvocationInputWithClient]" &&
4330
+ "durableExecutionClient" in event &&
4331
+ event.constructor.name === "DurableExecutionInvocationInputWithClient");
4332
+ }
4333
+ get [Symbol.toStringTag]() {
4334
+ return "DurableExecutionInvocationInputWithClient";
4335
+ }
4421
4336
  }
4422
4337
 
4423
4338
  const initializeExecutionContext = async (event, context, lambdaClient) => {
@@ -4427,7 +4342,7 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4427
4342
  const durableExecutionArn = event.DurableExecutionArn;
4428
4343
  const durableExecutionClient =
4429
4344
  // Allow passing arbitrary durable clients if the input is a custom class
4430
- event instanceof DurableExecutionInvocationInputWithClient
4345
+ DurableExecutionInvocationInputWithClient.isInstance(event)
4431
4346
  ? event.durableExecutionClient
4432
4347
  : new DurableExecutionApiClient(lambdaClient);
4433
4348
  // Create logger for initialization errors using existing logger factory
@@ -4466,7 +4381,6 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4466
4381
  durableExecutionClient,
4467
4382
  _stepData: stepData,
4468
4383
  terminationManager: new TerminationManager(),
4469
- activeOperationsTracker: new ActiveOperationsTracker(),
4470
4384
  durableExecutionArn,
4471
4385
  pendingCompletions: new Set(),
4472
4386
  getStepData(stepId) {
@@ -4485,7 +4399,7 @@ const LAMBDA_RESPONSE_SIZE_LIMIT = 6 * 1024 * 1024 - 50; // 6MB in bytes, minus
4485
4399
  async function runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler) {
4486
4400
  // Create checkpoint manager and step data emitter
4487
4401
  const stepDataEmitter = new events.EventEmitter();
4488
- const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, executionContext.activeOperationsTracker, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), executionContext.pendingCompletions);
4402
+ const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), new Set());
4489
4403
  // Set the checkpoint terminating callback on the termination manager
4490
4404
  executionContext.terminationManager.setCheckpointTerminatingCallback(() => {
4491
4405
  checkpointManager.setTerminating();
@@ -4587,6 +4501,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4587
4501
  Payload: serializedResult, // Reuse the already serialized result
4588
4502
  });
4589
4503
  log("✅", "Large result successfully checkpointed");
4504
+ // Wait for any pending checkpoints to complete before returning
4505
+ try {
4506
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4507
+ }
4508
+ catch (waitError) {
4509
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4510
+ // Continue anyway - the checkpoint will be retried on next invocation
4511
+ }
4590
4512
  // Return a response indicating the result was checkpointed
4591
4513
  return {
4592
4514
  Status: exports.InvocationStatus.SUCCEEDED,
@@ -4600,6 +4522,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4600
4522
  }
4601
4523
  }
4602
4524
  // If response size is acceptable, return the response
4525
+ // Wait for any pending checkpoints to complete before returning
4526
+ try {
4527
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4528
+ }
4529
+ catch (waitError) {
4530
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4531
+ // Continue anyway - the checkpoint will be retried on next invocation
4532
+ }
4603
4533
  return {
4604
4534
  Status: exports.InvocationStatus.SUCCEEDED,
4605
4535
  Result: serializedResult,
@@ -4612,6 +4542,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4612
4542
  log("🛑", "Unrecoverable invocation error - terminating Lambda execution");
4613
4543
  throw error; // Re-throw the error to terminate Lambda execution
4614
4544
  }
4545
+ // Wait for any pending checkpoints to complete before returning error
4546
+ try {
4547
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4548
+ }
4549
+ catch (waitError) {
4550
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4551
+ // Continue anyway - the checkpoint will be retried on next invocation
4552
+ }
4615
4553
  return {
4616
4554
  Status: exports.InvocationStatus.FAILED,
4617
4555
  Error: createErrorObjectFromError(error),
@@ -4622,16 +4560,10 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4622
4560
  * Validates that the event is a proper durable execution input
4623
4561
  */
4624
4562
  function validateDurableExecutionEvent(event) {
4625
- try {
4626
- const eventObj = event;
4627
- if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
4628
- throw new Error("Missing required durable execution fields");
4629
- }
4630
- }
4631
- catch {
4632
- const msg = `Unexpected payload provided to start the durable execution.
4633
- Check your resource configurations to confirm the durability is set.`;
4634
- throw new Error(msg);
4563
+ const eventObj = event;
4564
+ if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
4565
+ throw new Error("Unexpected payload provided to start the durable execution.\n" +
4566
+ "Check your resource configurations to confirm the durability is set.");
4635
4567
  }
4636
4568
  }
4637
4569
  /**
@@ -4709,14 +4641,7 @@ const withDurableExecution = (handler, config) => {
4709
4641
  return async (event, context) => {
4710
4642
  validateDurableExecutionEvent(event);
4711
4643
  const { executionContext, durableExecutionMode, checkpointToken } = await initializeExecutionContext(event, context, config?.client);
4712
- let response = null;
4713
- try {
4714
- response = await runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
4715
- return response;
4716
- }
4717
- catch (err) {
4718
- throw err;
4719
- }
4644
+ return runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
4720
4645
  };
4721
4646
  };
4722
4647