@aws/durable-execution-sdk-js 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +28 -55
  2. package/dist/index.mjs +1945 -2020
  3. package/dist/index.mjs.map +1 -1
  4. package/dist-cjs/index.js +1944 -2019
  5. package/dist-cjs/index.js.map +1 -1
  6. package/dist-types/context/durable-context/durable-context.d.ts +0 -6
  7. package/dist-types/context/durable-context/durable-context.d.ts.map +1 -1
  8. package/dist-types/errors/durable-error/durable-error.d.ts +6 -0
  9. package/dist-types/errors/durable-error/durable-error.d.ts.map +1 -1
  10. package/dist-types/errors/step-errors/step-errors.d.ts +1 -0
  11. package/dist-types/errors/step-errors/step-errors.d.ts.map +1 -1
  12. package/dist-types/handlers/callback-handler/callback-promise.d.ts +2 -2
  13. package/dist-types/handlers/callback-handler/callback-promise.d.ts.map +1 -1
  14. package/dist-types/handlers/callback-handler/callback.d.ts +1 -2
  15. package/dist-types/handlers/callback-handler/callback.d.ts.map +1 -1
  16. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts +1 -0
  17. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts.map +1 -1
  18. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts +1 -2
  19. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts.map +1 -1
  20. package/dist-types/handlers/run-in-child-context-handler/run-in-child-context-handler.d.ts.map +1 -1
  21. package/dist-types/handlers/step-handler/step-handler.d.ts +1 -9
  22. package/dist-types/handlers/step-handler/step-handler.d.ts.map +1 -1
  23. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts +1 -6
  24. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts.map +1 -1
  25. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts +2 -0
  26. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts.map +1 -0
  27. package/dist-types/handlers/wait-handler/wait-handler.d.ts +1 -2
  28. package/dist-types/handlers/wait-handler/wait-handler.d.ts.map +1 -1
  29. package/dist-types/index.d.ts +1 -1
  30. package/dist-types/index.d.ts.map +1 -1
  31. package/dist-types/testing/create-test-checkpoint-manager.d.ts.map +1 -1
  32. package/dist-types/testing/create-test-durable-context.d.ts.map +1 -1
  33. package/dist-types/testing/mock-checkpoint-manager.d.ts +0 -1
  34. package/dist-types/testing/mock-checkpoint-manager.d.ts.map +1 -1
  35. package/dist-types/testing/mock-checkpoint.d.ts +1 -0
  36. package/dist-types/testing/mock-checkpoint.d.ts.map +1 -1
  37. package/dist-types/types/batch.d.ts +8 -0
  38. package/dist-types/types/batch.d.ts.map +1 -1
  39. package/dist-types/types/core.d.ts +1 -3
  40. package/dist-types/types/core.d.ts.map +1 -1
  41. package/dist-types/types/durable-context.d.ts +8 -6
  42. package/dist-types/types/durable-context.d.ts.map +1 -1
  43. package/dist-types/types/index.d.ts +2 -0
  44. package/dist-types/types/index.d.ts.map +1 -1
  45. package/dist-types/types/operation-lifecycle-state.d.ts +27 -0
  46. package/dist-types/types/operation-lifecycle-state.d.ts.map +1 -0
  47. package/dist-types/types/operation-lifecycle.d.ts +27 -0
  48. package/dist-types/types/operation-lifecycle.d.ts.map +1 -0
  49. package/dist-types/types/step.d.ts +45 -0
  50. package/dist-types/types/step.d.ts.map +1 -1
  51. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts +2 -0
  52. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts.map +1 -0
  53. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts +2 -0
  54. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts.map +1 -0
  55. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts +37 -0
  56. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts.map +1 -1
  57. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts +43 -12
  58. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts.map +1 -1
  59. package/dist-types/utils/constants/constants.d.ts +0 -1
  60. package/dist-types/utils/constants/constants.d.ts.map +1 -1
  61. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts +3 -1
  62. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts.map +1 -1
  63. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts +2 -0
  64. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts.map +1 -0
  65. package/dist-types/utils/termination-helper/termination-helper.d.ts +0 -9
  66. package/dist-types/utils/termination-helper/termination-helper.d.ts.map +1 -1
  67. package/dist-types/with-durable-execution.d.ts.map +1 -1
  68. package/package.json +8 -4
  69. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts +0 -2
  70. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts.map +0 -1
  71. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts +0 -31
  72. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts.map +0 -1
  73. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts +0 -2
  74. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts.map +0 -1
  75. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts +0 -35
  76. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts.map +0 -1
  77. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts +0 -2
  78. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts.map +0 -1
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { OperationStatus, OperationAction, OperationType, LambdaClient, GetDurableExecutionStateCommand, CheckpointDurableExecutionCommand } from '@aws-sdk/client-lambda';
1
+ import { OperationType, OperationStatus, OperationAction, LambdaClient, GetDurableExecutionStateCommand, CheckpointDurableExecutionCommand } from '@aws-sdk/client-lambda';
2
2
  import { EventEmitter } from 'events';
3
3
  import { AsyncLocalStorage } from 'async_hooks';
4
4
  import { createHash } from 'crypto';
@@ -165,11 +165,56 @@ var DurableLogLevel;
165
165
  })(DurableLogLevel || (DurableLogLevel = {}));
166
166
 
167
167
  /**
168
+ * Execution semantics for step operations.
169
+ *
170
+ * @remarks
171
+ * These semantics control how step execution is checkpointed and replayed. **Important**: The guarantees apply *per
172
+ * retry attempt*, not per overall workflow execution.
173
+ *
174
+ * With retries enabled (the default), a step could execute multiple times across different retry attempts even when
175
+ * using `AtMostOncePerRetry`. To achieve step-level at-most-once execution, combine `AtMostOncePerRetry` with a retry
176
+ * strategy that disables retries (`shouldRetry: false`).
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * // At-least-once per retry (default) - safe for idempotent operations
181
+ * await context.step("send-notification", async () => sendEmail(), {
182
+ * semantics: StepSemantics.AtLeastOncePerRetry,
183
+ * });
184
+ *
185
+ * // At-most-once per retry - for non-idempotent operations
186
+ * await context.step("charge-payment", async () => processPayment(), {
187
+ * semantics: StepSemantics.AtMostOncePerRetry,
188
+ * retryStrategy: () => ({ shouldRetry: false }),
189
+ * });
190
+ * ```
191
+ *
168
192
  * @public
169
193
  */
170
194
  var StepSemantics;
171
195
  (function (StepSemantics) {
196
+ /**
197
+ * At-most-once execution per retry attempt.
198
+ *
199
+ * @remarks
200
+ * A checkpoint is created before step execution. If a failure occurs after the checkpoint
201
+ * but before step completion, the previous step retry attempt is skipped on replay.
202
+ *
203
+ * **Note**: This is "at-most-once *per retry*". With multiple retry attempts, the step
204
+ * could still execute multiple times across different retries. To guarantee the step
205
+ * executes at most once, disable retries by returning
206
+ * `{ shouldRetry: false }` from your retry strategy.
207
+ */
172
208
  StepSemantics["AtMostOncePerRetry"] = "AT_MOST_ONCE_PER_RETRY";
209
+ /**
210
+ * At-least-once execution per retry attempt (default).
211
+ *
212
+ * @remarks
213
+ * The step will execute at least once on each retry attempt. If the step succeeds
214
+ * but the checkpoint fails (e.g., due to a sandbox crash), the step will re-execute
215
+ * on replay. This is the safer default for operations that are idempotent or can
216
+ * tolerate duplicate execution.
217
+ */
173
218
  StepSemantics["AtLeastOncePerRetry"] = "AT_LEAST_ONCE_PER_RETRY";
174
219
  })(StepSemantics || (StepSemantics = {}));
175
220
  /**
@@ -298,6 +343,34 @@ class DurablePromise {
298
343
  }
299
344
  }
300
345
 
346
+ /**
347
+ * Represents the lifecycle state of an operation in the durable execution system.
348
+ * This is distinct from AWS SDK's OperationStatus (PENDING, SUCCEEDED, FAILED).
349
+ */
350
+ var OperationLifecycleState;
351
+ (function (OperationLifecycleState) {
352
+ /**
353
+ * Operation is currently executing user code (step function, waitForCondition check)
354
+ */
355
+ OperationLifecycleState["EXECUTING"] = "EXECUTING";
356
+ /**
357
+ * Operation is waiting for retry timer to expire before re-executing user code
358
+ */
359
+ OperationLifecycleState["RETRY_WAITING"] = "RETRY_WAITING";
360
+ /**
361
+ * Operation is waiting for external event (timer, callback, invoke) but not awaited yet (phase 1)
362
+ */
363
+ OperationLifecycleState["IDLE_NOT_AWAITED"] = "IDLE_NOT_AWAITED";
364
+ /**
365
+ * Operation is waiting for external event and has been awaited (phase 2)
366
+ */
367
+ OperationLifecycleState["IDLE_AWAITED"] = "IDLE_AWAITED";
368
+ /**
369
+ * Operation has completed (success or permanent failure)
370
+ */
371
+ OperationLifecycleState["COMPLETED"] = "COMPLETED";
372
+ })(OperationLifecycleState || (OperationLifecycleState = {}));
373
+
301
374
  /**
302
375
  * Converts a Duration object to total seconds
303
376
  * @param duration - Duration object with at least one time unit specified
@@ -311,6 +384,21 @@ function durationToSeconds(duration) {
311
384
  return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 + seconds;
312
385
  }
313
386
 
387
+ /**
388
+ * Terminates execution for unrecoverable errors and returns a never-resolving promise
389
+ * @param context - The execution context containing the termination manager
390
+ * @param error - The unrecoverable error that caused termination
391
+ * @param stepIdentifier - The step name or ID for error messaging
392
+ * @returns A never-resolving promise
393
+ */
394
+ function terminateForUnrecoverableError(context, error, stepIdentifier) {
395
+ context.terminationManager.terminate({
396
+ reason: error.terminationReason,
397
+ message: `Unrecoverable error in step ${stepIdentifier}: ${error.message}`,
398
+ });
399
+ return new Promise(() => { }); // Never-resolving promise
400
+ }
401
+
314
402
  const safeStringify = (data) => {
315
403
  try {
316
404
  const seen = new WeakSet();
@@ -344,238 +432,6 @@ const log = (emoji, message, data) => {
344
432
  }
345
433
  };
346
434
 
347
- var TerminationReason;
348
- (function (TerminationReason) {
349
- // Default termination reason
350
- TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
351
- // Retry-related reasons
352
- TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
353
- TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
354
- // Wait-related reasons
355
- TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
356
- // Callback-related reasons
357
- TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
358
- // Error-related reasons
359
- TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
360
- TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
361
- TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
362
- // Custom reason
363
- TerminationReason["CUSTOM"] = "CUSTOM";
364
- })(TerminationReason || (TerminationReason = {}));
365
-
366
- const asyncLocalStorage = new AsyncLocalStorage();
367
- const getActiveContext = () => {
368
- return asyncLocalStorage.getStore();
369
- };
370
- const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
371
- return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
372
- };
373
- const validateContextUsage = (operationContextId, operationName, terminationManager) => {
374
- const contextId = operationContextId || "root";
375
- const activeContext = getActiveContext();
376
- if (!activeContext) {
377
- return;
378
- }
379
- if (activeContext.contextId !== contextId) {
380
- const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
381
- terminationManager.terminate({
382
- reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
383
- message: errorMessage,
384
- error: new Error(errorMessage),
385
- });
386
- // Only call termination manager, don't throw or return promise
387
- }
388
- };
389
-
390
- const HASH_LENGTH = 16;
391
- /**
392
- * Creates an MD5 hash of the input string for better performance than SHA-256
393
- * @param input - The string to hash
394
- * @returns The truncated hexadecimal hash string
395
- */
396
- const hashId = (input) => {
397
- return createHash("md5")
398
- .update(input)
399
- .digest("hex")
400
- .substring(0, HASH_LENGTH);
401
- };
402
- /**
403
- * Helper function to get step data using the original stepId
404
- * This function handles the hashing internally so callers don't need to worry about it
405
- * @param stepData - The stepData record from context
406
- * @param stepId - The original stepId (will be hashed internally)
407
- * @returns The operation data or undefined if not found
408
- */
409
- const getStepData = (stepData, stepId) => {
410
- const hashedId = hashId(stepId);
411
- return stepData[hashedId];
412
- };
413
-
414
- /**
415
- * Checks if any ancestor operation in the parent chain has finished (SUCCEEDED or FAILED)
416
- * or has a pending completion checkpoint
417
- */
418
- function hasFinishedAncestor(context, parentId) {
419
- if (!parentId) {
420
- log("🔍", "hasFinishedAncestor: No parentId provided");
421
- return false;
422
- }
423
- // First check if any ancestor has a pending completion checkpoint
424
- if (hasPendingAncestorCompletion(context, parentId)) {
425
- log("🔍", "hasFinishedAncestor: Found ancestor with pending completion!", {
426
- parentId,
427
- });
428
- return true;
429
- }
430
- let currentHashedId = hashId(parentId);
431
- log("🔍", "hasFinishedAncestor: Starting check", {
432
- parentId,
433
- initialHashedId: currentHashedId,
434
- });
435
- while (currentHashedId) {
436
- const parentOperation = context._stepData[currentHashedId];
437
- log("🔍", "hasFinishedAncestor: Checking operation", {
438
- hashedId: currentHashedId,
439
- hasOperation: !!parentOperation,
440
- status: parentOperation?.Status,
441
- type: parentOperation?.Type,
442
- });
443
- if (parentOperation?.Status === OperationStatus.SUCCEEDED ||
444
- parentOperation?.Status === OperationStatus.FAILED) {
445
- log("🔍", "hasFinishedAncestor: Found finished ancestor!", {
446
- hashedId: currentHashedId,
447
- status: parentOperation.Status,
448
- });
449
- return true;
450
- }
451
- currentHashedId = parentOperation?.ParentId;
452
- }
453
- log("🔍", "hasFinishedAncestor: No finished ancestor found");
454
- return false;
455
- }
456
- /**
457
- * Checks if any ancestor has a pending completion checkpoint
458
- */
459
- function hasPendingAncestorCompletion(context, stepId) {
460
- let currentHashedId = hashId(stepId);
461
- while (currentHashedId) {
462
- if (context.pendingCompletions.has(currentHashedId)) {
463
- return true;
464
- }
465
- const operation = context._stepData[currentHashedId];
466
- currentHashedId = operation?.ParentId;
467
- }
468
- return false;
469
- }
470
- /**
471
- * Terminates execution and returns a never-resolving promise to prevent code progression
472
- * @param context - The execution context containing the termination manager
473
- * @param reason - The termination reason
474
- * @param message - The termination message
475
- * @returns A never-resolving promise
476
- */
477
- function terminate(context, reason, message) {
478
- const activeContext = getActiveContext();
479
- // If we have a parent context, add delay to let checkpoints process
480
- if (activeContext?.parentId) {
481
- return new Promise(async (_resolve, _reject) => {
482
- // Wait a tick to let any pending checkpoints start processing
483
- await new Promise((resolve) => setImmediate(resolve));
484
- log("🔍", "Terminate called - checking context:", {
485
- hasActiveContext: !!activeContext,
486
- contextId: activeContext?.contextId,
487
- parentId: activeContext?.parentId,
488
- reason,
489
- message,
490
- });
491
- const ancestorFinished = hasFinishedAncestor(context, activeContext.parentId);
492
- log("🔍", "Ancestor check result:", {
493
- parentId: activeContext.parentId,
494
- ancestorFinished,
495
- });
496
- if (ancestorFinished) {
497
- log("🛑", "Skipping termination - ancestor already finished:", {
498
- contextId: activeContext.contextId,
499
- parentId: activeContext.parentId,
500
- reason,
501
- message,
502
- });
503
- // Return never-resolving promise without terminating
504
- return;
505
- }
506
- // Check if there are active operations before terminating
507
- const tracker = context.activeOperationsTracker;
508
- if (tracker && tracker.hasActive()) {
509
- log("⏳", "Deferring termination - active operations in progress:", {
510
- activeCount: tracker.getCount(),
511
- reason,
512
- message,
513
- });
514
- // Wait for operations to complete, then terminate
515
- const checkInterval = setInterval(() => {
516
- if (!tracker.hasActive()) {
517
- clearInterval(checkInterval);
518
- log("✅", "Active operations completed, proceeding with termination:", {
519
- reason,
520
- message,
521
- });
522
- context.terminationManager.terminate({
523
- reason,
524
- message,
525
- });
526
- }
527
- }, 10);
528
- return;
529
- }
530
- // No active operations, terminate immediately
531
- context.terminationManager.terminate({
532
- reason,
533
- message,
534
- });
535
- });
536
- }
537
- // No parent context - check active operations and terminate
538
- const tracker = context.activeOperationsTracker;
539
- if (tracker && tracker.hasActive()) {
540
- log("⏳", "Deferring termination - active operations in progress:", {
541
- activeCount: tracker.getCount(),
542
- reason,
543
- message,
544
- });
545
- return new Promise((_resolve, _reject) => {
546
- const checkInterval = setInterval(() => {
547
- if (!tracker.hasActive()) {
548
- clearInterval(checkInterval);
549
- log("✅", "Active operations completed, proceeding with termination:", {
550
- reason,
551
- message,
552
- });
553
- context.terminationManager.terminate({
554
- reason,
555
- message,
556
- });
557
- }
558
- }, 10);
559
- });
560
- }
561
- // No parent, no active operations - terminate immediately
562
- context.terminationManager.terminate({
563
- reason,
564
- message,
565
- });
566
- return new Promise(() => { });
567
- }
568
- /**
569
- * Terminates execution for unrecoverable errors and returns a never-resolving promise
570
- * @param context - The execution context containing the termination manager
571
- * @param error - The unrecoverable error that caused termination
572
- * @param stepIdentifier - The step name or ID for error messaging
573
- * @returns A never-resolving promise
574
- */
575
- function terminateForUnrecoverableError(context, error, stepIdentifier) {
576
- return terminate(context, error.terminationReason, `Unrecoverable error in step ${stepIdentifier}: ${error.message}`);
577
- }
578
-
579
435
  const DEFAULT_CONFIG$1 = {
580
436
  maxAttempts: 3,
581
437
  initialDelay: { seconds: 5 },
@@ -747,6 +603,7 @@ const retryPresets = {
747
603
  /**
748
604
  * Error thrown when a step with AT_MOST_ONCE_PER_RETRY semantics was started but interrupted
749
605
  * before completion.
606
+ * @public
750
607
  */
751
608
  class StepInterruptedError extends Error {
752
609
  constructor(_stepId, _stepName) {
@@ -755,13 +612,9 @@ class StepInterruptedError extends Error {
755
612
  }
756
613
  }
757
614
 
758
- /**
759
- * Shared constants to avoid circular dependencies
760
- */
761
- const OPERATIONS_COMPLETE_EVENT = "allOperationsComplete";
762
-
763
615
  /**
764
616
  * Base class for all durable operation errors
617
+ * @public
765
618
  */
766
619
  class DurableOperationError extends Error {
767
620
  cause;
@@ -810,6 +663,7 @@ class DurableOperationError extends Error {
810
663
  }
811
664
  /**
812
665
  * Error thrown when a step operation fails
666
+ * @public
813
667
  */
814
668
  class StepError extends DurableOperationError {
815
669
  errorType = "StepError";
@@ -819,6 +673,7 @@ class StepError extends DurableOperationError {
819
673
  }
820
674
  /**
821
675
  * Error thrown when a callback operation fails
676
+ * @public
822
677
  */
823
678
  class CallbackError extends DurableOperationError {
824
679
  errorType = "CallbackError";
@@ -828,6 +683,7 @@ class CallbackError extends DurableOperationError {
828
683
  }
829
684
  /**
830
685
  * Error thrown when an invoke operation fails
686
+ * @public
831
687
  */
832
688
  class InvokeError extends DurableOperationError {
833
689
  errorType = "InvokeError";
@@ -837,6 +693,7 @@ class InvokeError extends DurableOperationError {
837
693
  }
838
694
  /**
839
695
  * Error thrown when a child context operation fails
696
+ * @public
840
697
  */
841
698
  class ChildContextError extends DurableOperationError {
842
699
  errorType = "ChildContextError";
@@ -846,6 +703,7 @@ class ChildContextError extends DurableOperationError {
846
703
  }
847
704
  /**
848
705
  * Error thrown when a wait for condition operation fails
706
+ * @public
849
707
  */
850
708
  class WaitForConditionError extends DurableOperationError {
851
709
  errorType = "WaitForConditionError";
@@ -1013,6 +871,25 @@ function createClassSerdesWithDates(cls, dateProps) {
1013
871
  };
1014
872
  }
1015
873
 
874
+ var TerminationReason;
875
+ (function (TerminationReason) {
876
+ // Default termination reason
877
+ TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
878
+ // Retry-related reasons
879
+ TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
880
+ TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
881
+ // Wait-related reasons
882
+ TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
883
+ // Callback-related reasons
884
+ TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
885
+ // Error-related reasons
886
+ TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
887
+ TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
888
+ TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
889
+ // Custom reason
890
+ TerminationReason["CUSTOM"] = "CUSTOM";
891
+ })(TerminationReason || (TerminationReason = {}));
892
+
1016
893
  /**
1017
894
  * Base class for all unrecoverable errors
1018
895
  * Any error that inherits from this class indicates a fatal condition
@@ -1133,6 +1010,30 @@ async function safeDeserialize(serdes, data, stepId, stepName, terminationManage
1133
1010
  }
1134
1011
  }
1135
1012
 
1013
+ const asyncLocalStorage = new AsyncLocalStorage();
1014
+ const getActiveContext = () => {
1015
+ return asyncLocalStorage.getStore();
1016
+ };
1017
+ const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
1018
+ return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
1019
+ };
1020
+ const validateContextUsage = (operationContextId, operationName, terminationManager) => {
1021
+ const contextId = operationContextId || "root";
1022
+ const activeContext = getActiveContext();
1023
+ if (!activeContext) {
1024
+ return;
1025
+ }
1026
+ if (activeContext.contextId !== contextId) {
1027
+ const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
1028
+ terminationManager.terminate({
1029
+ reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
1030
+ message: errorMessage,
1031
+ error: new Error(errorMessage),
1032
+ });
1033
+ // Only call termination manager, don't throw or return promise
1034
+ }
1035
+ };
1036
+
1136
1037
  function isErrorLike(obj) {
1137
1038
  return (obj instanceof Error ||
1138
1039
  (obj != null &&
@@ -1161,525 +1062,426 @@ function createErrorObjectFromError(error, data) {
1161
1062
  }
1162
1063
 
1163
1064
  /**
1164
- * Error thrown when a checkpoint operation fails due to invocation-level issues
1165
- * (e.g., 5xx errors, invalid checkpoint token)
1166
- * This will terminate the current Lambda invocation, but the execution can continue with a new invocation
1167
- */
1168
- class CheckpointUnrecoverableInvocationError extends UnrecoverableInvocationError {
1169
- terminationReason = TerminationReason.CHECKPOINT_FAILED;
1170
- constructor(message, originalError) {
1171
- super(message || "Checkpoint operation failed", originalError);
1172
- }
1173
- }
1174
- /**
1175
- * Error thrown when a checkpoint operation fails due to execution-level issues
1176
- * (e.g., 4xx errors other than invalid checkpoint token)
1177
- * This will terminate the entire execution and cannot be recovered
1065
+ * Error thrown when non-deterministic code is detected during replay
1178
1066
  */
1179
- class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError {
1180
- terminationReason = TerminationReason.CHECKPOINT_FAILED;
1181
- constructor(message, originalError) {
1182
- super(message || "Checkpoint operation failed", originalError);
1067
+ class NonDeterministicExecutionError extends UnrecoverableExecutionError {
1068
+ terminationReason = TerminationReason.CUSTOM;
1069
+ constructor(message) {
1070
+ super(message);
1071
+ this.name = "NonDeterministicExecutionError";
1183
1072
  }
1184
1073
  }
1185
1074
 
1186
- const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
1187
- class CheckpointManager {
1188
- durableExecutionArn;
1189
- stepData;
1190
- storage;
1191
- terminationManager;
1192
- activeOperationsTracker;
1193
- stepDataEmitter;
1194
- logger;
1195
- pendingCompletions;
1196
- queue = [];
1197
- isProcessing = false;
1198
- currentTaskToken;
1199
- forceCheckpointPromises = [];
1200
- queueCompletionResolver = null;
1201
- queueCompletionTimeout = null;
1202
- MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
1203
- isTerminating = false;
1204
- static textEncoder = new TextEncoder();
1205
- constructor(durableExecutionArn, stepData, storage, terminationManager, activeOperationsTracker, initialTaskToken, stepDataEmitter, logger, pendingCompletions) {
1206
- this.durableExecutionArn = durableExecutionArn;
1207
- this.stepData = stepData;
1208
- this.storage = storage;
1209
- this.terminationManager = terminationManager;
1210
- this.activeOperationsTracker = activeOperationsTracker;
1211
- this.stepDataEmitter = stepDataEmitter;
1212
- this.logger = logger;
1213
- this.pendingCompletions = pendingCompletions;
1214
- this.currentTaskToken = initialTaskToken;
1215
- }
1216
- setTerminating() {
1217
- this.isTerminating = true;
1218
- log("🛑", "Checkpoint manager marked as terminating");
1219
- }
1220
- /**
1221
- * Checks if a step ID or any of its ancestors has a pending completion
1222
- */
1223
- hasPendingAncestorCompletion(stepId) {
1224
- let currentHashedId = hashId(stepId);
1225
- while (currentHashedId) {
1226
- if (this.pendingCompletions.has(currentHashedId)) {
1227
- return true;
1228
- }
1229
- const operation = this.stepData[currentHashedId];
1230
- currentHashedId = operation?.ParentId;
1231
- }
1232
- return false;
1233
- }
1234
- async forceCheckpoint() {
1235
- if (this.isTerminating) {
1236
- log("⚠️", "Force checkpoint skipped - termination in progress");
1237
- return new Promise(() => { }); // Never resolves during termination
1238
- }
1239
- return new Promise((resolve, reject) => {
1240
- this.forceCheckpointPromises.push({ resolve, reject });
1241
- if (!this.isProcessing) {
1242
- setImmediate(() => {
1243
- this.processQueue();
1244
- });
1245
- }
1246
- });
1075
/**
 * Compares the operation the workflow code is issuing now with the operation
 * recorded in checkpoint data for the same step ID.
 *
 * Type, name and subtype are checked in that order. On the first mismatch a
 * NonDeterministicExecutionError is handed to terminateForUnrecoverableError
 * and validation stops, so a single divergence requests termination once
 * rather than once per differing field.
 *
 * @param stepId - ID of the step being validated (used in messages).
 * @param currentOperation - `{ type, name, subType }` of the current call.
 * @param checkpointData - Recorded operation (`Type`, `Name`, `SubType`), if any.
 * @param context - Passed through to terminateForUnrecoverableError.
 */
const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
    // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
    if (!checkpointData || !checkpointData.Type) {
        return;
    }
    // Builds the error for one mismatching field and requests termination.
    const reportMismatch = (field, expected, actual) => {
        const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation ${field} mismatch for step "${stepId}". ` +
            `Expected ${field} "${expected}", but got "${actual}". ` +
            `This indicates non-deterministic control flow in your workflow code.`);
        terminateForUnrecoverableError(context, error, stepId);
    };
    // Validate operation type
    if (checkpointData.Type !== currentOperation.type) {
        reportMismatch("type", checkpointData.Type, currentOperation.type);
        return;
    }
    // Validate operation name (including undefined)
    if (checkpointData.Name !== currentOperation.name) {
        reportMismatch("name", checkpointData.Name ?? "undefined", currentOperation.name ?? "undefined");
        return;
    }
    // Validate operation subtype
    if (checkpointData.SubType !== currentOperation.subType) {
        reportMismatch("subtype", checkpointData.SubType, currentOperation.subType);
    }
};
1102
+
1103
/**
 * Creates the `step` handler bound to one durable context.
 *
 * The returned handler accepts `(name, fn, options)` or `(fn, options)`. Each
 * call allocates a step ID, starts "phase 1" immediately (replaying a recorded
 * outcome or executing `fn` with retries), and returns a DurablePromise whose
 * executor marks the operation as awaited and then awaits phase 1.
 *
 * @param context - Provides getStepData, terminationManager and durableExecutionArn.
 * @param checkpoint - Provides checkpoint, markOperationState, markOperationAwaited
 *   and waitForRetryTimer.
 * @param parentContext - Not used by this handler.
 * @param createStepId - Returns the ID for the next step.
 * @param logger - Exposed to the step function as `stepContext.logger`.
 * @param parentId - ID recorded as the parent of every step operation.
 */
const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, parentId) => {
    return (nameOrFn, fnOrOptions, maybeOptions) => {
        // A string or undefined first argument occupies the name slot.
        const hasNameSlot = typeof nameOrFn === "string" || nameOrFn === undefined;
        const name = hasNameSlot ? nameOrFn : undefined;
        const fn = hasNameSlot ? fnOrOptions : nameOrFn;
        const options = hasNameSlot ? maybeOptions : fnOrOptions;
        const stepId = createStepId();
        const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
        const serdes = options?.serdes || defaultSerdes;
        // Fresh metadata object for every markOperationState call.
        const stepMetadata = () => ({
            stepId,
            name,
            type: OperationType.STEP,
            subType: OperationSubType.STEP,
            parentId,
        });
        // Common checkpoint fields followed by the action-specific ones.
        const stepUpdate = (action, extra) => ({
            Id: stepId,
            ParentId: parentId,
            Action: action,
            SubType: OperationSubType.STEP,
            Type: OperationType.STEP,
            ...extra,
        });
        // Delay sent with a RETRY checkpoint; 1 second when the decision has no delay.
        const retryDelaySeconds = (decision) => decision.delay ? durationToSeconds(decision.delay) : 1;
        // Non-Error throwables are presented to the retry strategy as a generic Error.
        const toError = (thrown) => (thrown instanceof Error ? thrown : new Error("Unknown Error"));
        // Records RETRY_WAITING with the next-attempt timestamp read from step data.
        const markRetryWaiting = (nextAttemptTimestamp) => {
            checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
                metadata: stepMetadata(),
                endTimestamp: nextAttemptTimestamp,
            });
        };
        // Phase 1: Execute step
        const phase1Promise = (async () => {
            let stepData = context.getStepData(stepId);
            // Runs the step function once; on a retryable failure waits for the
            // retry timer and calls itself again.
            async function executeStepLogic() {
                stepData = context.getStepData(stepId);
                if (stepData?.Status !== OperationStatus.STARTED) {
                    const startRequest = checkpoint.checkpoint(stepId, stepUpdate(OperationAction.START, { Name: name }));
                    // Only at-most-once semantics waits for the START checkpoint;
                    // otherwise the request is intentionally not awaited.
                    if (semantics === StepSemantics.AtMostOncePerRetry) {
                        await startRequest;
                    }
                }
                try {
                    stepData = context.getStepData(stepId);
                    const currentAttempt = stepData?.StepDetails?.Attempt || 0;
                    const stepContext = { logger };
                    // Mark operation as EXECUTING
                    checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
                        metadata: stepMetadata(),
                    });
                    const result = await runWithContext(stepId, parentId, () => fn(stepContext), currentAttempt + 1, DurableExecutionMode.ExecutionMode);
                    const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
                    await checkpoint.checkpoint(stepId, stepUpdate(OperationAction.SUCCEED, {
                        Payload: serializedResult,
                        Name: name,
                    }));
                    checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                    // The caller receives the value after a serialize/deserialize round trip.
                    return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
                }
                catch (error) {
                    if (isUnrecoverableError(error)) {
                        return terminateForUnrecoverableError(context, error, name || stepId);
                    }
                    stepData = context.getStepData(stepId);
                    const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
                    const retryDecision = options?.retryStrategy?.(toError(error), currentAttempt) ??
                        retryPresets.default(toError(error), currentAttempt);
                    if (!retryDecision.shouldRetry) {
                        await checkpoint.checkpoint(stepId, stepUpdate(OperationAction.FAIL, {
                            Error: createErrorObjectFromError(error),
                            Name: name,
                        }));
                        checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                        throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
                    }
                    await checkpoint.checkpoint(stepId, stepUpdate(OperationAction.RETRY, {
                        Error: createErrorObjectFromError(error),
                        Name: name,
                        StepOptions: {
                            NextAttemptDelaySeconds: retryDelaySeconds(retryDecision),
                        },
                    }));
                    markRetryWaiting(context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp);
                    await checkpoint.waitForRetryTimer(stepId);
                    return await executeStepLogic();
                }
            }
            // Waits for the retry timer, refreshes step data, then executes the step.
            const resumeAfterRetryTimer = async () => {
                await checkpoint.waitForRetryTimer(stepId);
                stepData = context.getStepData(stepId);
                return await executeStepLogic();
            };
            validateReplayConsistency(stepId, { type: OperationType.STEP, name, subType: OperationSubType.STEP }, stepData, context);
            // Check if already completed
            if (stepData?.Status === OperationStatus.SUCCEEDED) {
                log("⏭️", "Step already completed:", { stepId });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: stepMetadata(),
                });
                return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
            }
            // Check if already failed
            if (stepData?.Status === OperationStatus.FAILED) {
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: stepMetadata(),
                });
                if (stepData.StepDetails?.Error) {
                    throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
                }
                throw new StepError("Unknown error");
            }
            // Check if pending retry
            if (stepData?.Status === OperationStatus.PENDING) {
                markRetryWaiting(stepData.StepDetails?.NextAttemptTimestamp);
                return resumeAfterRetryTimer();
            }
            // Check for interrupted step with AT_MOST_ONCE_PER_RETRY
            const wasInterrupted = stepData?.Status === OperationStatus.STARTED &&
                semantics === StepSemantics.AtMostOncePerRetry;
            if (!wasInterrupted) {
                return await executeStepLogic();
            }
            // The recorded attempt started but has no outcome: treat it as a
            // failed attempt and let the retry strategy decide what happens next.
            const error = new StepInterruptedError(stepId, name);
            const currentAttempt = (stepData.StepDetails?.Attempt || 0) + 1;
            const retryDecision = options?.retryStrategy?.(error, currentAttempt) ??
                retryPresets.default(error, currentAttempt);
            if (!retryDecision.shouldRetry) {
                await checkpoint.checkpoint(stepId, stepUpdate(OperationAction.FAIL, {
                    Error: createErrorObjectFromError(error),
                    Name: name,
                }));
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
            }
            await checkpoint.checkpoint(stepId, stepUpdate(OperationAction.RETRY, {
                Error: createErrorObjectFromError(error),
                Name: name,
                StepOptions: {
                    NextAttemptDelaySeconds: retryDelaySeconds(retryDecision),
                },
            }));
            markRetryWaiting(context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp);
            return resumeAfterRetryTimer();
        })();
        // Attach a no-op handler so a phase 1 rejection is not reported as
        // unhandled; the rejection still surfaces through the promise below.
        phase1Promise.catch(() => { });
        return new DurablePromise(async () => {
            checkpoint.markOperationAwaited(stepId);
            return await phase1Promise;
        });
    };
};
1333
+
1334
/**
 * Creates the `invoke` handler bound to one durable context.
 *
 * The returned handler accepts `(name, funcId, input, config)` or
 * `(funcId, input, config)`; the named form is assumed whenever the second
 * argument is a string. Phase 1 starts right away and checkpoints a START
 * for the chained invoke when no step data exists yet. The returned
 * DurablePromise's executor performs phase 2: it waits for a status change
 * and resolves with the deserialized result or throws an InvokeError.
 *
 * @param context - Provides getStepData, terminationManager and durableExecutionArn.
 * @param checkpoint - Provides checkpoint, markOperationState, markOperationAwaited
 *   and waitForStatusChange.
 * @param createStepId - Returns the ID for the next operation.
 * @param parentId - ID recorded as the parent of the invoke operation.
 * @param checkAndUpdateReplayMode - Optional callback invoked when a SUCCEEDED
 *   invoke is observed.
 */
const createInvokeHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
    function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
        const isNameFirst = typeof funcIdOrInput === "string";
        const [name, funcId, input, config] = isNameFirst
            ? [nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig]
            : [undefined, nameOrFuncId, funcIdOrInput, inputOrConfig];
        const stepId = createStepId();
        // Fresh metadata object for every markOperationState call.
        const invokeMetadata = () => ({
            stepId,
            name,
            type: OperationType.CHAINED_INVOKE,
            subType: OperationSubType.CHAINED_INVOKE,
            parentId,
        });
        // Deserializes the recorded result of a SUCCEEDED invoke.
        const readResult = (stepData) => safeDeserialize(config?.resultSerdes || defaultSerdes, stepData.ChainedInvokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
        // Builds the InvokeError for an invoke that did not succeed.
        const buildFailure = (stepData) => {
            const recordedError = stepData?.ChainedInvokeDetails?.Error;
            if (!recordedError) {
                return new InvokeError("Invoke failed");
            }
            const message = recordedError.ErrorMessage;
            return new InvokeError(message || "Invoke failed", message ? new Error(message) : undefined, recordedError.ErrorData);
        };
        // Phase 1: Start invoke operation
        let isCompleted = false;
        const phase1Promise = (async () => {
            log("🔗", "Invoke phase 1:", { stepId, name: name || funcId });
            const stepData = context.getStepData(stepId);
            // Validate replay consistency
            validateReplayConsistency(stepId, {
                type: OperationType.CHAINED_INVOKE,
                name,
                subType: OperationSubType.CHAINED_INVOKE,
            }, stepData, context);
            const status = stepData?.Status;
            // Check if already completed
            if (status === OperationStatus.SUCCEEDED) {
                log("⏭️", "Invoke already completed:", { stepId });
                checkAndUpdateReplayMode?.();
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: invokeMetadata(),
                });
                isCompleted = true;
                return;
            }
            // Check if already failed
            const alreadyFailed = status === OperationStatus.FAILED ||
                status === OperationStatus.TIMED_OUT ||
                status === OperationStatus.STOPPED;
            if (alreadyFailed) {
                log("❌", "Invoke already failed:", { stepId });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: invokeMetadata(),
                });
                isCompleted = true;
                return;
            }
            // Start invoke if not already started
            if (!stepData) {
                const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
                await checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: OperationAction.START,
                    SubType: OperationSubType.CHAINED_INVOKE,
                    Type: OperationType.CHAINED_INVOKE,
                    Name: name,
                    Payload: serializedPayload,
                    ChainedInvokeOptions: {
                        FunctionName: funcId,
                    },
                });
            }
            // Mark as IDLE_NOT_AWAITED
            checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
                metadata: invokeMetadata(),
            });
            log("✅", "Invoke phase 1 complete:", { stepId });
        })();
        // Attach a no-op handler so a phase 1 rejection is not reported as
        // unhandled; it is still rethrown by the `await` in phase 2.
        phase1Promise.catch(() => { });
        // Phase 2: Wait for completion
        return new DurablePromise(async () => {
            await phase1Promise;
            if (isCompleted) {
                // Phase 1 found a recorded outcome and already marked it COMPLETED.
                const recorded = context.getStepData(stepId);
                if (recorded?.Status === OperationStatus.SUCCEEDED) {
                    return await readResult(recorded);
                }
                // Handle failure
                throw buildFailure(recorded);
            }
            log("🔗", "Invoke phase 2:", { stepId });
            checkpoint.markOperationAwaited(stepId);
            await checkpoint.waitForStatusChange(stepId);
            const stepData = context.getStepData(stepId);
            if (stepData?.Status === OperationStatus.SUCCEEDED) {
                log("✅", "Invoke completed:", { stepId });
                checkAndUpdateReplayMode?.();
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                return await readResult(stepData);
            }
            // Handle failure
            log("❌", "Invoke failed:", { stepId, status: stepData?.Status });
            checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
            throw buildFailure(stepData);
        });
    }
    return invokeHandler;
};
1626
1466
 
1627
- const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
1628
- // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
1629
- if (!checkpointData || !checkpointData.Type) {
1630
- return;
1631
- }
1632
- // Validate operation type
1633
- if (checkpointData.Type !== currentOperation.type) {
1634
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation type mismatch for step "${stepId}". ` +
1635
- `Expected type "${checkpointData.Type}", but got "${currentOperation.type}". ` +
1636
- `This indicates non-deterministic control flow in your workflow code.`);
1637
- terminateForUnrecoverableError(context, error, stepId);
1467
+ // Checkpoint size limit in bytes (256KB)
1468
+ const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
1469
+ const determineChildReplayMode = (context, stepId) => {
1470
+ const stepData = context.getStepData(stepId);
1471
+ if (!stepData) {
1472
+ return DurableExecutionMode.ExecutionMode;
1638
1473
  }
1639
- // Validate operation name (including undefined)
1640
- if (checkpointData.Name !== currentOperation.name) {
1641
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation name mismatch for step "${stepId}". ` +
1642
- `Expected name "${checkpointData.Name ?? "undefined"}", but got "${currentOperation.name ?? "undefined"}". ` +
1643
- `This indicates non-deterministic control flow in your workflow code.`);
1644
- terminateForUnrecoverableError(context, error, stepId);
1474
+ if (stepData.Status === OperationStatus.SUCCEEDED &&
1475
+ stepData.ContextDetails?.ReplayChildren) {
1476
+ return DurableExecutionMode.ReplaySucceededContext;
1645
1477
  }
1646
- // Validate operation subtype
1647
- if (checkpointData.SubType !== currentOperation.subType) {
1648
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation subtype mismatch for step "${stepId}". ` +
1649
- `Expected subtype "${checkpointData.SubType}", but got "${currentOperation.subType}". ` +
1650
- `This indicates non-deterministic control flow in your workflow code.`);
1651
- terminateForUnrecoverableError(context, error, stepId);
1478
+ if (stepData.Status === OperationStatus.SUCCEEDED ||
1479
+ stepData.Status === OperationStatus.FAILED) {
1480
+ return DurableExecutionMode.ReplayMode;
1652
1481
  }
1482
+ return DurableExecutionMode.ExecutionMode;
1653
1483
  };
1654
-
1655
- // Special symbol to indicate that the main loop should continue
1656
- const CONTINUE_MAIN_LOOP$1 = Symbol("CONTINUE_MAIN_LOOP");
1657
- const waitForContinuation$1 = async (context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange) => {
1658
- const stepData = context.getStepData(stepId);
1659
- // Check if there are any ongoing operations
1660
- if (!hasRunningOperations()) {
1661
- // No ongoing operations - safe to terminate
1662
- return terminate(context, TerminationReason.RETRY_SCHEDULED, `Retry scheduled for ${name || stepId}`);
1663
- }
1664
- // There are ongoing operations - wait before continuing
1665
- await waitBeforeContinue({
1666
- checkHasRunningOperations: true,
1667
- checkStepStatus: true,
1668
- checkTimer: true,
1669
- scheduledEndTimestamp: stepData?.StepDetails?.NextAttemptTimestamp,
1670
- stepId,
1671
- context,
1672
- hasRunningOperations,
1673
- operationsEmitter: getOperationsEmitter(),
1674
- checkpoint,
1675
- onAwaitedChange,
1676
- });
1677
- // Return to let the main loop re-evaluate step status
1678
- };
1679
- /**
1680
- * Creates a step handler for executing durable steps with two-phase execution.
1681
- */
1682
- const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId) => {
1484
+ const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
1683
1485
  return (nameOrFn, fnOrOptions, maybeOptions) => {
1684
1486
  let name;
1685
1487
  let fn;
@@ -1693,479 +1495,30 @@ const createStepHandler = (context, checkpoint, parentContext, createStepId, log
1693
1495
  fn = nameOrFn;
1694
1496
  options = fnOrOptions;
1695
1497
  }
1696
- const stepId = createStepId();
1697
- log("▶️", "Running step:", { stepId, name, options });
1498
+ const entityId = createStepId();
1499
+ log("🔄", "Running child context:", {
1500
+ entityId,
1501
+ name,
1502
+ });
1503
+ const stepData = context.getStepData(entityId);
1504
+ // Validate replay consistency
1505
+ validateReplayConsistency(entityId, {
1506
+ type: OperationType.CONTEXT,
1507
+ name,
1508
+ subType: options?.subType ||
1509
+ OperationSubType.RUN_IN_CHILD_CONTEXT,
1510
+ }, stepData, context);
1698
1511
  // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
1699
- let isAwaited = false;
1700
- let waitingCallback;
1701
- const setWaitingCallback = (cb) => {
1702
- waitingCallback = cb;
1703
- };
1704
- // Phase 1: Start execution immediately and capture result/error
1705
- const phase1Promise = (async () => {
1706
- // Main step logic - can be re-executed if step status changes
1707
- while (true) {
1708
- try {
1709
- const stepData = context.getStepData(stepId);
1710
- // Validate replay consistency
1711
- validateReplayConsistency(stepId, {
1712
- type: OperationType.STEP,
1713
- name,
1714
- subType: OperationSubType.STEP,
1715
- }, stepData, context);
1716
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
1717
- return await handleCompletedStep(context, stepId, name, options?.serdes);
1718
- }
1719
- if (stepData?.Status === OperationStatus.FAILED) {
1720
- // Return an async rejected promise to ensure it's handled asynchronously
1721
- return (async () => {
1722
- // Reconstruct the original error from stored ErrorObject
1723
- if (stepData.StepDetails?.Error) {
1724
- throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1725
- }
1726
- else {
1727
- // Fallback for legacy data without Error field
1728
- const errorMessage = stepData?.StepDetails?.Result;
1729
- throw new StepError(errorMessage || "Unknown error");
1730
- }
1731
- })();
1732
- }
1733
- // If PENDING, wait for timer to complete
1734
- if (stepData?.Status === OperationStatus.PENDING) {
1735
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1736
- continue; // Re-evaluate step status after waiting
1737
- }
1738
- // Check for interrupted step with AT_MOST_ONCE_PER_RETRY semantics
1739
- if (stepData?.Status === OperationStatus.STARTED) {
1740
- const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
1741
- if (semantics === StepSemantics.AtMostOncePerRetry) {
1742
- log("⚠️", "Step was interrupted during execution:", {
1743
- stepId,
1744
- name,
1745
- });
1746
- const error = new StepInterruptedError(stepId, name);
1747
- // Handle the interrupted step as a failure
1748
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1749
- let retryDecision;
1750
- if (options?.retryStrategy !== undefined) {
1751
- retryDecision = options.retryStrategy(error, currentAttempt);
1752
- }
1753
- else {
1754
- retryDecision = retryPresets.default(error, currentAttempt);
1755
- }
1756
- log("⚠️", "Should Retry Interrupted Step:", {
1757
- stepId,
1758
- name,
1759
- currentAttempt,
1760
- shouldRetry: retryDecision.shouldRetry,
1761
- delayInSeconds: retryDecision.shouldRetry
1762
- ? retryDecision.delay
1763
- ? durationToSeconds(retryDecision.delay)
1764
- : undefined
1765
- : undefined,
1766
- });
1767
- if (!retryDecision.shouldRetry) {
1768
- // No retry, mark as failed
1769
- await checkpoint.checkpoint(stepId, {
1770
- Id: stepId,
1771
- ParentId: parentId,
1772
- Action: OperationAction.FAIL,
1773
- SubType: OperationSubType.STEP,
1774
- Type: OperationType.STEP,
1775
- Error: createErrorObjectFromError(error),
1776
- Name: name,
1777
- });
1778
- // Reconstruct error from ErrorObject for deterministic behavior
1779
- const errorObject = createErrorObjectFromError(error);
1780
- throw DurableOperationError.fromErrorObject(errorObject);
1781
- }
1782
- else {
1783
- // Retry
1784
- await checkpoint.checkpoint(stepId, {
1785
- Id: stepId,
1786
- ParentId: parentId,
1787
- Action: OperationAction.RETRY,
1788
- SubType: OperationSubType.STEP,
1789
- Type: OperationType.STEP,
1790
- Error: createErrorObjectFromError(error),
1791
- Name: name,
1792
- StepOptions: {
1793
- NextAttemptDelaySeconds: retryDecision.delay
1794
- ? durationToSeconds(retryDecision.delay)
1795
- : 1,
1796
- },
1797
- });
1798
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1799
- continue; // Re-evaluate step status after waiting
1800
- }
1801
- }
1802
- }
1803
- // Execute step function for READY, STARTED (AtLeastOncePerRetry), or first time (undefined)
1804
- const result = await executeStep(context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, isAwaited ? undefined : setWaitingCallback);
1805
- // If executeStep signals to continue the main loop, do so
1806
- if (result === CONTINUE_MAIN_LOOP$1) {
1807
- continue;
1808
- }
1809
- return result;
1810
- }
1811
- catch (error) {
1812
- // Preserve DurableOperationError instances (StepInterruptedError is handled specifically where it's thrown)
1813
- if (error instanceof DurableOperationError) {
1814
- throw error;
1815
- }
1816
- // For any other error from executeStep, wrap it in StepError for consistency
1817
- throw new StepError(error instanceof Error ? error.message : "Step failed", error instanceof Error ? error : undefined);
1818
- }
1819
- }
1820
- })();
1821
- // Attach catch handler to prevent unhandled promise rejections
1822
- // The error will still be thrown when the DurablePromise is awaited
1823
- phase1Promise.catch(() => { });
1824
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
1825
- return new DurablePromise(async () => {
1826
- // When promise is awaited, mark as awaited and invoke waiting callback
1827
- isAwaited = true;
1828
- if (waitingCallback) {
1829
- waitingCallback();
1830
- }
1831
- return await phase1Promise;
1832
- });
1833
- };
1834
- };
1835
- const handleCompletedStep = async (context, stepId, stepName, serdes = defaultSerdes) => {
1836
- log("⏭️", "Step already finished, returning cached result:", { stepId });
1837
- const stepData = context.getStepData(stepId);
1838
- const result = stepData?.StepDetails?.Result;
1839
- return await safeDeserialize(serdes, result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
1840
- };
1841
- const executeStep = async (context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, onAwaitedChange) => {
1842
- // Determine step semantics (default to AT_LEAST_ONCE_PER_RETRY if not specified)
1843
- const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
1844
- const serdes = options?.serdes || defaultSerdes;
1845
- // Checkpoint at start for both semantics (only if not already started)
1846
- const stepData = context.getStepData(stepId);
1847
- if (stepData?.Status !== OperationStatus.STARTED) {
1848
- if (semantics === StepSemantics.AtMostOncePerRetry) {
1849
- // Wait for checkpoint to complete
1850
- await checkpoint.checkpoint(stepId, {
1851
- Id: stepId,
1852
- ParentId: parentId,
1853
- Action: OperationAction.START,
1854
- SubType: OperationSubType.STEP,
1855
- Type: OperationType.STEP,
1856
- Name: name,
1857
- });
1858
- }
1859
- else {
1860
- // Fire and forget for AtLeastOncePerRetry
1861
- checkpoint.checkpoint(stepId, {
1862
- Id: stepId,
1863
- ParentId: parentId,
1864
- Action: OperationAction.START,
1865
- SubType: OperationSubType.STEP,
1866
- Type: OperationType.STEP,
1867
- Name: name,
1868
- });
1869
- }
1870
- }
1871
- try {
1872
- // Get current attempt number for logger enrichment
1873
- const stepData = context.getStepData(stepId);
1874
- const currentAttempt = stepData?.StepDetails?.Attempt || 0;
1875
- // Create step context with enriched logger
1876
- const stepContext = {
1877
- logger,
1878
- };
1879
- // Execute the step function with stepContext
1880
- addRunningOperation(stepId);
1881
- let result;
1882
- try {
1883
- result = await runWithContext(stepId, parentId, () => fn(stepContext),
1884
- // The attempt that is running is the attempt from the step data (previous step attempt) + 1
1885
- currentAttempt + 1,
1886
- // Alwasy in execution mode when running step operations
1887
- DurableExecutionMode.ExecutionMode);
1888
- }
1889
- finally {
1890
- removeRunningOperation(stepId);
1891
- }
1892
- // Serialize the result for consistency
1893
- const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
1894
- // Always checkpoint on completion
1895
- await checkpoint.checkpoint(stepId, {
1896
- Id: stepId,
1897
- ParentId: parentId,
1898
- Action: OperationAction.SUCCEED,
1899
- SubType: OperationSubType.STEP,
1900
- Type: OperationType.STEP,
1901
- Payload: serializedResult,
1902
- Name: name,
1903
- });
1904
- log("✅", "Step completed successfully:", {
1905
- stepId,
1906
- name,
1907
- result,
1908
- semantics,
1909
- });
1910
- // Deserialize the result for consistency with replay behavior
1911
- return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
1912
- }
1913
- catch (error) {
1914
- log("❌", "Step failed:", {
1915
- stepId,
1916
- name,
1917
- error,
1918
- semantics,
1919
- });
1920
- // Handle unrecoverable errors - these should not go through retry logic
1921
- if (isUnrecoverableError(error)) {
1922
- log("💥", "Unrecoverable error detected:", {
1923
- stepId,
1924
- name,
1925
- error: error.message,
1926
- });
1927
- return terminateForUnrecoverableError(context, error, name || stepId);
1928
- }
1929
- const stepData = context.getStepData(stepId);
1930
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1931
- let retryDecision;
1932
- if (options?.retryStrategy !== undefined) {
1933
- // Use provided retry configuration
1934
- retryDecision = options.retryStrategy(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1935
- }
1936
- else {
1937
- // Use default retry preset if no config provided
1938
- retryDecision = retryPresets.default(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1939
- }
1940
- log("⚠️", "Should Retry:", {
1941
- stepId,
1942
- name,
1943
- currentAttempt,
1944
- shouldRetry: retryDecision.shouldRetry,
1945
- delayInSeconds: retryDecision.shouldRetry
1946
- ? retryDecision.delay
1947
- ? durationToSeconds(retryDecision.delay)
1948
- : undefined
1949
- : undefined,
1950
- semantics,
1951
- });
1952
- if (!retryDecision.shouldRetry) {
1953
- // No retry
1954
- await checkpoint.checkpoint(stepId, {
1955
- Id: stepId,
1956
- ParentId: parentId,
1957
- Action: OperationAction.FAIL,
1958
- SubType: OperationSubType.STEP,
1959
- Type: OperationType.STEP,
1960
- Error: createErrorObjectFromError(error),
1961
- Name: name,
1962
- });
1963
- // Reconstruct error from ErrorObject for deterministic behavior
1964
- const errorObject = createErrorObjectFromError(error);
1965
- throw DurableOperationError.fromErrorObject(errorObject);
1966
- }
1967
- else {
1968
- // Retry
1969
- await checkpoint.checkpoint(stepId, {
1970
- Id: stepId,
1971
- ParentId: parentId,
1972
- Action: OperationAction.RETRY,
1973
- SubType: OperationSubType.STEP,
1974
- Type: OperationType.STEP,
1975
- Error: createErrorObjectFromError(error),
1976
- Name: name,
1977
- StepOptions: {
1978
- NextAttemptDelaySeconds: retryDecision.delay
1979
- ? durationToSeconds(retryDecision.delay)
1980
- : 1,
1981
- },
1982
- });
1983
- // Wait for continuation and signal main loop to continue
1984
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange);
1985
- return CONTINUE_MAIN_LOOP$1;
1986
- }
1987
- }
1988
- };
1989
-
1990
- const createInvokeHandler = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, parentId, checkAndUpdateReplayMode) => {
1991
- function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
1992
- const isNameFirst = typeof funcIdOrInput === "string";
1993
- const name = isNameFirst ? nameOrFuncId : undefined;
1994
- const funcId = isNameFirst ? funcIdOrInput : nameOrFuncId;
1995
- const input = isNameFirst
1996
- ? inputOrConfig
1997
- : funcIdOrInput;
1998
- const config = isNameFirst
1999
- ? maybeConfig
2000
- : inputOrConfig;
2001
- const stepId = createStepId();
2002
- // Phase 1: Only checkpoint if needed, don't execute full logic
2003
- const startInvokeOperation = async () => {
2004
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 1`);
2005
- // Check initial step data for replay consistency validation
2006
- const initialStepData = context.getStepData(stepId);
2007
- // Validate replay consistency once before any execution
2008
- validateReplayConsistency(stepId, {
2009
- type: OperationType.CHAINED_INVOKE,
2010
- name,
2011
- subType: OperationSubType.CHAINED_INVOKE,
2012
- }, initialStepData, context);
2013
- // If stepData already exists, phase 1 has nothing to do
2014
- if (initialStepData) {
2015
- log("⏸️", `Invoke ${name || funcId} already exists (phase 1)`);
2016
- return;
2017
- }
2018
- // No stepData exists - need to start the invoke operation
2019
- // Serialize the input payload
2020
- const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
2021
- // Create checkpoint for the invoke operation
2022
- await checkpoint.checkpoint(stepId, {
2023
- Id: stepId,
2024
- ParentId: parentId,
2025
- Action: OperationAction.START,
2026
- SubType: OperationSubType.CHAINED_INVOKE,
2027
- Type: OperationType.CHAINED_INVOKE,
2028
- Name: name,
2029
- Payload: serializedPayload,
2030
- ChainedInvokeOptions: {
2031
- FunctionName: funcId,
2032
- },
2033
- });
2034
- log("🚀", `Invoke ${name || funcId} started (phase 1)`);
2035
- };
2036
- // Phase 2: Execute full logic including waiting and termination
2037
- const continueInvokeOperation = async () => {
2038
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 2`);
2039
- // Main invoke logic - can be re-executed if step status changes
2040
- while (true) {
2041
- // Check if we have existing step data
2042
- const stepData = context.getStepData(stepId);
2043
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2044
- // Return cached result - no need to check for errors in successful operations
2045
- const invokeDetails = stepData.ChainedInvokeDetails;
2046
- checkAndUpdateReplayMode?.();
2047
- return await safeDeserialize(config?.resultSerdes || defaultSerdes, invokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2048
- }
2049
- if (stepData?.Status === OperationStatus.FAILED ||
2050
- stepData?.Status === OperationStatus.TIMED_OUT ||
2051
- stepData?.Status === OperationStatus.STOPPED) {
2052
- // Operation failed, return async rejected promise
2053
- const invokeDetails = stepData.ChainedInvokeDetails;
2054
- return (async () => {
2055
- if (invokeDetails?.Error) {
2056
- throw new InvokeError(invokeDetails.Error.ErrorMessage || "Invoke failed", invokeDetails.Error.ErrorMessage
2057
- ? new Error(invokeDetails.Error.ErrorMessage)
2058
- : undefined, invokeDetails.Error.ErrorData);
2059
- }
2060
- else {
2061
- throw new InvokeError("Invoke failed");
2062
- }
2063
- })();
2064
- }
2065
- if (stepData?.Status === OperationStatus.STARTED) {
2066
- // Operation is still running
2067
- if (hasRunningOperations()) {
2068
- // Phase 2: Wait for other operations
2069
- log("⏳", `Invoke ${name || funcId} still in progress, waiting for other operations`);
2070
- await waitBeforeContinue({
2071
- checkHasRunningOperations: true,
2072
- checkStepStatus: true,
2073
- checkTimer: false,
2074
- stepId,
2075
- context,
2076
- hasRunningOperations,
2077
- operationsEmitter: getOperationsEmitter(),
2078
- });
2079
- continue; // Re-evaluate status after waiting
2080
- }
2081
- // No other operations running - terminate
2082
- log("⏳", `Invoke ${name || funcId} still in progress, terminating`);
2083
- return terminate(context, TerminationReason.OPERATION_TERMINATED, stepId);
2084
- }
2085
- // If stepData exists but has an unexpected status, break to avoid infinite loop
2086
- if (stepData && stepData.Status !== undefined) {
2087
- throw new InvokeError(`Unexpected operation status: ${stepData.Status}`);
2088
- }
2089
- // This should not happen in phase 2 since phase 1 creates stepData
2090
- throw new InvokeError("No step data found in phase 2 - this should not happen");
2091
- }
2092
- };
2093
- // Create a promise that tracks phase 1 completion
2094
- const startInvokePromise = startInvokeOperation()
2095
- .then(() => {
2096
- log("✅", "Invoke phase 1 complete:", { stepId, name: name || funcId });
2097
- })
2098
- .catch((error) => {
2099
- log("❌", "Invoke phase 1 error:", { stepId, error: error.message });
2100
- throw error; // Re-throw to fail phase 1
2101
- });
2102
- // Attach catch handler to prevent unhandled promise rejections
2103
- // The error will still be thrown when the DurablePromise is awaited
2104
- startInvokePromise.catch(() => { });
2105
- // Return DurablePromise that will execute phase 2 when awaited
2106
- return new DurablePromise(async () => {
2107
- // Wait for phase 1 to complete first
2108
- await startInvokePromise;
2109
- // Then execute phase 2
2110
- return await continueInvokeOperation();
2111
- });
2112
- }
2113
- return invokeHandler;
2114
- };
2115
-
2116
- // Checkpoint size limit in bytes (256KB)
2117
- const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
2118
- const determineChildReplayMode = (context, stepId) => {
2119
- const stepData = context.getStepData(stepId);
2120
- if (!stepData) {
2121
- return DurableExecutionMode.ExecutionMode;
2122
- }
2123
- if (stepData.Status === OperationStatus.SUCCEEDED &&
2124
- stepData.ContextDetails?.ReplayChildren) {
2125
- return DurableExecutionMode.ReplaySucceededContext;
2126
- }
2127
- if (stepData.Status === OperationStatus.SUCCEEDED ||
2128
- stepData.Status === OperationStatus.FAILED) {
2129
- return DurableExecutionMode.ReplayMode;
2130
- }
2131
- return DurableExecutionMode.ExecutionMode;
2132
- };
2133
- const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
2134
- return (nameOrFn, fnOrOptions, maybeOptions) => {
2135
- let name;
2136
- let fn;
2137
- let options;
2138
- if (typeof nameOrFn === "string" || nameOrFn === undefined) {
2139
- name = nameOrFn;
2140
- fn = fnOrOptions;
2141
- options = maybeOptions;
2142
- }
2143
- else {
2144
- fn = nameOrFn;
2145
- options = fnOrOptions;
2146
- }
2147
- const entityId = createStepId();
2148
- log("🔄", "Running child context:", {
2149
- entityId,
2150
- name,
2151
- });
2152
- const stepData = context.getStepData(entityId);
2153
- // Validate replay consistency
2154
- validateReplayConsistency(entityId, {
2155
- type: OperationType.CONTEXT,
2156
- name,
2157
- subType: options?.subType ||
2158
- OperationSubType.RUN_IN_CHILD_CONTEXT,
2159
- }, stepData, context);
2160
- // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
2161
- let phase1Result;
2162
- let phase1Error;
1512
+ let phase1Result;
1513
+ let phase1Error;
2163
1514
  // Phase 1: Start execution immediately and capture result/error
2164
1515
  const phase1Promise = (async () => {
2165
1516
  const currentStepData = context.getStepData(entityId);
2166
1517
  // If already completed, return cached result
2167
1518
  if (currentStepData?.Status === OperationStatus.SUCCEEDED ||
2168
1519
  currentStepData?.Status === OperationStatus.FAILED) {
1520
+ // Mark this run-in-child-context as finished to prevent descendant operations
1521
+ checkpoint.markAncestorFinished(entityId);
2169
1522
  return handleCompletedChildContext(context, parentContext, entityId, name, fn, options, getParentLogger, createChildContext);
2170
1523
  }
2171
1524
  // Execute if not completed
@@ -2255,8 +1608,10 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2255
1608
  limit: CHECKPOINT_SIZE_LIMIT,
2256
1609
  });
2257
1610
  }
1611
+ // Mark this run-in-child-context as finished to prevent descendant operations
1612
+ checkpoint.markAncestorFinished(entityId);
2258
1613
  const subType = options?.subType || OperationSubType.RUN_IN_CHILD_CONTEXT;
2259
- await checkpoint.checkpoint(entityId, {
1614
+ checkpoint.checkpoint(entityId, {
2260
1615
  Id: entityId,
2261
1616
  ParentId: parentId,
2262
1617
  Action: OperationAction.SUCCEED,
@@ -2278,9 +1633,11 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2278
1633
  name,
2279
1634
  error,
2280
1635
  });
1636
+ // Mark this run-in-child-context as finished to prevent descendant operations
1637
+ checkpoint.markAncestorFinished(entityId);
2281
1638
  // Always checkpoint failures
2282
1639
  const subType = options?.subType || OperationSubType.RUN_IN_CHILD_CONTEXT;
2283
- await checkpoint.checkpoint(entityId, {
1640
+ checkpoint.checkpoint(entityId, {
2284
1641
  Id: entityId,
2285
1642
  ParentId: parentId,
2286
1643
  Action: OperationAction.FAIL,
@@ -2296,433 +1653,324 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2296
1653
  }
2297
1654
  };
2298
1655
 
2299
/**
 * Creates the `wait` operation handler bound to one durable context.
 *
 * The returned handler accepts `(duration)` or `(name, duration)` and works in
 * two phases:
 *  - Phase 1 starts immediately when the handler is called. It validates
 *    replay consistency, checkpoints a START for the wait when no step data
 *    exists yet, and marks the operation IDLE_NOT_AWAITED.
 *  - Phase 2 is the function handed to `DurablePromise`. It waits for phase 1,
 *    marks the operation as awaited and then waits for a status change.
 *    NOTE(review): presumably DurablePromise defers this function until the
 *    promise is awaited - confirm against the DurablePromise implementation.
 *
 * @param context Execution context; only `getStepData(stepId)` is read here.
 * @param checkpoint Checkpoint manager (`checkpoint`, `markOperationState`,
 *   `markOperationAwaited`, `waitForStatusChange` are used).
 * @param createStepId Factory returning the id for this operation.
 * @param parentId Id recorded as `ParentId` / metadata `parentId`.
 * @param checkAndUpdateReplayMode Optional hook invoked once the wait is
 *   observed as SUCCEEDED.
 * @returns The wait handler function.
 */
const createWaitHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
    function waitHandler(nameOrDuration, duration) {
        // Overload resolution: a leading string is the operation name.
        const hasName = typeof nameOrDuration === "string";
        const waitName = hasName ? nameOrDuration : undefined;
        const waitSeconds = durationToSeconds(hasName ? duration : nameOrDuration);
        const stepId = createStepId();
        // Fresh metadata object for every lifecycle notification.
        const describeOperation = () => ({
            stepId,
            name: waitName,
            type: OperationType.WAIT,
            subType: OperationSubType.WAIT,
            parentId,
        });
        // Set by phase 1 when the wait was already SUCCEEDED, so phase 2 can
        // return without waiting.
        let finishedDuringPhase1 = false;
        const startWait = async () => {
            log("⏲️", "Wait phase 1:", {
                stepId,
                name: waitName,
                seconds: waitSeconds,
            });
            const recorded = context.getStepData(stepId);
            validateReplayConsistency(stepId, {
                type: OperationType.WAIT,
                name: waitName,
                subType: OperationSubType.WAIT,
            }, recorded, context);
            if (recorded?.Status === OperationStatus.SUCCEEDED) {
                log("⏭️", "Wait already completed:", { stepId });
                checkAndUpdateReplayMode?.();
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: describeOperation(),
                });
                finishedDuringPhase1 = true;
                return;
            }
            // START is only checkpointed the first time this wait is seen.
            if (!recorded) {
                await checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: OperationAction.START,
                    SubType: OperationSubType.WAIT,
                    Type: OperationType.WAIT,
                    Name: waitName,
                    WaitOptions: {
                        WaitSeconds: waitSeconds,
                    },
                });
            }
            // Re-read the step data so the end timestamp reflects the checkpoint.
            const refreshed = context.getStepData(stepId);
            checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
                metadata: describeOperation(),
                endTimestamp: refreshed?.WaitDetails?.ScheduledEndTimestamp,
            });
            log("✅", "Wait phase 1 complete:", { stepId });
        };
        const phase1 = startWait();
        // A phase 1 failure must not surface as an unhandled rejection; it is
        // re-thrown when phase 2 awaits `phase1`.
        phase1.catch(() => { });
        return new DurablePromise(async () => {
            await phase1;
            if (finishedDuringPhase1) {
                return;
            }
            log("⏲️", "Wait phase 2:", { stepId });
            checkpoint.markOperationAwaited(stepId);
            await checkpoint.waitForStatusChange(stepId);
            const finalStatus = context.getStepData(stepId)?.Status;
            if (finalStatus !== OperationStatus.SUCCEEDED) {
                // Not expected; resolve anyway rather than fail the caller.
                log("⚠️", "Wait ended with unexpected status:", {
                    stepId,
                    status: finalStatus,
                });
                return;
            }
            log("✅", "Wait completed:", { stepId });
            checkAndUpdateReplayMode?.();
            checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
        });
    }
    return waitHandler;
};
2389
1757
 
2390
/**
 * Creates the `waitForCondition` handler bound to one durable context.
 *
 * The returned handler accepts `(check, config)` or `(name, check, config)`.
 * `config.waitStrategy` and `config.initialState` are required; a missing one
 * throws synchronously, before a step id is allocated.
 *
 * Polling starts immediately (phase 1). The returned DurablePromise marks the
 * operation as awaited and yields the phase 1 outcome.
 *
 * Each attempt:
 *  1. restores state from the last checkpointed result (or `initialState`),
 *  2. runs `check(state, { logger })`,
 *  3. round-trips the new state through `serdes`,
 *  4. asks `config.waitStrategy(state, attempt)` whether to continue,
 *  5. checkpoints SUCCEED, or checkpoints RETRY and waits for the retry timer.
 *
 * Attempts run in a loop rather than by recursion, so the FAIL handler of one
 * attempt never wraps a later attempt. A failing attempt is checkpointed as
 * FAIL exactly once and its error is wrapped exactly once.
 *
 * @param context Execution context (`getStepData`, `terminationManager`,
 *   `durableExecutionArn` are read).
 * @param checkpoint Checkpoint manager (`checkpoint`, `markOperationState`,
 *   `markOperationAwaited`, `waitForRetryTimer` are used).
 * @param createStepId Factory returning the id for this operation.
 * @param logger Logger exposed to the user's check function.
 * @param parentId Id recorded as `ParentId` / metadata `parentId`.
 * @returns Handler returning a DurablePromise of the final state.
 * @throws {Error} Synchronously, when `waitStrategy` or `initialState` is missing.
 */
const createWaitForConditionHandler = (context, checkpoint, createStepId, logger, parentId) => {
    return (nameOrCheck, checkOrConfig, maybeConfig) => {
        let name;
        let check;
        let config;
        if (typeof nameOrCheck === "string" || nameOrCheck === undefined) {
            name = nameOrCheck;
            check = checkOrConfig;
            config = maybeConfig;
        }
        else {
            check = nameOrCheck;
            config = checkOrConfig;
        }
        if (!config?.waitStrategy || config.initialState === undefined) {
            throw new Error("waitForCondition requires config with waitStrategy and initialState");
        }
        const stepId = createStepId();
        const serdes = config.serdes || defaultSerdes;
        // Fresh metadata object for every lifecycle notification.
        const buildMetadata = () => ({
            stepId,
            name,
            type: OperationType.STEP,
            subType: OperationSubType.WAIT_FOR_CONDITION,
            parentId,
        });
        // State to feed into the next check: the checkpointed result of a
        // STARTED/READY operation when present and readable, else initialState.
        const resolveCurrentState = async (stepData) => {
            const resumable = stepData?.Status === OperationStatus.STARTED ||
                stepData?.Status === OperationStatus.READY;
            const checkpointData = resumable ? stepData.StepDetails?.Result : undefined;
            if (!checkpointData) {
                return config.initialState;
            }
            try {
                return await serdes.deserialize(checkpointData, {
                    entityId: stepId,
                    durableExecutionArn: context.durableExecutionArn,
                });
            }
            catch (error) {
                // Best effort: fall back to the initial state, but leave a trace.
                log("⚠️", "Failed to deserialize checkpoint data, using initial state:", {
                    stepId,
                    name,
                    error,
                });
                return config.initialState;
            }
        };
        // Runs exactly one attempt. Resolves to `{ retry: false, value }` when
        // the condition is met, or `{ retry: true }` once the retry timer has
        // elapsed. Any failure inside the attempt is checkpointed as FAIL once.
        const runAttempt = async () => {
            const stepData = context.getStepData(stepId);
            const currentState = await resolveCurrentState(stepData);
            const currentAttempt = (stepData?.StepDetails?.Attempt ?? 0) + 1;
            if (stepData?.Status !== OperationStatus.STARTED) {
                // Intentionally not awaited, as in the original code.
                checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: OperationAction.START,
                    SubType: OperationSubType.WAIT_FOR_CONDITION,
                    Type: OperationType.STEP,
                    Name: name,
                });
            }
            try {
                const waitForConditionContext = {
                    logger,
                };
                checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
                    metadata: buildMetadata(),
                });
                const newState = await runWithContext(stepId, parentId, () => check(currentState, waitForConditionContext), currentAttempt, DurableExecutionMode.ExecutionMode);
                // Round-trip through serdes so a live run sees the same value a
                // replay would.
                const serializedState = await safeSerialize(serdes, newState, stepId, name, context.terminationManager, context.durableExecutionArn);
                const deserializedState = await safeDeserialize(serdes, serializedState, stepId, name, context.terminationManager, context.durableExecutionArn);
                const decision = config.waitStrategy(deserializedState, currentAttempt);
                if (!decision.shouldContinue) {
                    await checkpoint.checkpoint(stepId, {
                        Id: stepId,
                        ParentId: parentId,
                        Action: OperationAction.SUCCEED,
                        SubType: OperationSubType.WAIT_FOR_CONDITION,
                        Type: OperationType.STEP,
                        Payload: serializedState,
                        Name: name,
                    });
                    checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                    return { retry: false, value: deserializedState };
                }
                await checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: OperationAction.RETRY,
                    SubType: OperationSubType.WAIT_FOR_CONDITION,
                    Type: OperationType.STEP,
                    Payload: serializedState,
                    Name: name,
                    StepOptions: {
                        NextAttemptDelaySeconds: durationToSeconds(decision.delay),
                    },
                });
                checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
                    metadata: buildMetadata(),
                    endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
                });
                await checkpoint.waitForRetryTimer(stepId);
                // The next attempt is started by the caller's loop, outside
                // this try/catch.
                return { retry: true };
            }
            catch (error) {
                const errorObject = createErrorObjectFromError(error);
                await checkpoint.checkpoint(stepId, {
                    Id: stepId,
                    ParentId: parentId,
                    Action: OperationAction.FAIL,
                    SubType: OperationSubType.WAIT_FOR_CONDITION,
                    Type: OperationType.STEP,
                    Error: errorObject,
                    Name: name,
                });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
                // Rebuild from the error object so the thrown error matches
                // what a replay reconstructs from the checkpoint.
                throw DurableOperationError.fromErrorObject(errorObject);
            }
        };
        // Repeats attempts until one completes or throws.
        const executeCheckLogic = async () => {
            for (;;) {
                const outcome = await runAttempt();
                if (!outcome.retry) {
                    return outcome.value;
                }
            }
        };
        const phase1Promise = (async () => {
            const stepData = context.getStepData(stepId);
            // Already succeeded: return the stored result.
            if (stepData?.Status === OperationStatus.SUCCEEDED) {
                log("⏭️", "WaitForCondition already completed:", { stepId });
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
            }
            // Already failed: re-throw the stored error.
            if (stepData?.Status === OperationStatus.FAILED) {
                checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
                    metadata: buildMetadata(),
                });
                if (stepData.StepDetails?.Error) {
                    throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
                }
                throw new WaitForConditionError("waitForCondition failed");
            }
            // A retry is pending: wait for its timer before polling again.
            if (stepData?.Status === OperationStatus.PENDING) {
                checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
                    metadata: buildMetadata(),
                    endTimestamp: stepData.StepDetails?.NextAttemptTimestamp,
                });
                await checkpoint.waitForRetryTimer(stepId);
            }
            return await executeCheckLogic();
        })();
        // Avoid an unhandled rejection; the error is re-thrown when the
        // DurablePromise below awaits phase1Promise.
        phase1Promise.catch(() => { });
        return new DurablePromise(async () => {
            checkpoint.markOperationAwaited(stepId);
            return await phase1Promise;
        });
    };
};
2651
1944
 
2652
/**
 * Builds the DurablePromise that represents phase 2 of a callback operation.
 *
 * The promise body marks the operation as awaited, waits for the checkpoint
 * layer to report a status change, then inspects the step data:
 *  - SUCCEEDED: the operation is marked COMPLETED and the deserialized
 *    `CallbackDetails.Result` is returned.
 *  - anything else: the operation is marked COMPLETED and a CallbackError is
 *    thrown, carrying the stored error details when there are any.
 *
 * @param context Execution context (`getStepData`, `terminationManager`,
 *   `durableExecutionArn` are read).
 * @param checkpoint Checkpoint manager (`markOperationAwaited`,
 *   `waitForStatusChange`, `markOperationState` are used).
 * @param stepId Id of the callback operation.
 * @param stepName Optional operation name, forwarded to deserialization.
 * @param serdes Serdes used to deserialize the callback result.
 * @param checkAndUpdateReplayMode Hook invoked after a successful completion.
 * @returns DurablePromise of the callback result.
 * @throws {CallbackError} When the callback did not succeed, or succeeded
 *   without callback details.
 */
const createCallbackPromise = (context, checkpoint, stepId, stepName, serdes, checkAndUpdateReplayMode) => {
    // Converts the stored error object (possibly absent) into a CallbackError.
    const toCallbackError = (storedError) => {
        if (!storedError) {
            return new CallbackError("Callback failed");
        }
        const cause = new Error(storedError.ErrorMessage);
        cause.name = storedError.ErrorType || "Error";
        cause.stack = storedError.StackTrace?.join("\n");
        return new CallbackError(storedError.ErrorMessage || "Callback failed", cause, storedError.ErrorData);
    };
    const awaitCallback = async () => {
        log("🔄", "Callback promise phase 2:", { stepId, stepName });
        checkpoint.markOperationAwaited(stepId);
        await checkpoint.waitForStatusChange(stepId);
        const operation = context.getStepData(stepId);
        if (operation?.Status !== OperationStatus.SUCCEEDED) {
            // Every non-success outcome is reported as a callback failure.
            log("❌", "Callback failed:", { stepId, status: operation?.Status });
            checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
            throw toCallbackError(operation?.CallbackDetails?.Error);
        }
        log("✅", "Callback completed:", { stepId });
        checkAndUpdateReplayMode();
        checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
        const details = operation.CallbackDetails;
        if (!details) {
            throw new CallbackError(`No callback data found for completed callback: ${stepId}`);
        }
        return await safeDeserialize(serdes, details.Result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
    };
    return new DurablePromise(awaitCallback);
};
2728
1976
 
@@ -2730,7 +1978,7 @@ const createPassThroughSerdes = () => ({
2730
1978
  serialize: async (value) => value,
2731
1979
  deserialize: async (data) => data,
2732
1980
  });
2733
- const createCallback = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, checkAndUpdateReplayMode, parentId) => {
1981
+ const createCallback = (context, checkpoint, createStepId, checkAndUpdateReplayMode, parentId) => {
2734
1982
  return (nameOrConfig, maybeConfig) => {
2735
1983
  let name;
2736
1984
  let config;
@@ -2743,82 +1991,99 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2743
1991
  }
2744
1992
  const stepId = createStepId();
2745
1993
  const serdes = config?.serdes || createPassThroughSerdes();
2746
- // Validate replay consistency first
2747
- const stepData = context.getStepData(stepId);
2748
- validateReplayConsistency(stepId, {
2749
- type: OperationType.CALLBACK,
2750
- name,
2751
- subType: OperationSubType.CALLBACK,
2752
- }, stepData, context);
2753
- // Phase 1: Setup and checkpoint (immediate execution)
2754
- const setupPromise = (async () => {
2755
- log("📞", "Creating callback phase 1:", { stepId, name, config });
2756
- // Handle already completed callbacks
1994
+ // Phase 1: Setup and checkpoint
1995
+ let isCompleted = false;
1996
+ const phase1Promise = (async () => {
1997
+ log("📞", "Callback phase 1:", { stepId, name });
1998
+ let stepData = context.getStepData(stepId);
1999
+ // Validate replay consistency
2000
+ validateReplayConsistency(stepId, {
2001
+ type: OperationType.CALLBACK,
2002
+ name,
2003
+ subType: OperationSubType.CALLBACK,
2004
+ }, stepData, context);
2005
+ // Check if already completed
2757
2006
  if (stepData?.Status === OperationStatus.SUCCEEDED) {
2758
- log("⏭️", "Callback already completed in phase 1:", { stepId });
2759
- return { wasNewCallback: false };
2007
+ log("⏭️", "Callback already completed:", { stepId });
2008
+ checkAndUpdateReplayMode();
2009
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
2010
+ metadata: {
2011
+ stepId,
2012
+ name,
2013
+ type: OperationType.CALLBACK,
2014
+ subType: OperationSubType.CALLBACK,
2015
+ parentId,
2016
+ },
2017
+ });
2018
+ isCompleted = true;
2019
+ return;
2760
2020
  }
2021
+ // Check if already failed
2761
2022
  if (stepData?.Status === OperationStatus.FAILED ||
2762
2023
  stepData?.Status === OperationStatus.TIMED_OUT) {
2763
- log("❌", "Callback already failed in phase 1:", { stepId });
2764
- return { wasNewCallback: false };
2024
+ log("❌", "Callback already failed:", { stepId });
2025
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
2026
+ metadata: {
2027
+ stepId,
2028
+ name,
2029
+ type: OperationType.CALLBACK,
2030
+ subType: OperationSubType.CALLBACK,
2031
+ parentId,
2032
+ },
2033
+ });
2034
+ isCompleted = true;
2035
+ return;
2765
2036
  }
2766
- // Handle already started callbacks
2767
- if (stepData?.Status === OperationStatus.STARTED) {
2768
- log("⏳", "Callback already started in phase 1:", { stepId });
2769
- return { wasNewCallback: false };
2037
+ // Start callback if not already started
2038
+ if (!stepData) {
2039
+ await checkpoint.checkpoint(stepId, {
2040
+ Id: stepId,
2041
+ ParentId: parentId,
2042
+ Action: "START",
2043
+ SubType: OperationSubType.CALLBACK,
2044
+ Type: OperationType.CALLBACK,
2045
+ Name: name,
2046
+ CallbackOptions: {
2047
+ TimeoutSeconds: config?.timeout
2048
+ ? durationToSeconds(config.timeout)
2049
+ : undefined,
2050
+ HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2051
+ ? durationToSeconds(config.heartbeatTimeout)
2052
+ : undefined,
2053
+ },
2054
+ });
2055
+ // Refresh stepData after checkpoint
2056
+ stepData = context.getStepData(stepId);
2770
2057
  }
2771
- // Create new callback - checkpoint START operation
2772
- log("🆕", "Creating new callback in phase 1:", { stepId, name });
2773
- await checkpoint.checkpoint(stepId, {
2774
- Id: stepId,
2775
- ParentId: parentId,
2776
- Action: "START",
2777
- SubType: OperationSubType.CALLBACK,
2778
- Type: OperationType.CALLBACK,
2779
- Name: name,
2780
- CallbackOptions: {
2781
- TimeoutSeconds: config?.timeout
2782
- ? durationToSeconds(config.timeout)
2783
- : undefined,
2784
- HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2785
- ? durationToSeconds(config.heartbeatTimeout)
2786
- : undefined,
2058
+ // Mark as IDLE_NOT_AWAITED
2059
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
2060
+ metadata: {
2061
+ stepId,
2062
+ name,
2063
+ type: OperationType.CALLBACK,
2064
+ subType: OperationSubType.CALLBACK,
2065
+ parentId,
2787
2066
  },
2788
2067
  });
2789
- log("✅", "Callback checkpoint completed in phase 1:", { stepId });
2790
- return { wasNewCallback: true };
2791
- })().catch((error) => {
2792
- log("❌", "Callback phase 1 error:", { stepId, error: error.message });
2793
- throw error;
2794
- });
2795
- // Return DurablePromise that executes phase 2 when awaited
2068
+ log("✅", "Callback phase 1 complete:", { stepId });
2069
+ })();
2070
+ phase1Promise.catch(() => { });
2071
+ // Phase 2: Handle results and create callback promise
2796
2072
  return new DurablePromise(async () => {
2797
- // Wait for phase 1 to complete
2798
- const { wasNewCallback } = await setupPromise;
2799
- // Phase 2: Handle results and create callback promise
2800
- log("🔄", "Callback phase 2 executing:", { stepId, name });
2801
- const stepData = context.getStepData(stepId);
2802
- // Handle completed callbacks
2803
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2804
- const callbackData = stepData.CallbackDetails;
2073
+ await phase1Promise;
2074
+ if (isCompleted) {
2075
+ const stepData = context.getStepData(stepId);
2076
+ const callbackData = stepData?.CallbackDetails;
2805
2077
  if (!callbackData?.CallbackId) {
2806
- throw new CallbackError(`No callback ID found for completed callback: ${stepId}`);
2078
+ throw new CallbackError(`No callback ID found for callback: ${stepId}`);
2807
2079
  }
2808
- const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2809
- const resolvedPromise = new DurablePromise(async () => deserializedResult);
2810
- // Check and update replay mode after callback completion
2811
- checkAndUpdateReplayMode();
2812
- return [resolvedPromise, callbackData.CallbackId];
2813
- }
2814
- // Handle failed callbacks
2815
- if (stepData?.Status === OperationStatus.FAILED ||
2816
- stepData?.Status === OperationStatus.TIMED_OUT) {
2817
- const callbackData = stepData.CallbackDetails;
2818
- if (!callbackData?.CallbackId) {
2819
- throw new CallbackError(`No callback ID found for failed callback: ${stepId}`);
2080
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
2081
+ const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2082
+ const resolvedPromise = new DurablePromise(async () => deserializedResult);
2083
+ return [resolvedPromise, callbackData.CallbackId];
2820
2084
  }
2821
- const error = stepData.CallbackDetails?.Error;
2085
+ // Handle failure
2086
+ const error = stepData?.CallbackDetails?.Error;
2822
2087
  const callbackError = error
2823
2088
  ? (() => {
2824
2089
  const cause = new Error(error.ErrorMessage);
@@ -2832,25 +2097,15 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2832
2097
  });
2833
2098
  return [rejectedPromise, callbackData.CallbackId];
2834
2099
  }
2835
- // Handle started or new callbacks
2100
+ log("📞", "Callback phase 2:", { stepId });
2101
+ const stepData = context.getStepData(stepId);
2836
2102
  const callbackData = stepData?.CallbackDetails;
2837
2103
  if (!callbackData?.CallbackId) {
2838
- const errorMessage = wasNewCallback
2839
- ? `Callback ID not found in stepData after checkpoint: ${stepId}`
2840
- : `No callback ID found for started callback: ${stepId}`;
2841
- throw new CallbackError(errorMessage);
2104
+ throw new CallbackError(`No callback ID found for started callback: ${stepId}`);
2842
2105
  }
2843
2106
  const callbackId = callbackData.CallbackId;
2844
- // Create callback promise that handles completion
2845
- const terminationMessage = wasNewCallback
2846
- ? `Callback ${name || stepId} created and pending external completion`
2847
- : `Callback ${name || stepId} is pending external completion`;
2848
- const callbackPromise = createCallbackPromise(context, stepId, name, serdes, hasRunningOperations, getOperationsEmitter(), terminationMessage, checkAndUpdateReplayMode);
2849
- log("✅", "Callback created successfully in phase 2:", {
2850
- stepId,
2851
- name,
2852
- callbackId,
2853
- });
2107
+ const callbackPromise = createCallbackPromise(context, checkpoint, stepId, name, serdes, checkAndUpdateReplayMode);
2108
+ log("✅", "Callback created:", { stepId, name, callbackId });
2854
2109
  return [callbackPromise, callbackId];
2855
2110
  });
2856
2111
  };
@@ -3307,6 +2562,42 @@ class ConcurrencyController {
3307
2562
  (childStepData.Status === OperationStatus.SUCCEEDED ||
3308
2563
  childStepData.Status === OperationStatus.FAILED));
3309
2564
  }
2565
+ getCompletionReason(failureCount, successCount, completedCount, items, config) {
2566
+ // Check tolerance first, before checking if all completed
2567
+ const completion = config.completionConfig;
2568
+ // Handle fail-fast behavior (no completion config or empty completion config)
2569
+ if (!completion) {
2570
+ if (failureCount > 0)
2571
+ return "FAILURE_TOLERANCE_EXCEEDED";
2572
+ }
2573
+ else {
2574
+ const hasAnyCompletionCriteria = Object.values(completion).some((value) => value !== undefined);
2575
+ if (!hasAnyCompletionCriteria) {
2576
+ if (failureCount > 0)
2577
+ return "FAILURE_TOLERANCE_EXCEEDED";
2578
+ }
2579
+ else {
2580
+ // Check specific tolerance thresholds
2581
+ if (completion.toleratedFailureCount !== undefined &&
2582
+ failureCount > completion.toleratedFailureCount) {
2583
+ return "FAILURE_TOLERANCE_EXCEEDED";
2584
+ }
2585
+ if (completion.toleratedFailurePercentage !== undefined) {
2586
+ const failurePercentage = (failureCount / items.length) * 100;
2587
+ if (failurePercentage > completion.toleratedFailurePercentage) {
2588
+ return "FAILURE_TOLERANCE_EXCEEDED";
2589
+ }
2590
+ }
2591
+ }
2592
+ }
2593
+ // Check other completion reasons
2594
+ if (completedCount === items.length)
2595
+ return "ALL_COMPLETED";
2596
+ if (config.completionConfig?.minSuccessful !== undefined &&
2597
+ successCount >= config.completionConfig.minSuccessful)
2598
+ return "MIN_SUCCESSFUL_REACHED";
2599
+ return "ALL_COMPLETED";
2600
+ }
3310
2601
  async executeItems(items, executor, parentContext, config, durableExecutionMode = DurableExecutionMode.ExecutionMode, entityId, executionContext) {
3311
2602
  // In replay mode, we're reconstructing the result from child contexts
3312
2603
  if (durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
@@ -3420,17 +2711,9 @@ class ConcurrencyController {
3420
2711
  completedCount,
3421
2712
  totalCount: resultItems.length,
3422
2713
  });
3423
- // Reconstruct the completion reason based on replay results
3424
2714
  const successCount = resultItems.filter((item) => item.status === BatchItemStatus.SUCCEEDED).length;
3425
- const getCompletionReason = () => {
3426
- if (completedCount === items.length)
3427
- return "ALL_COMPLETED";
3428
- if (config.completionConfig?.minSuccessful !== undefined &&
3429
- successCount >= config.completionConfig.minSuccessful)
3430
- return "MIN_SUCCESSFUL_REACHED";
3431
- return "FAILURE_TOLERANCE_EXCEEDED";
3432
- };
3433
- return new BatchResultImpl(resultItems, getCompletionReason());
2715
+ const failureCount = completedCount - successCount;
2716
+ return new BatchResultImpl(resultItems, this.getCompletionReason(failureCount, successCount, completedCount, items, config));
3434
2717
  }
3435
2718
  async executeItemsConcurrently(items, executor, parentContext, config) {
3436
2719
  const maxConcurrency = config.maxConcurrency || Infinity;
@@ -3477,13 +2760,8 @@ class ConcurrencyController {
3477
2760
  }
3478
2761
  return false;
3479
2762
  };
3480
- const getCompletionReason = () => {
3481
- if (completedCount === items.length)
3482
- return "ALL_COMPLETED";
3483
- if (config.completionConfig?.minSuccessful !== undefined &&
3484
- successCount >= config.completionConfig.minSuccessful)
3485
- return "MIN_SUCCESSFUL_REACHED";
3486
- return "FAILURE_TOLERANCE_EXCEEDED";
2763
+ const getCompletionReason = (failureCount) => {
2764
+ return this.getCompletionReason(failureCount, successCount, completedCount, items, config);
3487
2765
  };
3488
2766
  const tryStartNext = () => {
3489
2767
  while (activeCount < maxConcurrency &&
@@ -3554,14 +2832,20 @@ class ConcurrencyController {
3554
2832
  startedCount: finalBatchItems.filter((item) => item.status === BatchItemStatus.STARTED).length,
3555
2833
  totalCount: finalBatchItems.length,
3556
2834
  });
3557
- const result = new BatchResultImpl(finalBatchItems, getCompletionReason());
2835
+ const result = new BatchResultImpl(finalBatchItems, getCompletionReason(failureCount));
3558
2836
  resolve(result);
3559
2837
  }
3560
2838
  else {
3561
2839
  tryStartNext();
3562
2840
  }
3563
2841
  };
3564
- tryStartNext();
2842
+ if (items.length === 0) {
2843
+ log("🎉", `${this.operationName} completed with no items`);
2844
+ resolve(new BatchResultImpl([], getCompletionReason(0)));
2845
+ }
2846
+ else {
2847
+ tryStartNext();
2848
+ }
3565
2849
  });
3566
2850
  }
3567
2851
  }
@@ -3635,338 +2919,992 @@ const createConcurrentExecutionHandler = (context, runInChildContext, skipNextOp
3635
2919
  return new DurablePromise(async () => {
3636
2920
  return await phase1Promise;
3637
2921
  });
3638
- };
3639
- };
3640
-
3641
- class ModeManagement {
3642
- captureExecutionState;
3643
- checkAndUpdateReplayMode;
3644
- checkForNonResolvingPromise;
3645
- getDurableExecutionMode;
3646
- setDurableExecutionMode;
3647
- constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
3648
- this.captureExecutionState = captureExecutionState;
3649
- this.checkAndUpdateReplayMode = checkAndUpdateReplayMode;
3650
- this.checkForNonResolvingPromise = checkForNonResolvingPromise;
3651
- this.getDurableExecutionMode = getDurableExecutionMode;
3652
- this.setDurableExecutionMode = setDurableExecutionMode;
2922
+ };
2923
+ };
2924
+
2925
/**
 * Runs an operation factory surrounded by replay/execution mode bookkeeping.
 *
 * All behaviour is supplied through the five injected callbacks; this class
 * only sequences them:
 *   1. captureExecutionState() decides whether the mode must be switched to
 *      ExecutionMode once the operation factory has returned or thrown.
 *   2. checkAndUpdateReplayMode() is invoked.
 *   3. checkForNonResolvingPromise() may short-circuit the operation entirely.
 */
class ModeManagement {
    captureExecutionState;
    checkAndUpdateReplayMode;
    checkForNonResolvingPromise;
    getDurableExecutionMode;
    setDurableExecutionMode;
    constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
        Object.assign(this, {
            captureExecutionState,
            checkAndUpdateReplayMode,
            checkForNonResolvingPromise,
            getDurableExecutionMode,
            setDurableExecutionMode,
        });
    }
    /**
     * Runs `operation` and returns its result. When the non-resolving check
     * yields a promise, that promise is returned instead and `operation` is
     * never called (nor is the mode switched).
     */
    withModeManagement(operation) {
        const switchAfterwards = this.captureExecutionState();
        this.checkAndUpdateReplayMode();
        const pendingForever = this.checkForNonResolvingPromise();
        if (pendingForever) {
            return pendingForever;
        }
        try {
            return operation();
        }
        finally {
            // Executed whether operation() returned or threw.
            if (switchAfterwards) {
                this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
            }
        }
    }
    /**
     * Same sequencing as withModeManagement, except that a non-resolving
     * promise is wrapped in a DurablePromise rather than returned as-is.
     */
    withDurableModeManagement(operation) {
        const switchAfterwards = this.captureExecutionState();
        this.checkAndUpdateReplayMode();
        const pendingForever = this.checkForNonResolvingPromise();
        if (pendingForever) {
            return new DurablePromise(async () => {
                await pendingForever;
                // This will never be reached
                throw new Error("Unreachable code");
            });
        }
        try {
            return operation();
        }
        finally {
            if (switchAfterwards) {
                this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
            }
        }
    }
}
2974
+
2975
/** Number of hexadecimal characters kept from the digest. */
const HASH_LENGTH = 16;
/**
 * Hashes a string with MD5 and returns a shortened hexadecimal digest.
 * MD5 is used here for speed (compared with SHA-256).
 * @param input - The string to hash
 * @returns The first HASH_LENGTH characters of the hexadecimal digest
 */
const hashId = (input) => {
    const digest = createHash("md5");
    digest.update(input);
    const hex = digest.digest("hex");
    return hex.slice(0, HASH_LENGTH);
};
2987
/**
 * Looks up an operation in a stepData record by its original (unhashed) stepId.
 * The id is hashed here, so callers pass the plain stepId.
 * @param stepData - The stepData record, keyed by hashed id
 * @param stepId - The original stepId
 * @returns The entry stored under the hashed id, or undefined when absent
 */
const getStepData = (stepData, stepId) => stepData[hashId(stepId)];
2998
+
2999
/**
 * Durable context implementation.
 *
 * Each public operation (step, invoke, wait, ...) validates context usage,
 * then builds the matching handler inside withDurableModeManagement so that
 * mode bookkeeping surrounds handler creation and invocation. Step ids are
 * generated from a per-context counter, optionally prefixed with the
 * context's step prefix.
 */
class DurableContextImpl {
    executionContext;
    lambdaContext;
    _stepPrefix;
    _stepCounter = 0;
    durableLogger;
    modeAwareLoggingEnabled = true;
    checkpoint;
    durableExecutionMode;
    _parentId;
    modeManagement;
    durableExecution;
    logger;
    constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
        Object.assign(this, {
            executionContext,
            lambdaContext,
            _stepPrefix: stepPrefix,
            _parentId: parentId,
            durableExecution,
            durableLogger: inheritedLogger,
        });
        // The logger is wired up before the mode and checkpoint manager are stored.
        inheritedLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
        this.logger = this.createModeAwareLogger(inheritedLogger);
        this.durableExecutionMode = durableExecutionMode;
        this.checkpoint = durableExecution.checkpointManager;
        const readMode = () => this.durableExecutionMode;
        const writeMode = (mode) => {
            this.durableExecutionMode = mode;
        };
        this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), readMode, writeMode);
    }
    /**
     * Builds the object handed to loggers that support durable logging context.
     * getDurableLogData is evaluated lazily against the active context.
     */
    getDurableLoggingContext() {
        const getDurableLogData = () => {
            const active = getActiveContext();
            const data = {
                executionArn: this.executionContext.durableExecutionArn,
                requestId: this.executionContext.requestId,
                tenantId: this.executionContext.tenantId,
                operationId: undefined,
            };
            // No operation id is reported without an active context or for the root context.
            if (active && active.contextId !== "root") {
                data.operationId = hashId(active.contextId);
            }
            const attempt = active?.attempt;
            if (attempt !== undefined) {
                data.attempt = attempt;
            }
            return data;
        };
        return { getDurableLogData };
    }
    /**
     * Decides whether a log call should be forwarded: always when mode-aware
     * logging is off or there is no active context, otherwise only when the
     * relevant mode is ExecutionMode.
     */
    shouldLog() {
        const active = getActiveContext();
        if (!(this.modeAwareLoggingEnabled && active)) {
            return true;
        }
        // The root context uses this instance's mode; others carry their own.
        const effectiveMode = active.contextId === "root"
            ? this.durableExecutionMode
            : active.durableExecutionMode;
        return effectiveMode === DurableExecutionMode.ExecutionMode;
    }
    /**
     * Wraps `logger` so each call is forwarded only when shouldLog() allows it.
     * A `log` method is added only if the wrapped logger has one.
     */
    createModeAwareLogger(logger) {
        const forward = (method) => (...args) => {
            if (!this.shouldLog()) {
                return undefined;
            }
            return logger[method](...args);
        };
        const modeAwareLogger = {
            warn: forward("warn"),
            debug: forward("debug"),
            info: forward("info"),
            error: forward("error"),
        };
        if ("log" in logger) {
            modeAwareLogger.log = (level, ...args) => {
                if (!this.shouldLog()) {
                    return undefined;
                }
                return logger.log?.(level, ...args);
            };
        }
        return modeAwareLogger;
    }
    /** Advances the step counter and returns the id for the new step. */
    createStepId() {
        this._stepCounter += 1;
        const ordinal = `${this._stepCounter}`;
        if (this._stepPrefix) {
            return `${this._stepPrefix}-${ordinal}`;
        }
        return ordinal;
    }
    /** Returns the id the next step would get, without advancing the counter. */
    getNextStepId() {
        const ordinal = `${this._stepCounter + 1}`;
        if (this._stepPrefix) {
            return `${this._stepPrefix}-${ordinal}`;
        }
        return ordinal;
    }
    /**
     * Skips the next operation by incrementing the step counter.
     * Used internally by concurrent execution handler during replay to skip incomplete items.
     * @internal
     */
    skipNextOperation() {
        this._stepCounter += 1;
    }
    /** Leaves ReplayMode once there is no step data for the next step id. */
    checkAndUpdateReplayMode() {
        if (this.durableExecutionMode !== DurableExecutionMode.ReplayMode) {
            return;
        }
        const upcoming = this.executionContext.getStepData(this.getNextStepId());
        if (!upcoming) {
            this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
        }
    }
    /**
     * Returns true when in ReplayMode and the next step has recorded data
     * whose status is neither SUCCEEDED nor FAILED.
     */
    captureExecutionState() {
        const replaying = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
        const upcoming = this.executionContext.getStepData(this.getNextStepId());
        const unfinished = Boolean(upcoming) &&
            !(upcoming.Status === OperationStatus.SUCCEEDED ||
                upcoming.Status === OperationStatus.FAILED);
        return replaying && unfinished;
    }
    /**
     * In ReplaySucceededContext mode, returns a promise that never settles when
     * the next step has data that is neither SUCCEEDED nor FAILED; otherwise null.
     */
    checkForNonResolvingPromise() {
        if (this.durableExecutionMode !== DurableExecutionMode.ReplaySucceededContext) {
            return null;
        }
        const upcoming = this.executionContext.getStepData(this.getNextStepId());
        const unfinished = upcoming &&
            !(upcoming.Status === OperationStatus.SUCCEEDED ||
                upcoming.Status === OperationStatus.FAILED);
        if (unfinished) {
            return new Promise(() => { }); // Non-resolving promise
        }
        return null;
    }
    withModeManagement(operation) {
        return this.modeManagement.withModeManagement(operation);
    }
    withDurableModeManagement(operation) {
        return this.modeManagement.withDurableModeManagement(operation);
    }
    step(nameOrFn, fnOrOptions, maybeOptions) {
        validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
        const run = () => createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this._parentId)(nameOrFn, fnOrOptions, maybeOptions);
        return this.withDurableModeManagement(run);
    }
    invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
        const run = () => createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this))(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig);
        return this.withDurableModeManagement(run);
    }
    runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
        validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
        // Adapter function to maintain compatibility: the checkpoint token
        // argument is ignored and this context's durableExecution is passed on.
        const childContextFactory = (childExecutionContext, parentContext, childMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(childExecutionContext, parentContext, childMode, inheritedLogger, stepPrefix, this.durableExecution, parentId);
        const run = () => createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger, childContextFactory, this._parentId)(nameOrFn, fnOrOptions, maybeOptions);
        return this.withDurableModeManagement(run);
    }
    wait(nameOrDuration, maybeDuration) {
        validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
        return this.withDurableModeManagement(() => {
            const handler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
            // A string first argument is a name; otherwise it is the duration itself.
            if (typeof nameOrDuration === "string") {
                return handler(nameOrDuration, maybeDuration);
            }
            return handler(nameOrDuration);
        });
    }
    /**
     * Configure logger behavior for this context.
     *
     * Configuration is partial: only the properties present on `config` are
     * applied. Passing only `modeAware` leaves a previously configured custom
     * logger untouched.
     *
     * @param config - Options `customLogger` and `modeAware` (modeAware defaults to true on the context)
     * @example
     * // Set custom logger and enable mode-aware logging
     * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
     *
     * // Later, disable mode-aware logging without changing the custom logger
     * context.configureLogger(\{ modeAware: false \});
     */
    configureLogger(config) {
        const { customLogger, modeAware } = config;
        if (customLogger !== undefined) {
            this.durableLogger = customLogger;
            customLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
            this.logger = this.createModeAwareLogger(customLogger);
        }
        if (modeAware !== undefined) {
            this.modeAwareLoggingEnabled = modeAware;
        }
    }
    createCallback(nameOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
        const run = () => createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId)(nameOrConfig, maybeConfig);
        return this.withDurableModeManagement(run);
    }
    waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
        const run = () => createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this))(nameOrSubmitter, submitterOrConfig, maybeConfig);
        return this.withDurableModeManagement(run);
    }
    waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
        return this.withDurableModeManagement(() => {
            const handler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this._parentId);
            // A string or undefined first argument selects the three-argument form.
            const isNamedForm = typeof nameOrCheckFunc === "string" || nameOrCheckFunc === undefined;
            if (isNamedForm) {
                return handler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig);
            }
            return handler(nameOrCheckFunc, checkFuncOrConfig);
        });
    }
    map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
        const run = () => createMapHandler(this.executionContext, this._executeConcurrently.bind(this))(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
        return this.withDurableModeManagement(run);
    }
    parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
        const run = () => createParallelHandler(this.executionContext, this._executeConcurrently.bind(this))(nameOrBranches, branchesOrConfig, maybeConfig);
        return this.withDurableModeManagement(run);
    }
    _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
        validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
        return this.withDurableModeManagement(() => {
            const handler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
            const pending = handler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
            // Prevent unhandled promise rejections
            pending?.catch(() => { });
            return pending;
        });
    }
    get promise() {
        return createPromiseHandler(this.step.bind(this));
    }
}
3258
/**
 * Factory for DurableContextImpl; forwards all seven arguments unchanged.
 */
const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3261
+
3262
/**
 * Raised when checkpointing fails for an invocation-level reason
 * (e.g., 5xx errors, invalid checkpoint token).
 * Only the current Lambda invocation is terminated; the execution can
 * continue with a new invocation.
 */
class CheckpointUnrecoverableInvocationError extends UnrecoverableInvocationError {
    terminationReason = TerminationReason.CHECKPOINT_FAILED;
    constructor(message, originalError) {
        // Any falsy message is replaced by the default text.
        const text = message ? message : "Checkpoint operation failed";
        super(text, originalError);
    }
}
3273
/**
 * Raised when checkpointing fails for an execution-level reason
 * (e.g., 4xx errors other than invalid checkpoint token).
 * The entire execution is terminated and cannot be recovered.
 */
class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError {
    terminationReason = TerminationReason.CHECKPOINT_FAILED;
    constructor(message, originalError) {
        // Any falsy message is replaced by the default text.
        const text = message ? message : "Checkpoint operation failed";
        super(text, originalError);
    }
}
3284
+
3285
/** Event name string; presumably emitted on the step-data emitter — confirm against its users. */
const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
/** Operation statuses grouped as terminal (order preserved). */
const TERMINAL_STATUSES = [
    OperationStatus.SUCCEEDED, OperationStatus.CANCELLED, OperationStatus.FAILED,
    OperationStatus.STOPPED, OperationStatus.TIMED_OUT,
];
3293
+ class CheckpointManager {
3294
+ durableExecutionArn;
3295
+ stepData;
3296
+ storage;
3297
+ terminationManager;
3298
+ stepDataEmitter;
3299
+ logger;
3300
+ finishedAncestors;
3301
+ queue = [];
3302
+ isProcessing = false;
3303
+ currentTaskToken;
3304
+ forceCheckpointPromises = [];
3305
+ queueCompletionResolver = null;
3306
+ MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
3307
+ MAX_ITEMS_IN_BATCH = 250;
3308
+ isTerminating = false;
3309
+ static textEncoder = new TextEncoder();
3310
+ // Operation lifecycle tracking
3311
+ operations = new Map();
3312
+ // Termination cooldown
3313
+ terminationTimer = null;
3314
+ terminationReason = null;
3315
+ TERMINATION_COOLDOWN_MS = 50;
3316
+ constructor(durableExecutionArn, stepData, storage, terminationManager, initialTaskToken, stepDataEmitter, logger, finishedAncestors) {
3317
+ this.durableExecutionArn = durableExecutionArn;
3318
+ this.stepData = stepData;
3319
+ this.storage = storage;
3320
+ this.terminationManager = terminationManager;
3321
+ this.stepDataEmitter = stepDataEmitter;
3322
+ this.logger = logger;
3323
+ this.finishedAncestors = finishedAncestors;
3324
+ this.currentTaskToken = initialTaskToken;
3325
+ }
3326
+ setTerminating() {
3327
+ this.isTerminating = true;
3328
+ log("🛑", "Checkpoint manager marked as terminating");
3329
+ }
3330
+ /**
3331
+ * Mark an ancestor as finished (for run-in-child-context operations)
3332
+ */
3333
+ markAncestorFinished(stepId) {
3334
+ this.finishedAncestors.add(stepId);
3335
+ }
3336
+ /**
3337
+ * Extract parent ID from hierarchical stepId (e.g., "1-2-3" -\> "1-2")
3338
+ */
3339
+ getParentId(stepId) {
3340
+ const lastDashIndex = stepId.lastIndexOf("-");
3341
+ return lastDashIndex > 0 ? stepId.substring(0, lastDashIndex) : undefined;
3342
+ }
3343
+ /**
3344
+ * Checks if any ancestor of the given stepId is finished
3345
+ * Only applies to operations that are descendants of run-in-child-context operations
3346
+ */
3347
+ hasFinishedAncestor(stepId) {
3348
+ // Only use getParentId to avoid mixing hashed and original stepIds
3349
+ let currentParentId = this.getParentId(stepId);
3350
+ while (currentParentId) {
3351
+ // Check if this ancestor is finished
3352
+ if (this.finishedAncestors.has(currentParentId)) {
3353
+ return true;
3354
+ }
3355
+ // Move up to the next ancestor using hierarchical stepId
3356
+ currentParentId = this.getParentId(currentParentId);
3357
+ }
3358
+ return false;
3359
+ }
3360
+ async forceCheckpoint() {
3361
+ if (this.isTerminating) {
3362
+ log("⚠️", "Force checkpoint skipped - termination in progress");
3363
+ return new Promise(() => { }); // Never resolves during termination
3364
+ }
3365
+ return new Promise((resolve, reject) => {
3366
+ this.forceCheckpointPromises.push({ resolve, reject });
3367
+ if (!this.isProcessing) {
3368
+ setImmediate(() => {
3369
+ this.processQueue();
3370
+ });
3371
+ }
3372
+ });
3373
+ }
3374
+ async waitForQueueCompletion() {
3375
+ if (this.queue.length === 0 && !this.isProcessing) {
3376
+ return;
3377
+ }
3378
+ return new Promise((resolve) => {
3379
+ this.queueCompletionResolver = resolve;
3380
+ });
3381
+ }
3382
+ clearQueue() {
3383
+ // Silently clear queue - we're terminating so no need to reject promises
3384
+ this.queue = [];
3385
+ this.forceCheckpointPromises = [];
3386
+ // Resolve any waiting queue completion promises since we're clearing
3387
+ this.notifyQueueCompletion();
3388
+ }
3389
+ // Alias for backward compatibility with Checkpoint interface
3390
+ async force() {
3391
+ return this.forceCheckpoint();
3392
+ }
3393
+ async checkpoint(stepId, data) {
3394
+ if (this.isTerminating) {
3395
+ log("⚠️", "Checkpoint skipped - termination in progress:", { stepId });
3396
+ return new Promise(() => { }); // Never resolves during termination
3397
+ }
3398
+ // Check if any ancestor is finished - if so, don't queue and don't resolve
3399
+ if (this.hasFinishedAncestor(stepId)) {
3400
+ log("⚠️", "Checkpoint skipped - ancestor already finished:", { stepId });
3401
+ return new Promise(() => { }); // Never resolves when ancestor is finished
3402
+ }
3403
+ return new Promise((resolve, reject) => {
3404
+ const queuedItem = {
3405
+ stepId,
3406
+ data,
3407
+ resolve: () => {
3408
+ resolve();
3409
+ },
3410
+ reject: (error) => {
3411
+ reject(error);
3412
+ },
3413
+ };
3414
+ this.queue.push(queuedItem);
3415
+ log("📥", "Checkpoint queued:", {
3416
+ stepId,
3417
+ queueLength: this.queue.length,
3418
+ isProcessing: this.isProcessing,
3419
+ });
3420
+ if (!this.isProcessing) {
3421
+ setImmediate(() => {
3422
+ this.processQueue();
3423
+ });
3424
+ }
3425
+ });
3426
+ }
3427
+ classifyCheckpointError(error) {
3428
+ const originalError = error instanceof Error ? error : new Error(String(error));
3429
+ const awsError = error;
3430
+ const statusCode = awsError.$metadata?.httpStatusCode;
3431
+ const errorName = awsError.name;
3432
+ const errorMessage = awsError.message || originalError.message;
3433
+ log("🔍", "Classifying checkpoint error:", {
3434
+ statusCode,
3435
+ errorName,
3436
+ errorMessage,
3437
+ });
3438
+ if (statusCode &&
3439
+ statusCode >= 400 &&
3440
+ statusCode < 500 &&
3441
+ errorName === "InvalidParameterValueException" &&
3442
+ errorMessage.startsWith("Invalid Checkpoint Token")) {
3443
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3444
+ }
3445
+ if (statusCode &&
3446
+ statusCode >= 400 &&
3447
+ statusCode < 500 &&
3448
+ statusCode !== 429) {
3449
+ return new CheckpointUnrecoverableExecutionError(`Checkpoint failed: ${errorMessage}`, originalError);
3450
+ }
3451
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3653
3452
  }
3654
- withModeManagement(operation) {
3655
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3656
- this.checkAndUpdateReplayMode();
3657
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3658
- if (nonResolvingPromise)
3659
- return nonResolvingPromise;
3660
- try {
3661
- return operation();
3453
+ async processQueue() {
3454
+ if (this.isProcessing) {
3455
+ return;
3662
3456
  }
3663
- finally {
3664
- if (shouldSwitchToExecutionMode) {
3665
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3457
+ const hasQueuedItems = this.queue.length > 0;
3458
+ const hasForceRequests = this.forceCheckpointPromises.length > 0;
3459
+ if (!hasQueuedItems && !hasForceRequests) {
3460
+ return;
3461
+ }
3462
+ this.isProcessing = true;
3463
+ const batch = [];
3464
+ const baseSize = this.currentTaskToken.length + 100;
3465
+ let currentSize = baseSize;
3466
+ while (this.queue.length > 0) {
3467
+ const nextItem = this.queue[0];
3468
+ const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
3469
+ if ((currentSize + itemSize > this.MAX_PAYLOAD_SIZE ||
3470
+ batch.length >= this.MAX_ITEMS_IN_BATCH) &&
3471
+ batch.length > 0) {
3472
+ break;
3666
3473
  }
3474
+ this.queue.shift();
3475
+ batch.push(nextItem);
3476
+ currentSize += itemSize;
3667
3477
  }
3668
- }
3669
- withDurableModeManagement(operation) {
3670
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3671
- this.checkAndUpdateReplayMode();
3672
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3673
- if (nonResolvingPromise) {
3674
- return new DurablePromise(async () => {
3675
- await nonResolvingPromise;
3676
- // This will never be reached
3677
- throw new Error("Unreachable code");
3478
+ log("🔄", "Processing checkpoint batch:", {
3479
+ batchSize: batch.length,
3480
+ remainingInQueue: this.queue.length,
3481
+ estimatedSize: currentSize,
3482
+ maxSize: this.MAX_PAYLOAD_SIZE,
3483
+ });
3484
+ try {
3485
+ if (batch.length > 0 || this.forceCheckpointPromises.length > 0) {
3486
+ await this.processBatch(batch);
3487
+ }
3488
+ batch.forEach((item) => {
3489
+ item.resolve();
3490
+ });
3491
+ const forcePromises = this.forceCheckpointPromises.splice(0);
3492
+ forcePromises.forEach((promise) => {
3493
+ promise.resolve();
3494
+ });
3495
+ log("✅", "Checkpoint batch processed successfully:", {
3496
+ batchSize: batch.length,
3497
+ forceRequests: forcePromises.length,
3498
+ newTaskToken: this.currentTaskToken,
3678
3499
  });
3679
3500
  }
3680
- try {
3681
- return operation();
3501
+ catch (error) {
3502
+ log("❌", "Checkpoint batch failed:", {
3503
+ batchSize: batch.length,
3504
+ error,
3505
+ });
3506
+ const checkpointError = this.classifyCheckpointError(error);
3507
+ // Clear remaining queue silently - we're terminating
3508
+ this.clearQueue();
3509
+ this.terminationManager.terminate({
3510
+ reason: TerminationReason.CHECKPOINT_FAILED,
3511
+ message: checkpointError.message,
3512
+ error: checkpointError,
3513
+ });
3682
3514
  }
3683
3515
  finally {
3684
- if (shouldSwitchToExecutionMode) {
3685
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3516
+ this.isProcessing = false;
3517
+ if (this.queue.length > 0) {
3518
+ setImmediate(() => {
3519
+ this.processQueue();
3520
+ });
3521
+ }
3522
+ else {
3523
+ // Queue is empty and processing is done - notify all waiting promises
3524
+ this.notifyQueueCompletion();
3686
3525
  }
3687
3526
  }
3688
3527
  }
3689
- }
3690
-
3691
- class DurableContextImpl {
3692
- executionContext;
3693
- lambdaContext;
3694
- _stepPrefix;
3695
- _stepCounter = 0;
3696
- durableLogger;
3697
- modeAwareLoggingEnabled = true;
3698
- runningOperations = new Set();
3699
- operationsEmitter = new EventEmitter();
3700
- checkpoint;
3701
- durableExecutionMode;
3702
- _parentId;
3703
- modeManagement;
3704
- durableExecution;
3705
- logger;
3706
- constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
3707
- this.executionContext = executionContext;
3708
- this.lambdaContext = lambdaContext;
3709
- this._stepPrefix = stepPrefix;
3710
- this._parentId = parentId;
3711
- this.durableExecution = durableExecution;
3712
- this.durableLogger = inheritedLogger;
3713
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3714
- this.logger = this.createModeAwareLogger(inheritedLogger);
3715
- this.durableExecutionMode = durableExecutionMode;
3716
- this.checkpoint = durableExecution.checkpointManager;
3717
- this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), () => this.durableExecutionMode, (mode) => {
3718
- this.durableExecutionMode = mode;
3719
- });
3720
- }
3721
- getDurableLoggingContext() {
3722
- return {
3723
- getDurableLogData: () => {
3724
- const activeContext = getActiveContext();
3725
- const result = {
3726
- executionArn: this.executionContext.durableExecutionArn,
3727
- requestId: this.executionContext.requestId,
3728
- tenantId: this.executionContext.tenantId,
3729
- operationId: !activeContext || activeContext?.contextId === "root"
3730
- ? undefined
3731
- : hashId(activeContext.contextId),
3732
- };
3733
- if (activeContext?.attempt !== undefined) {
3734
- result.attempt = activeContext.attempt;
3735
- }
3736
- return result;
3737
- },
3738
- };
3739
- }
3740
- shouldLog() {
3741
- const activeContext = getActiveContext();
3742
- if (!this.modeAwareLoggingEnabled || !activeContext) {
3743
- return true;
3744
- }
3745
- if (activeContext.contextId === "root") {
3746
- return this.durableExecutionMode === DurableExecutionMode.ExecutionMode;
3528
+ notifyQueueCompletion() {
3529
+ if (this.queueCompletionResolver) {
3530
+ this.queueCompletionResolver();
3531
+ this.queueCompletionResolver = null;
3747
3532
  }
3748
- return (activeContext.durableExecutionMode === DurableExecutionMode.ExecutionMode);
3749
3533
  }
3750
- createModeAwareLogger(logger) {
3751
- const durableContextLogger = {
3752
- warn: (...args) => {
3753
- if (this.shouldLog()) {
3754
- return logger.warn(...args);
3755
- }
3756
- },
3757
- debug: (...args) => {
3758
- if (this.shouldLog()) {
3759
- return logger.debug(...args);
3760
- }
3761
- },
3762
- info: (...args) => {
3763
- if (this.shouldLog()) {
3764
- return logger.info(...args);
3765
- }
3766
- },
3767
- error: (...args) => {
3768
- if (this.shouldLog()) {
3769
- return logger.error(...args);
3770
- }
3771
- },
3772
- };
3773
- if ("log" in logger) {
3774
- durableContextLogger.log = (level, ...args) => {
3775
- if (this.shouldLog()) {
3776
- return logger.log?.(level, ...args);
3777
- }
3534
+ async processBatch(batch) {
3535
+ const updates = batch.map((item) => {
3536
+ const hashedStepId = hashId(item.stepId);
3537
+ const update = {
3538
+ Type: item.data.Type || "STEP",
3539
+ Action: item.data.Action || "START",
3540
+ ...item.data,
3541
+ Id: hashedStepId,
3542
+ ...(item.data.ParentId && { ParentId: hashId(item.data.ParentId) }),
3778
3543
  };
3544
+ return update;
3545
+ });
3546
+ const checkpointData = {
3547
+ DurableExecutionArn: this.durableExecutionArn,
3548
+ CheckpointToken: this.currentTaskToken,
3549
+ Updates: updates,
3550
+ };
3551
+ log("⏺️", "Creating checkpoint batch:", {
3552
+ batchSize: updates.length,
3553
+ checkpointToken: this.currentTaskToken,
3554
+ updates: updates.map((u) => ({
3555
+ Id: u.Id,
3556
+ Action: u.Action,
3557
+ Type: u.Type,
3558
+ })),
3559
+ });
3560
+ const response = await this.storage.checkpoint(checkpointData, this.logger);
3561
+ if (response.CheckpointToken) {
3562
+ this.currentTaskToken = response.CheckpointToken;
3563
+ }
3564
+ if (response.NewExecutionState?.Operations) {
3565
+ this.updateStepDataFromCheckpointResponse(response.NewExecutionState.Operations);
3779
3566
  }
3780
- return durableContextLogger;
3781
- }
3782
- createStepId() {
3783
- this._stepCounter++;
3784
- return this._stepPrefix
3785
- ? `${this._stepPrefix}-${this._stepCounter}`
3786
- : `${this._stepCounter}`;
3787
- }
3788
- getNextStepId() {
3789
- const nextCounter = this._stepCounter + 1;
3790
- return this._stepPrefix
3791
- ? `${this._stepPrefix}-${nextCounter}`
3792
- : `${nextCounter}`;
3793
3567
  }
3794
- /**
3795
- * Skips the next operation by incrementing the step counter.
3796
- * Used internally by concurrent execution handler during replay to skip incomplete items.
3797
- * @internal
3798
- */
3799
- skipNextOperation() {
3800
- this._stepCounter++;
3568
+ updateStepDataFromCheckpointResponse(operations) {
3569
+ log("🔄", "Updating stepData from checkpoint response:", {
3570
+ operationCount: operations.length,
3571
+ operationIds: operations.map((op) => op.Id).filter(Boolean),
3572
+ });
3573
+ operations.forEach((operation) => {
3574
+ if (operation.Id) {
3575
+ // Check if status changed
3576
+ const oldStatus = this.stepData[operation.Id]?.Status;
3577
+ const newStatus = operation.Status;
3578
+ this.stepData[operation.Id] = operation;
3579
+ log("📝", "Updated stepData entry:", operation);
3580
+ this.stepDataEmitter.emit(STEP_DATA_UPDATED_EVENT, operation.Id);
3581
+ // If status changed and we have a waiting promise, resolve it
3582
+ if (oldStatus !== newStatus) {
3583
+ this.resolveWaitingOperation(operation.Id);
3584
+ }
3585
+ }
3586
+ });
3587
+ log("✅", "StepData update completed:", {
3588
+ totalStepDataEntries: Object.keys(this.stepData).length,
3589
+ });
3801
3590
  }
3802
- checkAndUpdateReplayMode() {
3803
- if (this.durableExecutionMode === DurableExecutionMode.ReplayMode) {
3804
- const nextStepId = this.getNextStepId();
3805
- const nextStepData = this.executionContext.getStepData(nextStepId);
3806
- if (!nextStepData) {
3807
- this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
3591
+ resolveWaitingOperation(hashedStepId) {
3592
+ // Find operation by hashed ID in our operations map
3593
+ for (const [stepId, op] of this.operations.entries()) {
3594
+ if (hashId(stepId) === hashedStepId && op.resolver) {
3595
+ log("✅", `Resolving waiting operation ${stepId} due to status change`);
3596
+ op.resolver();
3597
+ op.resolver = undefined;
3598
+ if (op.timer) {
3599
+ clearTimeout(op.timer);
3600
+ op.timer = undefined;
3601
+ }
3602
+ break;
3808
3603
  }
3809
3604
  }
3810
3605
  }
3811
- captureExecutionState() {
3812
- const wasInReplayMode = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
3813
- const nextStepId = this.getNextStepId();
3814
- const stepData = this.executionContext.getStepData(nextStepId);
3815
- const wasNotFinished = !!(stepData &&
3816
- stepData.Status !== OperationStatus.SUCCEEDED &&
3817
- stepData.Status !== OperationStatus.FAILED);
3818
- return wasInReplayMode && wasNotFinished;
3606
+ getQueueStatus() {
3607
+ return {
3608
+ queueLength: this.queue.length,
3609
+ isProcessing: this.isProcessing,
3610
+ };
3819
3611
  }
3820
- checkForNonResolvingPromise() {
3821
- if (this.durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
3822
- const nextStepId = this.getNextStepId();
3823
- const nextStepData = this.executionContext.getStepData(nextStepId);
3824
- if (nextStepData &&
3825
- nextStepData.Status !== OperationStatus.SUCCEEDED &&
3826
- nextStepData.Status !== OperationStatus.FAILED) {
3827
- return new Promise(() => { }); // Non-resolving promise
3612
+ // ===== New Lifecycle & Termination Methods =====
3613
+ markOperationState(stepId, state, options) {
3614
+ let op = this.operations.get(stepId);
3615
+ if (!op) {
3616
+ // First call - create operation
3617
+ if (!options?.metadata) {
3618
+ throw new Error(`metadata required on first call for ${stepId}`);
3828
3619
  }
3620
+ op = {
3621
+ stepId,
3622
+ state,
3623
+ metadata: options.metadata,
3624
+ endTimestamp: options.endTimestamp,
3625
+ };
3626
+ this.operations.set(stepId, op);
3829
3627
  }
3830
- return null;
3831
- }
3832
- addRunningOperation(stepId) {
3833
- this.runningOperations.add(stepId);
3834
- }
3835
- removeRunningOperation(stepId) {
3836
- this.runningOperations.delete(stepId);
3837
- if (this.runningOperations.size === 0) {
3838
- this.operationsEmitter.emit(OPERATIONS_COMPLETE_EVENT);
3628
+ else {
3629
+ // Update existing operation
3630
+ op.state = state;
3631
+ if (options?.endTimestamp !== undefined) {
3632
+ op.endTimestamp = options.endTimestamp;
3633
+ }
3634
+ }
3635
+ // Cleanup if transitioning to COMPLETED
3636
+ if (state === OperationLifecycleState.COMPLETED) {
3637
+ this.cleanupOperation(stepId);
3638
+ }
3639
+ // Check if we should terminate
3640
+ // Don't check for IDLE_NOT_AWAITED - operation might be awaited later or intentionally not awaited
3641
+ if (state !== OperationLifecycleState.IDLE_NOT_AWAITED) {
3642
+ this.checkAndTerminate();
3839
3643
  }
3840
3644
  }
3841
- hasRunningOperations() {
3842
- return this.runningOperations.size > 0;
3843
- }
3844
- getOperationsEmitter() {
3845
- return this.operationsEmitter;
3846
- }
3847
- withModeManagement(operation) {
3848
- return this.modeManagement.withModeManagement(operation);
3849
- }
3850
- withDurableModeManagement(operation) {
3851
- return this.modeManagement.withDurableModeManagement(operation);
3852
- }
3853
- step(nameOrFn, fnOrOptions, maybeOptions) {
3854
- validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
3855
- return this.withDurableModeManagement(() => {
3856
- const stepHandler = createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3857
- return stepHandler(nameOrFn, fnOrOptions, maybeOptions);
3645
+ waitForRetryTimer(stepId) {
3646
+ const op = this.operations.get(stepId);
3647
+ if (!op) {
3648
+ throw new Error(`Operation ${stepId} not found`);
3649
+ }
3650
+ if (op.state !== OperationLifecycleState.RETRY_WAITING) {
3651
+ throw new Error(`Operation ${stepId} must be in RETRY_WAITING state, got ${op.state}`);
3652
+ }
3653
+ // Resolve immediately if the step was completed already
3654
+ const stepData = this.stepData[hashId(stepId)];
3655
+ if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
3656
+ return Promise.resolve();
3657
+ }
3658
+ // Start timer with polling
3659
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3660
+ // Return promise that resolves when status changes
3661
+ return new Promise((resolve) => {
3662
+ op.resolver = resolve;
3858
3663
  });
3859
3664
  }
3860
- invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
3861
- validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
3862
- return this.withDurableModeManagement(() => {
3863
- const invokeHandler = createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3864
- return invokeHandler(...[
3865
- nameOrFuncId,
3866
- funcIdOrInput,
3867
- inputOrConfig,
3868
- maybeConfig,
3869
- ]);
3665
+ waitForStatusChange(stepId) {
3666
+ const op = this.operations.get(stepId);
3667
+ if (!op) {
3668
+ throw new Error(`Operation ${stepId} not found`);
3669
+ }
3670
+ if (op.state !== OperationLifecycleState.IDLE_AWAITED) {
3671
+ throw new Error(`Operation ${stepId} must be in IDLE_AWAITED state, got ${op.state}`);
3672
+ }
3673
+ // Resolve immediately if the step was completed already
3674
+ const stepData = this.stepData[hashId(stepId)];
3675
+ if (stepData?.Status && TERMINAL_STATUSES.includes(stepData.Status)) {
3676
+ return Promise.resolve();
3677
+ }
3678
+ // Start timer with polling
3679
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3680
+ // Return promise that resolves when status changes
3681
+ return new Promise((resolve) => {
3682
+ op.resolver = resolve;
3870
3683
  });
3871
3684
  }
3872
- runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
3873
- validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
3874
- return this.withDurableModeManagement(() => {
3875
- const blockHandler = createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger,
3876
- // Adapter function to maintain compatibility
3877
- (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, this.durableExecution, parentId), this._parentId);
3878
- return blockHandler(nameOrFn, fnOrOptions, maybeOptions);
3879
- });
3685
+ markOperationAwaited(stepId) {
3686
+ const op = this.operations.get(stepId);
3687
+ if (!op) {
3688
+ log("⚠️", `Cannot mark operation as awaited: ${stepId} not found`);
3689
+ return;
3690
+ }
3691
+ // Transition IDLE_NOT_AWAITED → IDLE_AWAITED
3692
+ if (op.state === OperationLifecycleState.IDLE_NOT_AWAITED) {
3693
+ op.state = OperationLifecycleState.IDLE_AWAITED;
3694
+ log("📍", `Operation marked as awaited: ${stepId}`);
3695
+ // Check if we should terminate now that operation is awaited
3696
+ this.checkAndTerminate();
3697
+ }
3880
3698
  }
3881
- wait(nameOrDuration, maybeDuration) {
3882
- validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
3883
- return this.withDurableModeManagement(() => {
3884
- const waitHandler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3885
- return typeof nameOrDuration === "string"
3886
- ? waitHandler(nameOrDuration, maybeDuration)
3887
- : waitHandler(nameOrDuration);
3888
- });
3699
+ getOperationState(stepId) {
3700
+ return this.operations.get(stepId)?.state;
3701
+ }
3702
+ getAllOperations() {
3703
+ return new Map(this.operations);
3704
+ }
3705
+ // ===== Private Helper Methods =====
3706
+ cleanupOperation(stepId) {
3707
+ const op = this.operations.get(stepId);
3708
+ if (!op)
3709
+ return;
3710
+ // Clear timer
3711
+ if (op.timer) {
3712
+ clearTimeout(op.timer);
3713
+ op.timer = undefined;
3714
+ }
3715
+ // Clear resolver
3716
+ op.resolver = undefined;
3717
+ }
3718
+ cleanupAllOperations() {
3719
+ for (const op of this.operations.values()) {
3720
+ if (op.timer) {
3721
+ clearTimeout(op.timer);
3722
+ op.timer = undefined;
3723
+ }
3724
+ op.resolver = undefined;
3725
+ }
3889
3726
  }
3890
3727
  /**
3891
- * Configure logger behavior for this context
3892
- *
3893
- * This method allows partial configuration - only the properties provided will be updated.
3894
- * For example, calling configureLogger(\{ modeAware: false \}) will only change the modeAware
3895
- * setting without affecting any previously configured custom logger.
3896
- *
3897
- * @param config - Logger configuration options including customLogger and modeAware settings (default: modeAware=true)
3898
- * @example
3899
- * // Set custom logger and enable mode-aware logging
3900
- * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
3901
- *
3902
- * // Later, disable mode-aware logging without changing the custom logger
3903
- * context.configureLogger(\{ modeAware: false \});
3728
+ * Determines if the function should terminate.
3729
+ * @returns TerminationReason if the function should terminate, or undefined if the function should not terminate
3904
3730
  */
3905
- configureLogger(config) {
3906
- if (config.customLogger !== undefined) {
3907
- this.durableLogger = config.customLogger;
3908
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3909
- this.logger = this.createModeAwareLogger(this.durableLogger);
3731
+ shouldTerminate() {
3732
+ // Rule 1: Can't terminate if checkpoint queue is not empty
3733
+ if (this.queue.length > 0) {
3734
+ return undefined;
3910
3735
  }
3911
- if (config.modeAware !== undefined) {
3912
- this.modeAwareLoggingEnabled = config.modeAware;
3736
+ // Rule 2: Can't terminate if checkpoint is currently processing
3737
+ if (this.isProcessing) {
3738
+ return undefined;
3913
3739
  }
3740
+ // Rule 3: Can't terminate if there are pending force checkpoint promises
3741
+ if (this.forceCheckpointPromises.length > 0) {
3742
+ return undefined;
3743
+ }
3744
+ const allOps = Array.from(this.operations.values());
3745
+ // Rule 4: Can't terminate if any operation is EXECUTING
3746
+ const hasExecuting = allOps.some((op) => op.state === OperationLifecycleState.EXECUTING);
3747
+ if (hasExecuting) {
3748
+ return undefined;
3749
+ }
3750
+ // Rule 5: Clean up operations whose ancestors are complete or pending completion
3751
+ for (const op of allOps) {
3752
+ if (op.state === OperationLifecycleState.RETRY_WAITING ||
3753
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3754
+ op.state === OperationLifecycleState.IDLE_AWAITED) {
3755
+ // Use the original stepId from metadata, not the potentially hashed op.stepId
3756
+ const originalStepId = op.metadata.stepId;
3757
+ if (this.hasFinishedAncestor(originalStepId)) {
3758
+ log("🧹", `Cleaning up operation with completed ancestor: ${originalStepId}`);
3759
+ this.cleanupOperation(op.stepId);
3760
+ this.operations.delete(op.stepId);
3761
+ }
3762
+ }
3763
+ }
3764
+ // Re-check operations after cleanup
3765
+ const remainingOps = Array.from(this.operations.values());
3766
+ // Determine if we should terminate
3767
+ const hasWaiting = remainingOps.some((op) => op.state === OperationLifecycleState.RETRY_WAITING ||
3768
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3769
+ op.state === OperationLifecycleState.IDLE_AWAITED);
3770
+ if (hasWaiting) {
3771
+ return this.determineTerminationReason(remainingOps);
3772
+ }
3773
+ return undefined;
3914
3774
  }
3915
- createCallback(nameOrConfig, maybeConfig) {
3916
- validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
3917
- return this.withDurableModeManagement(() => {
3918
- const callbackFactory = createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId);
3919
- return callbackFactory(nameOrConfig, maybeConfig);
3920
- });
3921
- }
3922
- waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
3923
- validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
3924
- return this.withDurableModeManagement(() => {
3925
- const waitForCallbackHandler = createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this));
3926
- return waitForCallbackHandler(nameOrSubmitter, submitterOrConfig, maybeConfig);
3927
- });
3928
- }
3929
- waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
3930
- validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
3931
- return this.withDurableModeManagement(() => {
3932
- const waitForConditionHandler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3933
- return typeof nameOrCheckFunc === "string" ||
3934
- nameOrCheckFunc === undefined
3935
- ? waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig)
3936
- : waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig);
3937
- });
3938
- }
3939
- map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
3940
- validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
3941
- return this.withDurableModeManagement(() => {
3942
- const mapHandler = createMapHandler(this.executionContext, this._executeConcurrently.bind(this));
3943
- return mapHandler(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
3944
- });
3775
+ checkAndTerminate() {
3776
+ const terminationReason = this.shouldTerminate();
3777
+ if (terminationReason) {
3778
+ this.scheduleTermination(terminationReason);
3779
+ return;
3780
+ }
3781
+ this.abortTermination();
3782
+ }
3783
+ abortTermination() {
3784
+ if (this.terminationTimer) {
3785
+ clearTimeout(this.terminationTimer);
3786
+ this.terminationTimer = null;
3787
+ this.terminationReason = null;
3788
+ log("🔄", "Termination aborted - conditions changed");
3789
+ }
3945
3790
  }
3946
- parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
3947
- validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
3948
- return this.withDurableModeManagement(() => {
3949
- const parallelHandler = createParallelHandler(this.executionContext, this._executeConcurrently.bind(this));
3950
- return parallelHandler(nameOrBranches, branchesOrConfig, maybeConfig);
3791
+ scheduleTermination(reason) {
3792
+ // If already scheduled with same reason, don't reschedule
3793
+ if (this.terminationTimer && this.terminationReason === reason) {
3794
+ return;
3795
+ }
3796
+ // Clear any existing timer
3797
+ this.abortTermination();
3798
+ // Schedule new termination
3799
+ this.terminationReason = reason;
3800
+ log("⏱️", "Scheduling termination", {
3801
+ reason,
3802
+ cooldownMs: this.TERMINATION_COOLDOWN_MS,
3951
3803
  });
3804
+ this.terminationTimer = setTimeout(() => {
3805
+ if (!this.shouldTerminate()) {
3806
+ log("🔄", "Termination conditions no longer valid after cooldown, aborting termination");
3807
+ this.abortTermination();
3808
+ return;
3809
+ }
3810
+ this.executeTermination(reason);
3811
+ }, this.TERMINATION_COOLDOWN_MS);
3812
+ }
3813
+ executeTermination(reason) {
3814
+ log("🛑", "Executing termination after cooldown", { reason });
3815
+ // Clear timer
3816
+ this.terminationTimer = null;
3817
+ this.terminationReason = null;
3818
+ // Cleanup all operations before terminating
3819
+ this.cleanupAllOperations();
3820
+ // Call termination manager directly
3821
+ this.terminationManager.terminate({ reason });
3822
+ }
3823
+ determineTerminationReason(ops) {
3824
+ // Priority: RETRY_SCHEDULED > WAIT_SCHEDULED > CALLBACK_PENDING
3825
+ if (ops.some((op) => op.state === OperationLifecycleState.RETRY_WAITING &&
3826
+ op.metadata.subType === "Step")) {
3827
+ return TerminationReason.RETRY_SCHEDULED;
3828
+ }
3829
+ if (ops.some((op) => (op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3830
+ op.state === OperationLifecycleState.IDLE_AWAITED) &&
3831
+ op.metadata.subType === "Wait")) {
3832
+ return TerminationReason.WAIT_SCHEDULED;
3833
+ }
3834
+ return TerminationReason.CALLBACK_PENDING;
3952
3835
  }
3953
- _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
3954
- validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
3955
- return this.withDurableModeManagement(() => {
3956
- const concurrentExecutionHandler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
3957
- const promise = concurrentExecutionHandler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
3958
- // Prevent unhandled promise rejections
3959
- promise?.catch(() => { });
3960
- return promise;
3961
- });
3836
+ startTimerWithPolling(stepId, endTimestamp) {
3837
+ const op = this.operations.get(stepId);
3838
+ if (!op)
3839
+ return;
3840
+ let delay;
3841
+ if (endTimestamp) {
3842
+ // Ensure endTimestamp is a Date object
3843
+ const timestamp = endTimestamp instanceof Date ? endTimestamp : new Date(endTimestamp);
3844
+ // Wait until endTimestamp
3845
+ delay = Math.max(0, timestamp.getTime() - Date.now());
3846
+ }
3847
+ else {
3848
+ // No timestamp, start polling immediately (1 second delay)
3849
+ delay = 1000;
3850
+ }
3851
+ // Initialize poll count and start time for this operation
3852
+ if (!op.pollCount) {
3853
+ op.pollCount = 0;
3854
+ op.pollStartTime = Date.now();
3855
+ }
3856
+ op.timer = setTimeout(() => {
3857
+ this.forceRefreshAndCheckStatus(stepId);
3858
+ }, delay);
3962
3859
  }
3963
- get promise() {
3964
- return createPromiseHandler(this.step.bind(this));
3860
+ async forceRefreshAndCheckStatus(stepId) {
3861
+ const op = this.operations.get(stepId);
3862
+ if (!op)
3863
+ return;
3864
+ // Check if we've exceeded max polling duration (15 minutes)
3865
+ const MAX_POLL_DURATION_MS = 15 * 60 * 1000; // 15 minutes
3866
+ if (op.pollStartTime &&
3867
+ Date.now() - op.pollStartTime > MAX_POLL_DURATION_MS) {
3868
+ // Stop polling after 15 minutes to prevent indefinite resource consumption.
3869
+ // We don't resolve or reject the promise because the handler cannot continue
3870
+ // without a status change. The execution will remain suspended until the
3871
+ // operation completes or the Lambda times out.
3872
+ log("⏱️", `Max polling duration (15 min) exceeded for ${stepId}, stopping poll`);
3873
+ if (op.timer) {
3874
+ clearTimeout(op.timer);
3875
+ op.timer = undefined;
3876
+ }
3877
+ return;
3878
+ }
3879
+ // Get old status before refresh
3880
+ const oldStatus = this.stepData[hashId(stepId)]?.Status;
3881
+ // Force checkpoint to refresh state from backend
3882
+ await this.forceCheckpoint();
3883
+ // Get new status after refresh
3884
+ const newStatus = this.stepData[hashId(stepId)]?.Status;
3885
+ // Check if status changed
3886
+ if (newStatus !== oldStatus) {
3887
+ // Status changed, resolve the waiting promise
3888
+ log("✅", `Status changed for ${stepId}: ${oldStatus} → ${newStatus}`);
3889
+ op.resolver?.();
3890
+ op.resolver = undefined;
3891
+ // Clear timer
3892
+ if (op.timer) {
3893
+ clearTimeout(op.timer);
3894
+ op.timer = undefined;
3895
+ }
3896
+ }
3897
+ else {
3898
+ // Status not changed yet, poll again with incremental backoff
3899
+ // Start at 1s, increase by 1s each poll, max 10s
3900
+ op.pollCount = (op.pollCount || 0) + 1;
3901
+ const nextDelay = Math.min(op.pollCount * 1000, 10000);
3902
+ op.timer = setTimeout(() => {
3903
+ this.forceRefreshAndCheckStatus(stepId);
3904
+ }, nextDelay);
3905
+ }
3965
3906
  }
3966
3907
  }
3967
- const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => {
3968
- return new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3969
- };
3970
3908
 
3971
3909
  /*
3972
3910
  Second Approach (Promise-based):
@@ -4260,43 +4198,6 @@ const createDefaultLogger = (executionContext) => {
4260
4198
  return new DefaultLogger(executionContext);
4261
4199
  };
4262
4200
 
4263
- /**
4264
- * Tracks active async operations to prevent premature termination
4265
- */
4266
- class ActiveOperationsTracker {
4267
- activeCount = 0;
4268
- /**
4269
- * Increment the counter when starting an async operation
4270
- */
4271
- increment() {
4272
- this.activeCount++;
4273
- }
4274
- /**
4275
- * Decrement the counter when an async operation completes
4276
- */
4277
- decrement() {
4278
- this.activeCount = Math.max(0, this.activeCount - 1);
4279
- }
4280
- /**
4281
- * Check if there are any active operations
4282
- */
4283
- hasActive() {
4284
- return this.activeCount > 0;
4285
- }
4286
- /**
4287
- * Get the current count of active operations
4288
- */
4289
- getCount() {
4290
- return this.activeCount;
4291
- }
4292
- /**
4293
- * Reset the counter (useful for testing)
4294
- */
4295
- reset() {
4296
- this.activeCount = 0;
4297
- }
4298
- }
4299
-
4300
4201
  let defaultLambdaClient;
4301
4202
  /**
4302
4203
  * Durable execution client which uses an API-based LambdaClient
@@ -4416,6 +4317,20 @@ class DurableExecutionInvocationInputWithClient {
4416
4317
  this.DurableExecutionArn = params.DurableExecutionArn;
4417
4318
  this.CheckpointToken = params.CheckpointToken;
4418
4319
  }
4320
+ static isInstance(event) {
4321
+ if (event instanceof DurableExecutionInvocationInputWithClient) {
4322
+ return true;
4323
+ }
4324
+ return !!(typeof event === "object" &&
4325
+ event &&
4326
+ event.toString() ===
4327
+ "[object DurableExecutionInvocationInputWithClient]" &&
4328
+ "durableExecutionClient" in event &&
4329
+ event.constructor.name === "DurableExecutionInvocationInputWithClient");
4330
+ }
4331
+ get [Symbol.toStringTag]() {
4332
+ return "DurableExecutionInvocationInputWithClient";
4333
+ }
4419
4334
  }
4420
4335
 
4421
4336
  const initializeExecutionContext = async (event, context, lambdaClient) => {
@@ -4425,7 +4340,7 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4425
4340
  const durableExecutionArn = event.DurableExecutionArn;
4426
4341
  const durableExecutionClient =
4427
4342
  // Allow passing arbitrary durable clients if the input is a custom class
4428
- event instanceof DurableExecutionInvocationInputWithClient
4343
+ DurableExecutionInvocationInputWithClient.isInstance(event)
4429
4344
  ? event.durableExecutionClient
4430
4345
  : new DurableExecutionApiClient(lambdaClient);
4431
4346
  // Create logger for initialization errors using existing logger factory
@@ -4464,7 +4379,6 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4464
4379
  durableExecutionClient,
4465
4380
  _stepData: stepData,
4466
4381
  terminationManager: new TerminationManager(),
4467
- activeOperationsTracker: new ActiveOperationsTracker(),
4468
4382
  durableExecutionArn,
4469
4383
  pendingCompletions: new Set(),
4470
4384
  getStepData(stepId) {
@@ -4483,7 +4397,7 @@ const LAMBDA_RESPONSE_SIZE_LIMIT = 6 * 1024 * 1024 - 50; // 6MB in bytes, minus
4483
4397
  async function runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler) {
4484
4398
  // Create checkpoint manager and step data emitter
4485
4399
  const stepDataEmitter = new EventEmitter();
4486
- const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, executionContext.activeOperationsTracker, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), executionContext.pendingCompletions);
4400
+ const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), new Set());
4487
4401
  // Set the checkpoint terminating callback on the termination manager
4488
4402
  executionContext.terminationManager.setCheckpointTerminatingCallback(() => {
4489
4403
  checkpointManager.setTerminating();
@@ -4585,6 +4499,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4585
4499
  Payload: serializedResult, // Reuse the already serialized result
4586
4500
  });
4587
4501
  log("✅", "Large result successfully checkpointed");
4502
+ // Wait for any pending checkpoints to complete before returning
4503
+ try {
4504
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4505
+ }
4506
+ catch (waitError) {
4507
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4508
+ // Continue anyway - the checkpoint will be retried on next invocation
4509
+ }
4588
4510
  // Return a response indicating the result was checkpointed
4589
4511
  return {
4590
4512
  Status: InvocationStatus.SUCCEEDED,
@@ -4598,6 +4520,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4598
4520
  }
4599
4521
  }
4600
4522
  // If response size is acceptable, return the response
4523
+ // Wait for any pending checkpoints to complete before returning
4524
+ try {
4525
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4526
+ }
4527
+ catch (waitError) {
4528
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4529
+ // Continue anyway - the checkpoint will be retried on next invocation
4530
+ }
4601
4531
  return {
4602
4532
  Status: InvocationStatus.SUCCEEDED,
4603
4533
  Result: serializedResult,
@@ -4610,6 +4540,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4610
4540
  log("🛑", "Unrecoverable invocation error - terminating Lambda execution");
4611
4541
  throw error; // Re-throw the error to terminate Lambda execution
4612
4542
  }
4543
+ // Wait for any pending checkpoints to complete before returning error
4544
+ try {
4545
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4546
+ }
4547
+ catch (waitError) {
4548
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4549
+ // Continue anyway - the checkpoint will be retried on next invocation
4550
+ }
4613
4551
  return {
4614
4552
  Status: InvocationStatus.FAILED,
4615
4553
  Error: createErrorObjectFromError(error),
@@ -4620,16 +4558,10 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4620
4558
  * Validates that the event is a proper durable execution input
4621
4559
  */
4622
4560
  function validateDurableExecutionEvent(event) {
4623
- try {
4624
- const eventObj = event;
4625
- if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
4626
- throw new Error("Missing required durable execution fields");
4627
- }
4628
- }
4629
- catch {
4630
- const msg = `Unexpected payload provided to start the durable execution.
4631
- Check your resource configurations to confirm the durability is set.`;
4632
- throw new Error(msg);
4561
+ const eventObj = event;
4562
+ if (!eventObj?.DurableExecutionArn || !eventObj?.CheckpointToken) {
4563
+ throw new Error("Unexpected payload provided to start the durable execution.\n" +
4564
+ "Check your resource configurations to confirm the durability is set.");
4633
4565
  }
4634
4566
  }
4635
4567
  /**
@@ -4707,14 +4639,7 @@ const withDurableExecution = (handler, config) => {
4707
4639
  return async (event, context) => {
4708
4640
  validateDurableExecutionEvent(event);
4709
4641
  const { executionContext, durableExecutionMode, checkpointToken } = await initializeExecutionContext(event, context, config?.client);
4710
- let response = null;
4711
- try {
4712
- response = await runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
4713
- return response;
4714
- }
4715
- catch (err) {
4716
- throw err;
4717
- }
4642
+ return runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler);
4718
4643
  };
4719
4644
  };
4720
4645