@aws/durable-execution-sdk-js 1.0.0 → 1.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. package/README.md +3 -50
  2. package/dist/index.mjs +1874 -2017
  3. package/dist/index.mjs.map +1 -1
  4. package/dist-cjs/index.js +1873 -2016
  5. package/dist-cjs/index.js.map +1 -1
  6. package/dist-types/context/durable-context/durable-context.d.ts +0 -6
  7. package/dist-types/context/durable-context/durable-context.d.ts.map +1 -1
  8. package/dist-types/errors/durable-error/durable-error.d.ts +6 -0
  9. package/dist-types/errors/durable-error/durable-error.d.ts.map +1 -1
  10. package/dist-types/errors/step-errors/step-errors.d.ts +1 -0
  11. package/dist-types/errors/step-errors/step-errors.d.ts.map +1 -1
  12. package/dist-types/handlers/callback-handler/callback-promise.d.ts +2 -2
  13. package/dist-types/handlers/callback-handler/callback-promise.d.ts.map +1 -1
  14. package/dist-types/handlers/callback-handler/callback.d.ts +1 -2
  15. package/dist-types/handlers/callback-handler/callback.d.ts.map +1 -1
  16. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts +1 -0
  17. package/dist-types/handlers/concurrent-execution-handler/concurrent-execution-handler.d.ts.map +1 -1
  18. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts +1 -2
  19. package/dist-types/handlers/invoke-handler/invoke-handler.d.ts.map +1 -1
  20. package/dist-types/handlers/run-in-child-context-handler/run-in-child-context-handler.d.ts.map +1 -1
  21. package/dist-types/handlers/step-handler/step-handler.d.ts +1 -9
  22. package/dist-types/handlers/step-handler/step-handler.d.ts.map +1 -1
  23. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts +1 -6
  24. package/dist-types/handlers/wait-for-condition-handler/wait-for-condition-handler.d.ts.map +1 -1
  25. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts +2 -0
  26. package/dist-types/handlers/wait-handler/wait-handler-comparison.test.d.ts.map +1 -0
  27. package/dist-types/handlers/wait-handler/wait-handler.d.ts +1 -2
  28. package/dist-types/handlers/wait-handler/wait-handler.d.ts.map +1 -1
  29. package/dist-types/testing/create-test-checkpoint-manager.d.ts.map +1 -1
  30. package/dist-types/testing/create-test-durable-context.d.ts.map +1 -1
  31. package/dist-types/testing/mock-checkpoint-manager.d.ts +0 -1
  32. package/dist-types/testing/mock-checkpoint-manager.d.ts.map +1 -1
  33. package/dist-types/testing/mock-checkpoint.d.ts +1 -0
  34. package/dist-types/testing/mock-checkpoint.d.ts.map +1 -1
  35. package/dist-types/types/batch.d.ts +8 -0
  36. package/dist-types/types/batch.d.ts.map +1 -1
  37. package/dist-types/types/core.d.ts +1 -3
  38. package/dist-types/types/core.d.ts.map +1 -1
  39. package/dist-types/types/durable-context.d.ts +8 -6
  40. package/dist-types/types/durable-context.d.ts.map +1 -1
  41. package/dist-types/types/index.d.ts +2 -0
  42. package/dist-types/types/index.d.ts.map +1 -1
  43. package/dist-types/types/operation-lifecycle-state.d.ts +27 -0
  44. package/dist-types/types/operation-lifecycle-state.d.ts.map +1 -0
  45. package/dist-types/types/operation-lifecycle.d.ts +27 -0
  46. package/dist-types/types/operation-lifecycle.d.ts.map +1 -0
  47. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts +2 -0
  48. package/dist-types/utils/checkpoint/checkpoint-ancestor.test.d.ts.map +1 -0
  49. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts +2 -0
  50. package/dist-types/utils/checkpoint/checkpoint-central-termination.test.d.ts.map +1 -0
  51. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts +37 -0
  52. package/dist-types/utils/checkpoint/checkpoint-helper.d.ts.map +1 -1
  53. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts +37 -12
  54. package/dist-types/utils/checkpoint/checkpoint-manager.d.ts.map +1 -1
  55. package/dist-types/utils/constants/constants.d.ts +0 -1
  56. package/dist-types/utils/constants/constants.d.ts.map +1 -1
  57. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts +3 -1
  58. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.d.ts.map +1 -1
  59. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts +2 -0
  60. package/dist-types/utils/durable-execution-invocation-input/durable-execution-invocation-input.test.d.ts.map +1 -0
  61. package/dist-types/utils/termination-helper/termination-helper.d.ts +0 -9
  62. package/dist-types/utils/termination-helper/termination-helper.d.ts.map +1 -1
  63. package/dist-types/with-durable-execution.d.ts.map +1 -1
  64. package/package.json +8 -3
  65. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts +0 -2
  66. package/dist-types/utils/checkpoint/checkpoint-ancestor-checking.test.d.ts.map +0 -1
  67. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts +0 -31
  68. package/dist-types/utils/termination-helper/active-operations-tracker.d.ts.map +0 -1
  69. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts +0 -2
  70. package/dist-types/utils/termination-helper/active-operations-tracker.test.d.ts.map +0 -1
  71. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts +0 -35
  72. package/dist-types/utils/wait-before-continue/wait-before-continue.d.ts.map +0 -1
  73. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts +0 -2
  74. package/dist-types/utils/wait-before-continue/wait-before-continue.test.d.ts.map +0 -1
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { OperationStatus, OperationAction, OperationType, LambdaClient, GetDurableExecutionStateCommand, CheckpointDurableExecutionCommand } from '@aws-sdk/client-lambda';
1
+ import { OperationType, OperationStatus, OperationAction, LambdaClient, GetDurableExecutionStateCommand, CheckpointDurableExecutionCommand } from '@aws-sdk/client-lambda';
2
2
  import { EventEmitter } from 'events';
3
3
  import { AsyncLocalStorage } from 'async_hooks';
4
4
  import { createHash } from 'crypto';
@@ -298,6 +298,34 @@ class DurablePromise {
298
298
  }
299
299
  }
300
300
 
301
+ /**
302
+ * Represents the lifecycle state of an operation in the durable execution system.
303
+ * This is distinct from AWS SDK's OperationStatus (PENDING, SUCCEEDED, FAILED).
304
+ */
305
+ var OperationLifecycleState;
306
+ (function (OperationLifecycleState) {
307
+ /**
308
+ * Operation is currently executing user code (step function, waitForCondition check)
309
+ */
310
+ OperationLifecycleState["EXECUTING"] = "EXECUTING";
311
+ /**
312
+ * Operation is waiting for retry timer to expire before re-executing user code
313
+ */
314
+ OperationLifecycleState["RETRY_WAITING"] = "RETRY_WAITING";
315
+ /**
316
+ * Operation is waiting for external event (timer, callback, invoke) but not awaited yet (phase 1)
317
+ */
318
+ OperationLifecycleState["IDLE_NOT_AWAITED"] = "IDLE_NOT_AWAITED";
319
+ /**
320
+ * Operation is waiting for external event and has been awaited (phase 2)
321
+ */
322
+ OperationLifecycleState["IDLE_AWAITED"] = "IDLE_AWAITED";
323
+ /**
324
+ * Operation has completed (success or permanent failure)
325
+ */
326
+ OperationLifecycleState["COMPLETED"] = "COMPLETED";
327
+ })(OperationLifecycleState || (OperationLifecycleState = {}));
328
+
301
329
  /**
302
330
  * Converts a Duration object to total seconds
303
331
  * @param duration - Duration object with at least one time unit specified
@@ -311,6 +339,21 @@ function durationToSeconds(duration) {
311
339
  return days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60 + seconds;
312
340
  }
313
341
 
342
+ /**
343
+ * Terminates execution for unrecoverable errors and returns a never-resolving promise
344
+ * @param context - The execution context containing the termination manager
345
+ * @param error - The unrecoverable error that caused termination
346
+ * @param stepIdentifier - The step name or ID for error messaging
347
+ * @returns A never-resolving promise
348
+ */
349
+ function terminateForUnrecoverableError(context, error, stepIdentifier) {
350
+ context.terminationManager.terminate({
351
+ reason: error.terminationReason,
352
+ message: `Unrecoverable error in step ${stepIdentifier}: ${error.message}`,
353
+ });
354
+ return new Promise(() => { }); // Never-resolving promise
355
+ }
356
+
314
357
  const safeStringify = (data) => {
315
358
  try {
316
359
  const seen = new WeakSet();
@@ -344,238 +387,6 @@ const log = (emoji, message, data) => {
344
387
  }
345
388
  };
346
389
 
347
- var TerminationReason;
348
- (function (TerminationReason) {
349
- // Default termination reason
350
- TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
351
- // Retry-related reasons
352
- TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
353
- TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
354
- // Wait-related reasons
355
- TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
356
- // Callback-related reasons
357
- TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
358
- // Error-related reasons
359
- TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
360
- TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
361
- TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
362
- // Custom reason
363
- TerminationReason["CUSTOM"] = "CUSTOM";
364
- })(TerminationReason || (TerminationReason = {}));
365
-
366
- const asyncLocalStorage = new AsyncLocalStorage();
367
- const getActiveContext = () => {
368
- return asyncLocalStorage.getStore();
369
- };
370
- const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
371
- return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
372
- };
373
- const validateContextUsage = (operationContextId, operationName, terminationManager) => {
374
- const contextId = operationContextId || "root";
375
- const activeContext = getActiveContext();
376
- if (!activeContext) {
377
- return;
378
- }
379
- if (activeContext.contextId !== contextId) {
380
- const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
381
- terminationManager.terminate({
382
- reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
383
- message: errorMessage,
384
- error: new Error(errorMessage),
385
- });
386
- // Only call termination manager, don't throw or return promise
387
- }
388
- };
389
-
390
- const HASH_LENGTH = 16;
391
- /**
392
- * Creates an MD5 hash of the input string for better performance than SHA-256
393
- * @param input - The string to hash
394
- * @returns The truncated hexadecimal hash string
395
- */
396
- const hashId = (input) => {
397
- return createHash("md5")
398
- .update(input)
399
- .digest("hex")
400
- .substring(0, HASH_LENGTH);
401
- };
402
- /**
403
- * Helper function to get step data using the original stepId
404
- * This function handles the hashing internally so callers don't need to worry about it
405
- * @param stepData - The stepData record from context
406
- * @param stepId - The original stepId (will be hashed internally)
407
- * @returns The operation data or undefined if not found
408
- */
409
- const getStepData = (stepData, stepId) => {
410
- const hashedId = hashId(stepId);
411
- return stepData[hashedId];
412
- };
413
-
414
- /**
415
- * Checks if any ancestor operation in the parent chain has finished (SUCCEEDED or FAILED)
416
- * or has a pending completion checkpoint
417
- */
418
- function hasFinishedAncestor(context, parentId) {
419
- if (!parentId) {
420
- log("🔍", "hasFinishedAncestor: No parentId provided");
421
- return false;
422
- }
423
- // First check if any ancestor has a pending completion checkpoint
424
- if (hasPendingAncestorCompletion(context, parentId)) {
425
- log("🔍", "hasFinishedAncestor: Found ancestor with pending completion!", {
426
- parentId,
427
- });
428
- return true;
429
- }
430
- let currentHashedId = hashId(parentId);
431
- log("🔍", "hasFinishedAncestor: Starting check", {
432
- parentId,
433
- initialHashedId: currentHashedId,
434
- });
435
- while (currentHashedId) {
436
- const parentOperation = context._stepData[currentHashedId];
437
- log("🔍", "hasFinishedAncestor: Checking operation", {
438
- hashedId: currentHashedId,
439
- hasOperation: !!parentOperation,
440
- status: parentOperation?.Status,
441
- type: parentOperation?.Type,
442
- });
443
- if (parentOperation?.Status === OperationStatus.SUCCEEDED ||
444
- parentOperation?.Status === OperationStatus.FAILED) {
445
- log("🔍", "hasFinishedAncestor: Found finished ancestor!", {
446
- hashedId: currentHashedId,
447
- status: parentOperation.Status,
448
- });
449
- return true;
450
- }
451
- currentHashedId = parentOperation?.ParentId;
452
- }
453
- log("🔍", "hasFinishedAncestor: No finished ancestor found");
454
- return false;
455
- }
456
- /**
457
- * Checks if any ancestor has a pending completion checkpoint
458
- */
459
- function hasPendingAncestorCompletion(context, stepId) {
460
- let currentHashedId = hashId(stepId);
461
- while (currentHashedId) {
462
- if (context.pendingCompletions.has(currentHashedId)) {
463
- return true;
464
- }
465
- const operation = context._stepData[currentHashedId];
466
- currentHashedId = operation?.ParentId;
467
- }
468
- return false;
469
- }
470
- /**
471
- * Terminates execution and returns a never-resolving promise to prevent code progression
472
- * @param context - The execution context containing the termination manager
473
- * @param reason - The termination reason
474
- * @param message - The termination message
475
- * @returns A never-resolving promise
476
- */
477
- function terminate(context, reason, message) {
478
- const activeContext = getActiveContext();
479
- // If we have a parent context, add delay to let checkpoints process
480
- if (activeContext?.parentId) {
481
- return new Promise(async (_resolve, _reject) => {
482
- // Wait a tick to let any pending checkpoints start processing
483
- await new Promise((resolve) => setImmediate(resolve));
484
- log("🔍", "Terminate called - checking context:", {
485
- hasActiveContext: !!activeContext,
486
- contextId: activeContext?.contextId,
487
- parentId: activeContext?.parentId,
488
- reason,
489
- message,
490
- });
491
- const ancestorFinished = hasFinishedAncestor(context, activeContext.parentId);
492
- log("🔍", "Ancestor check result:", {
493
- parentId: activeContext.parentId,
494
- ancestorFinished,
495
- });
496
- if (ancestorFinished) {
497
- log("🛑", "Skipping termination - ancestor already finished:", {
498
- contextId: activeContext.contextId,
499
- parentId: activeContext.parentId,
500
- reason,
501
- message,
502
- });
503
- // Return never-resolving promise without terminating
504
- return;
505
- }
506
- // Check if there are active operations before terminating
507
- const tracker = context.activeOperationsTracker;
508
- if (tracker && tracker.hasActive()) {
509
- log("⏳", "Deferring termination - active operations in progress:", {
510
- activeCount: tracker.getCount(),
511
- reason,
512
- message,
513
- });
514
- // Wait for operations to complete, then terminate
515
- const checkInterval = setInterval(() => {
516
- if (!tracker.hasActive()) {
517
- clearInterval(checkInterval);
518
- log("✅", "Active operations completed, proceeding with termination:", {
519
- reason,
520
- message,
521
- });
522
- context.terminationManager.terminate({
523
- reason,
524
- message,
525
- });
526
- }
527
- }, 10);
528
- return;
529
- }
530
- // No active operations, terminate immediately
531
- context.terminationManager.terminate({
532
- reason,
533
- message,
534
- });
535
- });
536
- }
537
- // No parent context - check active operations and terminate
538
- const tracker = context.activeOperationsTracker;
539
- if (tracker && tracker.hasActive()) {
540
- log("⏳", "Deferring termination - active operations in progress:", {
541
- activeCount: tracker.getCount(),
542
- reason,
543
- message,
544
- });
545
- return new Promise((_resolve, _reject) => {
546
- const checkInterval = setInterval(() => {
547
- if (!tracker.hasActive()) {
548
- clearInterval(checkInterval);
549
- log("✅", "Active operations completed, proceeding with termination:", {
550
- reason,
551
- message,
552
- });
553
- context.terminationManager.terminate({
554
- reason,
555
- message,
556
- });
557
- }
558
- }, 10);
559
- });
560
- }
561
- // No parent, no active operations - terminate immediately
562
- context.terminationManager.terminate({
563
- reason,
564
- message,
565
- });
566
- return new Promise(() => { });
567
- }
568
- /**
569
- * Terminates execution for unrecoverable errors and returns a never-resolving promise
570
- * @param context - The execution context containing the termination manager
571
- * @param error - The unrecoverable error that caused termination
572
- * @param stepIdentifier - The step name or ID for error messaging
573
- * @returns A never-resolving promise
574
- */
575
- function terminateForUnrecoverableError(context, error, stepIdentifier) {
576
- return terminate(context, error.terminationReason, `Unrecoverable error in step ${stepIdentifier}: ${error.message}`);
577
- }
578
-
579
390
  const DEFAULT_CONFIG$1 = {
580
391
  maxAttempts: 3,
581
392
  initialDelay: { seconds: 5 },
@@ -747,6 +558,7 @@ const retryPresets = {
747
558
  /**
748
559
  * Error thrown when a step with AT_MOST_ONCE_PER_RETRY semantics was started but interrupted
749
560
  * before completion.
561
+ * @public
750
562
  */
751
563
  class StepInterruptedError extends Error {
752
564
  constructor(_stepId, _stepName) {
@@ -755,13 +567,9 @@ class StepInterruptedError extends Error {
755
567
  }
756
568
  }
757
569
 
758
- /**
759
- * Shared constants to avoid circular dependencies
760
- */
761
- const OPERATIONS_COMPLETE_EVENT = "allOperationsComplete";
762
-
763
570
  /**
764
571
  * Base class for all durable operation errors
572
+ * @public
765
573
  */
766
574
  class DurableOperationError extends Error {
767
575
  cause;
@@ -810,6 +618,7 @@ class DurableOperationError extends Error {
810
618
  }
811
619
  /**
812
620
  * Error thrown when a step operation fails
621
+ * @public
813
622
  */
814
623
  class StepError extends DurableOperationError {
815
624
  errorType = "StepError";
@@ -819,6 +628,7 @@ class StepError extends DurableOperationError {
819
628
  }
820
629
  /**
821
630
  * Error thrown when a callback operation fails
631
+ * @public
822
632
  */
823
633
  class CallbackError extends DurableOperationError {
824
634
  errorType = "CallbackError";
@@ -828,6 +638,7 @@ class CallbackError extends DurableOperationError {
828
638
  }
829
639
  /**
830
640
  * Error thrown when an invoke operation fails
641
+ * @public
831
642
  */
832
643
  class InvokeError extends DurableOperationError {
833
644
  errorType = "InvokeError";
@@ -837,6 +648,7 @@ class InvokeError extends DurableOperationError {
837
648
  }
838
649
  /**
839
650
  * Error thrown when a child context operation fails
651
+ * @public
840
652
  */
841
653
  class ChildContextError extends DurableOperationError {
842
654
  errorType = "ChildContextError";
@@ -846,6 +658,7 @@ class ChildContextError extends DurableOperationError {
846
658
  }
847
659
  /**
848
660
  * Error thrown when a wait for condition operation fails
661
+ * @public
849
662
  */
850
663
  class WaitForConditionError extends DurableOperationError {
851
664
  errorType = "WaitForConditionError";
@@ -1013,6 +826,25 @@ function createClassSerdesWithDates(cls, dateProps) {
1013
826
  };
1014
827
  }
1015
828
 
829
+ var TerminationReason;
830
+ (function (TerminationReason) {
831
+ // Default termination reason
832
+ TerminationReason["OPERATION_TERMINATED"] = "OPERATION_TERMINATED";
833
+ // Retry-related reasons
834
+ TerminationReason["RETRY_SCHEDULED"] = "RETRY_SCHEDULED";
835
+ TerminationReason["RETRY_INTERRUPTED_STEP"] = "RETRY_INTERRUPTED_STEP";
836
+ // Wait-related reasons
837
+ TerminationReason["WAIT_SCHEDULED"] = "WAIT_SCHEDULED";
838
+ // Callback-related reasons
839
+ TerminationReason["CALLBACK_PENDING"] = "CALLBACK_PENDING";
840
+ // Error-related reasons
841
+ TerminationReason["CHECKPOINT_FAILED"] = "CHECKPOINT_FAILED";
842
+ TerminationReason["SERDES_FAILED"] = "SERDES_FAILED";
843
+ TerminationReason["CONTEXT_VALIDATION_ERROR"] = "CONTEXT_VALIDATION_ERROR";
844
+ // Custom reason
845
+ TerminationReason["CUSTOM"] = "CUSTOM";
846
+ })(TerminationReason || (TerminationReason = {}));
847
+
1016
848
  /**
1017
849
  * Base class for all unrecoverable errors
1018
850
  * Any error that inherits from this class indicates a fatal condition
@@ -1133,6 +965,30 @@ async function safeDeserialize(serdes, data, stepId, stepName, terminationManage
1133
965
  }
1134
966
  }
1135
967
 
968
+ const asyncLocalStorage = new AsyncLocalStorage();
969
+ const getActiveContext = () => {
970
+ return asyncLocalStorage.getStore();
971
+ };
972
+ const runWithContext = (contextId, parentId, fn, attempt, durableExecutionMode) => {
973
+ return asyncLocalStorage.run({ contextId, parentId, attempt, durableExecutionMode }, fn);
974
+ };
975
+ const validateContextUsage = (operationContextId, operationName, terminationManager) => {
976
+ const contextId = operationContextId || "root";
977
+ const activeContext = getActiveContext();
978
+ if (!activeContext) {
979
+ return;
980
+ }
981
+ if (activeContext.contextId !== contextId) {
982
+ const errorMessage = `Context usage error in "${operationName}": You are using a parent or sibling context instead of the current child context. Expected context ID: "${activeContext.contextId}", but got: "${operationContextId}". When inside runInChildContext(), you must use the child context parameter, not the parent context.`;
983
+ terminationManager.terminate({
984
+ reason: TerminationReason.CONTEXT_VALIDATION_ERROR,
985
+ message: errorMessage,
986
+ error: new Error(errorMessage),
987
+ });
988
+ // Only call termination manager, don't throw or return promise
989
+ }
990
+ };
991
+
1136
992
  function isErrorLike(obj) {
1137
993
  return (obj instanceof Error ||
1138
994
  (obj != null &&
@@ -1161,525 +1017,426 @@ function createErrorObjectFromError(error, data) {
1161
1017
  }
1162
1018
 
1163
1019
  /**
1164
- * Error thrown when a checkpoint operation fails due to invocation-level issues
1165
- * (e.g., 5xx errors, invalid checkpoint token)
1166
- * This will terminate the current Lambda invocation, but the execution can continue with a new invocation
1167
- */
1168
- class CheckpointUnrecoverableInvocationError extends UnrecoverableInvocationError {
1169
- terminationReason = TerminationReason.CHECKPOINT_FAILED;
1170
- constructor(message, originalError) {
1171
- super(message || "Checkpoint operation failed", originalError);
1172
- }
1173
- }
1174
- /**
1175
- * Error thrown when a checkpoint operation fails due to execution-level issues
1176
- * (e.g., 4xx errors other than invalid checkpoint token)
1177
- * This will terminate the entire execution and cannot be recovered
1020
+ * Error thrown when non-deterministic code is detected during replay
1178
1021
  */
1179
- class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError {
1180
- terminationReason = TerminationReason.CHECKPOINT_FAILED;
1181
- constructor(message, originalError) {
1182
- super(message || "Checkpoint operation failed", originalError);
1022
+ class NonDeterministicExecutionError extends UnrecoverableExecutionError {
1023
+ terminationReason = TerminationReason.CUSTOM;
1024
+ constructor(message) {
1025
+ super(message);
1026
+ this.name = "NonDeterministicExecutionError";
1183
1027
  }
1184
1028
  }
1185
1029
 
1186
- const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
1187
- class CheckpointManager {
1188
- durableExecutionArn;
1189
- stepData;
1190
- storage;
1191
- terminationManager;
1192
- activeOperationsTracker;
1193
- stepDataEmitter;
1194
- logger;
1195
- pendingCompletions;
1196
- queue = [];
1197
- isProcessing = false;
1198
- currentTaskToken;
1199
- forceCheckpointPromises = [];
1200
- queueCompletionResolver = null;
1201
- queueCompletionTimeout = null;
1202
- MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
1203
- isTerminating = false;
1204
- static textEncoder = new TextEncoder();
1205
- constructor(durableExecutionArn, stepData, storage, terminationManager, activeOperationsTracker, initialTaskToken, stepDataEmitter, logger, pendingCompletions) {
1206
- this.durableExecutionArn = durableExecutionArn;
1207
- this.stepData = stepData;
1208
- this.storage = storage;
1209
- this.terminationManager = terminationManager;
1210
- this.activeOperationsTracker = activeOperationsTracker;
1211
- this.stepDataEmitter = stepDataEmitter;
1212
- this.logger = logger;
1213
- this.pendingCompletions = pendingCompletions;
1214
- this.currentTaskToken = initialTaskToken;
1030
+ const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
1031
+ // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
1032
+ if (!checkpointData || !checkpointData.Type) {
1033
+ return;
1215
1034
  }
1216
- setTerminating() {
1217
- this.isTerminating = true;
1218
- log("🛑", "Checkpoint manager marked as terminating");
1219
- }
1220
- /**
1221
- * Checks if a step ID or any of its ancestors has a pending completion
1222
- */
1223
- hasPendingAncestorCompletion(stepId) {
1224
- let currentHashedId = hashId(stepId);
1225
- while (currentHashedId) {
1226
- if (this.pendingCompletions.has(currentHashedId)) {
1227
- return true;
1228
- }
1229
- const operation = this.stepData[currentHashedId];
1230
- currentHashedId = operation?.ParentId;
1231
- }
1232
- return false;
1233
- }
1234
- async forceCheckpoint() {
1235
- if (this.isTerminating) {
1236
- log("⚠️", "Force checkpoint skipped - termination in progress");
1237
- return new Promise(() => { }); // Never resolves during termination
1238
- }
1239
- return new Promise((resolve, reject) => {
1240
- this.forceCheckpointPromises.push({ resolve, reject });
1241
- if (!this.isProcessing) {
1242
- setImmediate(() => {
1243
- this.processQueue();
1244
- });
1245
- }
1246
- });
1247
- }
1248
- async waitForQueueCompletion() {
1249
- if (this.queue.length === 0 && !this.isProcessing) {
1250
- return;
1251
- }
1252
- return new Promise((resolve, reject) => {
1253
- this.queueCompletionResolver = resolve;
1254
- // Set a timeout to prevent infinite waiting
1255
- this.queueCompletionTimeout = setTimeout(() => {
1256
- this.queueCompletionResolver = null;
1257
- this.queueCompletionTimeout = null;
1258
- // Clear the queue since it's taking too long
1259
- this.clearQueue();
1260
- reject(new Error("Timeout waiting for checkpoint queue completion"));
1261
- }, 3000); // 3 second timeout
1262
- });
1035
+ // Validate operation type
1036
+ if (checkpointData.Type !== currentOperation.type) {
1037
+ const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation type mismatch for step "${stepId}". ` +
1038
+ `Expected type "${checkpointData.Type}", but got "${currentOperation.type}". ` +
1039
+ `This indicates non-deterministic control flow in your workflow code.`);
1040
+ terminateForUnrecoverableError(context, error, stepId);
1263
1041
  }
1264
- clearQueue() {
1265
- // Silently clear queue - we're terminating so no need to reject promises
1266
- this.queue = [];
1267
- this.forceCheckpointPromises = [];
1268
- // Resolve any waiting queue completion promises since we're clearing
1269
- this.notifyQueueCompletion();
1042
+ // Validate operation name (including undefined)
1043
+ if (checkpointData.Name !== currentOperation.name) {
1044
+ const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation name mismatch for step "${stepId}". ` +
1045
+ `Expected name "${checkpointData.Name ?? "undefined"}", but got "${currentOperation.name ?? "undefined"}". ` +
1046
+ `This indicates non-deterministic control flow in your workflow code.`);
1047
+ terminateForUnrecoverableError(context, error, stepId);
1270
1048
  }
1271
- // Alias for backward compatibility with Checkpoint interface
1272
- async force() {
1273
- return this.forceCheckpoint();
1049
+ // Validate operation subtype
1050
+ if (checkpointData.SubType !== currentOperation.subType) {
1051
+ const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation subtype mismatch for step "${stepId}". ` +
1052
+ `Expected subtype "${checkpointData.SubType}", but got "${currentOperation.subType}". ` +
1053
+ `This indicates non-deterministic control flow in your workflow code.`);
1054
+ terminateForUnrecoverableError(context, error, stepId);
1274
1055
  }
1275
- async checkpoint(stepId, data) {
1276
- if (this.isTerminating) {
1277
- log("⚠️", "Checkpoint skipped - termination in progress:", { stepId });
1278
- return new Promise(() => { }); // Never resolves during termination
1056
+ };
1057
+
1058
+ const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, parentId) => {
1059
+ return (nameOrFn, fnOrOptions, maybeOptions) => {
1060
+ let name;
1061
+ let fn;
1062
+ let options;
1063
+ if (typeof nameOrFn === "string" || nameOrFn === undefined) {
1064
+ name = nameOrFn;
1065
+ fn = fnOrOptions;
1066
+ options = maybeOptions;
1279
1067
  }
1280
- if (this.activeOperationsTracker) {
1281
- this.activeOperationsTracker.increment();
1068
+ else {
1069
+ fn = nameOrFn;
1070
+ options = fnOrOptions;
1282
1071
  }
1283
- return new Promise((resolve, reject) => {
1284
- if (data.Action === OperationAction.SUCCEED ||
1285
- data.Action === OperationAction.FAIL) {
1286
- this.pendingCompletions.add(stepId);
1072
+ const stepId = createStepId();
1073
+ const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
1074
+ const serdes = options?.serdes || defaultSerdes;
1075
+ // Phase 1: Execute step
1076
+ const phase1Promise = (async () => {
1077
+ let stepData = context.getStepData(stepId);
1078
+ validateReplayConsistency(stepId, { type: OperationType.STEP, name, subType: OperationSubType.STEP }, stepData, context);
1079
+ // Check if already completed
1080
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1081
+ log("⏭️", "Step already completed:", { stepId });
1082
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1083
+ metadata: {
1084
+ stepId,
1085
+ name,
1086
+ type: OperationType.STEP,
1087
+ subType: OperationSubType.STEP,
1088
+ parentId,
1089
+ },
1090
+ });
1091
+ return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
1287
1092
  }
1288
- const queuedItem = {
1289
- stepId,
1290
- data,
1291
- resolve: () => {
1292
- if (this.activeOperationsTracker) {
1293
- this.activeOperationsTracker.decrement();
1093
+ // Check if already failed
1094
+ if (stepData?.Status === OperationStatus.FAILED) {
1095
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1096
+ metadata: {
1097
+ stepId,
1098
+ name,
1099
+ type: OperationType.STEP,
1100
+ subType: OperationSubType.STEP,
1101
+ parentId,
1102
+ },
1103
+ });
1104
+ if (stepData.StepDetails?.Error) {
1105
+ throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1106
+ }
1107
+ throw new StepError("Unknown error");
1108
+ }
1109
+ // Check if pending retry
1110
+ if (stepData?.Status === OperationStatus.PENDING) {
1111
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1112
+ metadata: {
1113
+ stepId,
1114
+ name,
1115
+ type: OperationType.STEP,
1116
+ subType: OperationSubType.STEP,
1117
+ parentId,
1118
+ },
1119
+ endTimestamp: stepData.StepDetails?.NextAttemptTimestamp,
1120
+ });
1121
+ return (async () => {
1122
+ await checkpoint.waitForRetryTimer(stepId);
1123
+ stepData = context.getStepData(stepId);
1124
+ return await executeStepLogic();
1125
+ })();
1126
+ }
1127
+ // Check for interrupted step with AT_MOST_ONCE_PER_RETRY
1128
+ if (stepData?.Status === OperationStatus.STARTED &&
1129
+ semantics === StepSemantics.AtMostOncePerRetry) {
1130
+ const error = new StepInterruptedError(stepId, name);
1131
+ const currentAttempt = (stepData.StepDetails?.Attempt || 0) + 1;
1132
+ const retryDecision = options?.retryStrategy?.(error, currentAttempt) ??
1133
+ retryPresets.default(error, currentAttempt);
1134
+ if (!retryDecision.shouldRetry) {
1135
+ await checkpoint.checkpoint(stepId, {
1136
+ Id: stepId,
1137
+ ParentId: parentId,
1138
+ Action: OperationAction.FAIL,
1139
+ SubType: OperationSubType.STEP,
1140
+ Type: OperationType.STEP,
1141
+ Error: createErrorObjectFromError(error),
1142
+ Name: name,
1143
+ });
1144
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1145
+ throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
1146
+ }
1147
+ await checkpoint.checkpoint(stepId, {
1148
+ Id: stepId,
1149
+ ParentId: parentId,
1150
+ Action: OperationAction.RETRY,
1151
+ SubType: OperationSubType.STEP,
1152
+ Type: OperationType.STEP,
1153
+ Error: createErrorObjectFromError(error),
1154
+ Name: name,
1155
+ StepOptions: {
1156
+ NextAttemptDelaySeconds: retryDecision.delay
1157
+ ? durationToSeconds(retryDecision.delay)
1158
+ : 1,
1159
+ },
1160
+ });
1161
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1162
+ metadata: {
1163
+ stepId,
1164
+ name,
1165
+ type: OperationType.STEP,
1166
+ subType: OperationSubType.STEP,
1167
+ parentId,
1168
+ },
1169
+ endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
1170
+ });
1171
+ return (async () => {
1172
+ await checkpoint.waitForRetryTimer(stepId);
1173
+ stepData = context.getStepData(stepId);
1174
+ return await executeStepLogic();
1175
+ })();
1176
+ }
1177
+ return await executeStepLogic();
1178
+ async function executeStepLogic() {
1179
+ stepData = context.getStepData(stepId);
1180
+ if (stepData?.Status !== OperationStatus.STARTED) {
1181
+ if (semantics === StepSemantics.AtMostOncePerRetry) {
1182
+ await checkpoint.checkpoint(stepId, {
1183
+ Id: stepId,
1184
+ ParentId: parentId,
1185
+ Action: OperationAction.START,
1186
+ SubType: OperationSubType.STEP,
1187
+ Type: OperationType.STEP,
1188
+ Name: name,
1189
+ });
1294
1190
  }
1295
- resolve();
1296
- },
1297
- reject: (error) => {
1298
- if (this.activeOperationsTracker) {
1299
- this.activeOperationsTracker.decrement();
1191
+ else {
1192
+ checkpoint.checkpoint(stepId, {
1193
+ Id: stepId,
1194
+ ParentId: parentId,
1195
+ Action: OperationAction.START,
1196
+ SubType: OperationSubType.STEP,
1197
+ Type: OperationType.STEP,
1198
+ Name: name,
1199
+ });
1300
1200
  }
1301
- reject(error);
1302
- },
1303
- };
1304
- this.queue.push(queuedItem);
1305
- log("📥", "Checkpoint queued:", {
1306
- stepId,
1307
- queueLength: this.queue.length,
1308
- isProcessing: this.isProcessing,
1309
- });
1310
- if (!this.isProcessing) {
1311
- setImmediate(() => {
1312
- this.processQueue();
1313
- });
1201
+ }
1202
+ try {
1203
+ stepData = context.getStepData(stepId);
1204
+ const currentAttempt = stepData?.StepDetails?.Attempt || 0;
1205
+ const stepContext = { logger };
1206
+ // Mark operation as EXECUTING
1207
+ checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
1208
+ metadata: {
1209
+ stepId,
1210
+ name,
1211
+ type: OperationType.STEP,
1212
+ subType: OperationSubType.STEP,
1213
+ parentId,
1214
+ },
1215
+ });
1216
+ let result;
1217
+ result = await runWithContext(stepId, parentId, () => fn(stepContext), currentAttempt + 1, DurableExecutionMode.ExecutionMode);
1218
+ const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
1219
+ await checkpoint.checkpoint(stepId, {
1220
+ Id: stepId,
1221
+ ParentId: parentId,
1222
+ Action: OperationAction.SUCCEED,
1223
+ SubType: OperationSubType.STEP,
1224
+ Type: OperationType.STEP,
1225
+ Payload: serializedResult,
1226
+ Name: name,
1227
+ });
1228
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1229
+ return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
1230
+ }
1231
+ catch (error) {
1232
+ if (isUnrecoverableError(error)) {
1233
+ return terminateForUnrecoverableError(context, error, name || stepId);
1234
+ }
1235
+ stepData = context.getStepData(stepId);
1236
+ const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1237
+ const retryDecision = options?.retryStrategy?.(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt) ??
1238
+ retryPresets.default(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1239
+ if (!retryDecision.shouldRetry) {
1240
+ await checkpoint.checkpoint(stepId, {
1241
+ Id: stepId,
1242
+ ParentId: parentId,
1243
+ Action: OperationAction.FAIL,
1244
+ SubType: OperationSubType.STEP,
1245
+ Type: OperationType.STEP,
1246
+ Error: createErrorObjectFromError(error),
1247
+ Name: name,
1248
+ });
1249
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1250
+ throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
1251
+ }
1252
+ await checkpoint.checkpoint(stepId, {
1253
+ Id: stepId,
1254
+ ParentId: parentId,
1255
+ Action: OperationAction.RETRY,
1256
+ SubType: OperationSubType.STEP,
1257
+ Type: OperationType.STEP,
1258
+ Error: createErrorObjectFromError(error),
1259
+ Name: name,
1260
+ StepOptions: {
1261
+ NextAttemptDelaySeconds: retryDecision.delay
1262
+ ? durationToSeconds(retryDecision.delay)
1263
+ : 1,
1264
+ },
1265
+ });
1266
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1267
+ metadata: {
1268
+ stepId,
1269
+ name,
1270
+ type: OperationType.STEP,
1271
+ subType: OperationSubType.STEP,
1272
+ parentId,
1273
+ },
1274
+ endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
1275
+ });
1276
+ await checkpoint.waitForRetryTimer(stepId);
1277
+ return await executeStepLogic();
1278
+ }
1314
1279
  }
1280
+ })();
1281
+ phase1Promise.catch(() => { });
1282
+ return new DurablePromise(async () => {
1283
+ checkpoint.markOperationAwaited(stepId);
1284
+ return await phase1Promise;
1315
1285
  });
1316
- }
1317
- hasFinishedAncestor(parentId) {
1318
- if (!parentId) {
1319
- return false;
1320
- }
1321
- let currentHashedId = hashId(parentId);
1322
- while (currentHashedId) {
1323
- const parentOperation = this.stepData[currentHashedId];
1324
- if (parentOperation?.Status === OperationStatus.SUCCEEDED ||
1325
- parentOperation?.Status === OperationStatus.FAILED) {
1326
- return true;
1286
+ };
1287
+ };
1288
+
1289
+ const createInvokeHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
1290
+ function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
1291
+ const isNameFirst = typeof funcIdOrInput === "string";
1292
+ const name = isNameFirst ? nameOrFuncId : undefined;
1293
+ const funcId = isNameFirst ? funcIdOrInput : nameOrFuncId;
1294
+ const input = isNameFirst
1295
+ ? inputOrConfig
1296
+ : funcIdOrInput;
1297
+ const config = isNameFirst
1298
+ ? maybeConfig
1299
+ : inputOrConfig;
1300
+ const stepId = createStepId();
1301
+ // Phase 1: Start invoke operation
1302
+ let isCompleted = false;
1303
+ const phase1Promise = (async () => {
1304
+ log("🔗", "Invoke phase 1:", { stepId, name: name || funcId });
1305
+ let stepData = context.getStepData(stepId);
1306
+ // Validate replay consistency
1307
+ validateReplayConsistency(stepId, {
1308
+ type: OperationType.CHAINED_INVOKE,
1309
+ name,
1310
+ subType: OperationSubType.CHAINED_INVOKE,
1311
+ }, stepData, context);
1312
+ // Check if already completed
1313
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1314
+ log("⏭️", "Invoke already completed:", { stepId });
1315
+ checkAndUpdateReplayMode?.();
1316
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1317
+ metadata: {
1318
+ stepId,
1319
+ name,
1320
+ type: OperationType.CHAINED_INVOKE,
1321
+ subType: OperationSubType.CHAINED_INVOKE,
1322
+ parentId,
1323
+ },
1324
+ });
1325
+ isCompleted = true;
1326
+ return;
1327
1327
  }
1328
- currentHashedId = parentOperation?.ParentId;
1329
- }
1330
- return false;
1331
- }
1332
- classifyCheckpointError(error) {
1333
- const originalError = error instanceof Error ? error : new Error(String(error));
1334
- const awsError = error;
1335
- const statusCode = awsError.$metadata?.httpStatusCode;
1336
- const errorName = awsError.name;
1337
- const errorMessage = awsError.message || originalError.message;
1338
- log("🔍", "Classifying checkpoint error:", {
1339
- statusCode,
1340
- errorName,
1341
- errorMessage,
1342
- });
1343
- if (statusCode &&
1344
- statusCode >= 400 &&
1345
- statusCode < 500 &&
1346
- errorName === "InvalidParameterValueException" &&
1347
- errorMessage.startsWith("Invalid Checkpoint Token")) {
1348
- return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
1349
- }
1350
- if (statusCode &&
1351
- statusCode >= 400 &&
1352
- statusCode < 500 &&
1353
- statusCode !== 429) {
1354
- return new CheckpointUnrecoverableExecutionError(`Checkpoint failed: ${errorMessage}`, originalError);
1355
- }
1356
- return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
1357
- }
1358
- async processQueue() {
1359
- if (this.isProcessing) {
1360
- return;
1361
- }
1362
- const hasQueuedItems = this.queue.length > 0;
1363
- const hasForceRequests = this.forceCheckpointPromises.length > 0;
1364
- if (!hasQueuedItems && !hasForceRequests) {
1365
- return;
1366
- }
1367
- this.isProcessing = true;
1368
- const batch = [];
1369
- let skippedCount = 0;
1370
- const baseSize = this.currentTaskToken.length + 100;
1371
- let currentSize = baseSize;
1372
- while (this.queue.length > 0) {
1373
- const nextItem = this.queue[0];
1374
- const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
1375
- if (currentSize + itemSize > this.MAX_PAYLOAD_SIZE && batch.length > 0) {
1376
- break;
1328
+ // Check if already failed
1329
+ if (stepData?.Status === OperationStatus.FAILED ||
1330
+ stepData?.Status === OperationStatus.TIMED_OUT ||
1331
+ stepData?.Status === OperationStatus.STOPPED) {
1332
+ log("❌", "Invoke already failed:", { stepId });
1333
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1334
+ metadata: {
1335
+ stepId,
1336
+ name,
1337
+ type: OperationType.CHAINED_INVOKE,
1338
+ subType: OperationSubType.CHAINED_INVOKE,
1339
+ parentId,
1340
+ },
1341
+ });
1342
+ isCompleted = true;
1343
+ return;
1377
1344
  }
1378
- this.queue.shift();
1379
- if (this.hasFinishedAncestor(nextItem.data.ParentId)) {
1380
- log("⚠️", "Checkpoint skipped - ancestor finished:", {
1381
- stepId: nextItem.stepId,
1382
- parentId: nextItem.data.ParentId,
1345
+ // Start invoke if not already started
1346
+ if (!stepData) {
1347
+ const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
1348
+ await checkpoint.checkpoint(stepId, {
1349
+ Id: stepId,
1350
+ ParentId: parentId,
1351
+ Action: OperationAction.START,
1352
+ SubType: OperationSubType.CHAINED_INVOKE,
1353
+ Type: OperationType.CHAINED_INVOKE,
1354
+ Name: name,
1355
+ Payload: serializedPayload,
1356
+ ChainedInvokeOptions: {
1357
+ FunctionName: funcId,
1358
+ },
1383
1359
  });
1384
- skippedCount++;
1385
- continue;
1386
1360
  }
1387
- batch.push(nextItem);
1388
- currentSize += itemSize;
1389
- }
1390
- log("🔄", "Processing checkpoint batch:", {
1391
- batchSize: batch.length,
1392
- remainingInQueue: this.queue.length,
1393
- estimatedSize: currentSize,
1394
- maxSize: this.MAX_PAYLOAD_SIZE,
1395
- });
1396
- try {
1397
- if (batch.length > 0 || this.forceCheckpointPromises.length > 0) {
1398
- await this.processBatch(batch);
1399
- }
1400
- batch.forEach((item) => {
1401
- if (item.data.Action === OperationAction.SUCCEED ||
1402
- item.data.Action === OperationAction.FAIL) {
1403
- this.pendingCompletions.delete(item.stepId);
1404
- }
1405
- item.resolve();
1406
- });
1407
- const forcePromises = this.forceCheckpointPromises.splice(0);
1408
- forcePromises.forEach((promise) => {
1409
- promise.resolve();
1410
- });
1411
- log("✅", "Checkpoint batch processed successfully:", {
1412
- batchSize: batch.length,
1413
- skippedCount,
1414
- forceRequests: forcePromises.length,
1415
- newTaskToken: this.currentTaskToken,
1416
- });
1417
- }
1418
- catch (error) {
1419
- log("❌", "Checkpoint batch failed:", {
1420
- batchSize: batch.length,
1421
- error,
1422
- });
1423
- const checkpointError = this.classifyCheckpointError(error);
1424
- // Clear remaining queue silently - we're terminating
1425
- this.clearQueue();
1426
- this.terminationManager.terminate({
1427
- reason: TerminationReason.CHECKPOINT_FAILED,
1428
- message: checkpointError.message,
1429
- error: checkpointError,
1361
+ // Mark as IDLE_NOT_AWAITED
1362
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
1363
+ metadata: {
1364
+ stepId,
1365
+ name,
1366
+ type: OperationType.CHAINED_INVOKE,
1367
+ subType: OperationSubType.CHAINED_INVOKE,
1368
+ parentId,
1369
+ },
1430
1370
  });
1431
- }
1432
- finally {
1433
- this.isProcessing = false;
1434
- if (this.queue.length > 0) {
1435
- setImmediate(() => {
1436
- this.processQueue();
1437
- });
1438
- }
1439
- else {
1440
- // Queue is empty and processing is done - notify all waiting promises
1441
- this.notifyQueueCompletion();
1442
- }
1443
- }
1444
- }
1445
- notifyQueueCompletion() {
1446
- if (this.queueCompletionResolver) {
1447
- if (this.queueCompletionTimeout) {
1448
- clearTimeout(this.queueCompletionTimeout);
1449
- this.queueCompletionTimeout = null;
1450
- }
1451
- this.queueCompletionResolver();
1452
- this.queueCompletionResolver = null;
1453
- }
1454
- }
1455
- async processBatch(batch) {
1456
- const updates = batch.map((item) => {
1457
- const hashedStepId = hashId(item.stepId);
1458
- const update = {
1459
- Type: item.data.Type || "STEP",
1460
- Action: item.data.Action || "START",
1461
- ...item.data,
1462
- Id: hashedStepId,
1463
- ...(item.data.ParentId && { ParentId: hashId(item.data.ParentId) }),
1464
- };
1465
- return update;
1466
- });
1467
- const checkpointData = {
1468
- DurableExecutionArn: this.durableExecutionArn,
1469
- CheckpointToken: this.currentTaskToken,
1470
- Updates: updates,
1471
- };
1472
- log("⏺️", "Creating checkpoint batch:", {
1473
- batchSize: updates.length,
1474
- checkpointToken: this.currentTaskToken,
1475
- updates: updates.map((u) => ({
1476
- Id: u.Id,
1477
- Action: u.Action,
1478
- Type: u.Type,
1479
- })),
1480
- });
1481
- const response = await this.storage.checkpoint(checkpointData, this.logger);
1482
- if (response.CheckpointToken) {
1483
- this.currentTaskToken = response.CheckpointToken;
1484
- }
1485
- if (response.NewExecutionState?.Operations) {
1486
- this.updateStepDataFromCheckpointResponse(response.NewExecutionState.Operations);
1487
- }
1488
- }
1489
- updateStepDataFromCheckpointResponse(operations) {
1490
- log("🔄", "Updating stepData from checkpoint response:", {
1491
- operationCount: operations.length,
1492
- operationIds: operations.map((op) => op.Id).filter(Boolean),
1493
- });
1494
- operations.forEach((operation) => {
1495
- if (operation.Id) {
1496
- this.stepData[operation.Id] = operation;
1497
- log("📝", "Updated stepData entry:", operation);
1498
- this.stepDataEmitter.emit(STEP_DATA_UPDATED_EVENT, operation.Id);
1499
- }
1500
- });
1501
- log("✅", "StepData update completed:", {
1502
- totalStepDataEntries: Object.keys(this.stepData).length,
1503
- });
1504
- }
1505
- getQueueStatus() {
1506
- return {
1507
- queueLength: this.queue.length,
1508
- isProcessing: this.isProcessing,
1509
- };
1510
- }
1511
- }
1512
-
1513
- /**
1514
- * High-level helper that waits for conditions before continuing execution.
1515
- * Uses event-driven approach for both operations completion and status changes.
1516
- */
1517
- async function waitBeforeContinue(options) {
1518
- const { checkHasRunningOperations, checkStepStatus, checkTimer, scheduledEndTimestamp, stepId, context, hasRunningOperations, operationsEmitter, checkpoint, onAwaitedChange, } = options;
1519
- const promises = [];
1520
- const timers = [];
1521
- const cleanupFns = [];
1522
- // Cleanup function to clear all timers and listeners
1523
- const cleanup = () => {
1524
- timers.forEach((timer) => clearTimeout(timer));
1525
- cleanupFns.forEach((fn) => fn());
1526
- };
1527
- // Timer promise - resolves when scheduled time is reached
1528
- if (checkTimer && scheduledEndTimestamp) {
1529
- const timerPromise = new Promise((resolve) => {
1530
- const timeLeft = Number(scheduledEndTimestamp) - Date.now();
1531
- if (timeLeft > 0) {
1532
- const timer = setTimeout(() => resolve({ reason: "timer", timerExpired: true }), timeLeft);
1533
- timers.push(timer);
1534
- }
1535
- else {
1536
- resolve({ reason: "timer", timerExpired: true });
1537
- }
1538
- });
1539
- promises.push(timerPromise);
1540
- }
1541
- // Operations promise - event-driven approach
1542
- if (checkHasRunningOperations) {
1543
- const operationsPromise = new Promise((resolve) => {
1544
- if (!hasRunningOperations()) {
1545
- resolve({ reason: "operations" });
1371
+ log("✅", "Invoke phase 1 complete:", { stepId });
1372
+ })();
1373
+ phase1Promise.catch(() => { });
1374
+ // Phase 2: Wait for completion
1375
+ return new DurablePromise(async () => {
1376
+ await phase1Promise;
1377
+ if (isCompleted) {
1378
+ const stepData = context.getStepData(stepId);
1379
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1380
+ const invokeDetails = stepData.ChainedInvokeDetails;
1381
+ return await safeDeserialize(config?.resultSerdes || defaultSerdes, invokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
1382
+ }
1383
+ // Handle failure
1384
+ const invokeDetails = stepData?.ChainedInvokeDetails;
1385
+ if (invokeDetails?.Error) {
1386
+ throw new InvokeError(invokeDetails.Error.ErrorMessage || "Invoke failed", invokeDetails.Error.ErrorMessage
1387
+ ? new Error(invokeDetails.Error.ErrorMessage)
1388
+ : undefined, invokeDetails.Error.ErrorData);
1389
+ }
1390
+ else {
1391
+ throw new InvokeError("Invoke failed");
1392
+ }
1546
1393
  }
1547
- else {
1548
- // Event-driven: listen for completion event
1549
- const handler = () => {
1550
- resolve({ reason: "operations" });
1551
- };
1552
- operationsEmitter.once(OPERATIONS_COMPLETE_EVENT, handler);
1553
- cleanupFns.push(() => operationsEmitter.off(OPERATIONS_COMPLETE_EVENT, handler));
1394
+ log("🔗", "Invoke phase 2:", { stepId });
1395
+ checkpoint.markOperationAwaited(stepId);
1396
+ await checkpoint.waitForStatusChange(stepId);
1397
+ const stepData = context.getStepData(stepId);
1398
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1399
+ log("✅", "Invoke completed:", { stepId });
1400
+ checkAndUpdateReplayMode?.();
1401
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1402
+ const invokeDetails = stepData.ChainedInvokeDetails;
1403
+ return await safeDeserialize(config?.resultSerdes || defaultSerdes, invokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
1554
1404
  }
1555
- });
1556
- promises.push(operationsPromise);
1557
- }
1558
- // Step status promise - event-driven approach
1559
- if (checkStepStatus) {
1560
- const originalStatus = context.getStepData(stepId)?.Status;
1561
- const hashedStepId = hashId(stepId);
1562
- const stepStatusPromise = new Promise((resolve) => {
1563
- // Check if status already changed
1564
- const currentStatus = context.getStepData(stepId)?.Status;
1565
- if (originalStatus !== currentStatus) {
1566
- resolve({ reason: "status" });
1405
+ // Handle failure
1406
+ log("❌", "Invoke failed:", { stepId, status: stepData?.Status });
1407
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1408
+ const invokeDetails = stepData?.ChainedInvokeDetails;
1409
+ if (invokeDetails?.Error) {
1410
+ throw new InvokeError(invokeDetails.Error.ErrorMessage || "Invoke failed", invokeDetails.Error.ErrorMessage
1411
+ ? new Error(invokeDetails.Error.ErrorMessage)
1412
+ : undefined, invokeDetails.Error.ErrorData);
1567
1413
  }
1568
1414
  else {
1569
- // Event-driven: listen for step data updates
1570
- const handler = (updatedStepId) => {
1571
- if (updatedStepId === hashedStepId) {
1572
- const newStatus = context.getStepData(stepId)?.Status;
1573
- if (originalStatus !== newStatus) {
1574
- resolve({ reason: "status" });
1575
- }
1576
- }
1577
- };
1578
- operationsEmitter.on(STEP_DATA_UPDATED_EVENT, handler);
1579
- cleanupFns.push(() => operationsEmitter.off(STEP_DATA_UPDATED_EVENT, handler));
1415
+ throw new InvokeError("Invoke failed");
1580
1416
  }
1581
1417
  });
1582
- promises.push(stepStatusPromise);
1583
- }
1584
- // Awaited change promise - resolves when the callback we set is invoked
1585
- // Note: This is safe from race conditions because waitBeforeContinue is called
1586
- // during Phase 1 execution (inside stepHandler), which happens BEFORE the user
1587
- // can await the DurablePromise. The callback is registered before it can be invoked.
1588
- if (onAwaitedChange) {
1589
- const awaitedChangePromise = new Promise((resolve) => {
1590
- // Register a callback that will be invoked when the promise is awaited
1591
- onAwaitedChange(() => {
1592
- resolve({ reason: "status" });
1593
- });
1594
- });
1595
- promises.push(awaitedChangePromise);
1596
- }
1597
- // If no conditions provided, return immediately
1598
- if (promises.length === 0) {
1599
- return { reason: "timeout" };
1600
- }
1601
- // Wait for any condition to be met, then cleanup timers and listeners
1602
- const result = await Promise.race(promises);
1603
- cleanup();
1604
- // If timer expired, force checkpoint to get fresh data from API
1605
- if (result.reason === "timer" && result.timerExpired && checkpoint) {
1606
- if (checkpoint.force) {
1607
- await checkpoint.force();
1608
- }
1609
- else if (checkpoint.forceCheckpoint) {
1610
- await checkpoint.forceCheckpoint();
1611
- }
1612
- }
1613
- return result;
1614
- }
1615
-
1616
- /**
1617
- * Error thrown when non-deterministic code is detected during replay
1618
- */
1619
- class NonDeterministicExecutionError extends UnrecoverableExecutionError {
1620
- terminationReason = TerminationReason.CUSTOM;
1621
- constructor(message) {
1622
- super(message);
1623
- this.name = "NonDeterministicExecutionError";
1624
1418
  }
1625
- }
1419
+ return invokeHandler;
1420
+ };
1626
1421
 
1627
- const validateReplayConsistency = (stepId, currentOperation, checkpointData, context) => {
1628
- // Skip validation if no checkpoint data exists or if Type is undefined (first execution)
1629
- if (!checkpointData || !checkpointData.Type) {
1630
- return;
1631
- }
1632
- // Validate operation type
1633
- if (checkpointData.Type !== currentOperation.type) {
1634
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation type mismatch for step "${stepId}". ` +
1635
- `Expected type "${checkpointData.Type}", but got "${currentOperation.type}". ` +
1636
- `This indicates non-deterministic control flow in your workflow code.`);
1637
- terminateForUnrecoverableError(context, error, stepId);
1422
+ // Checkpoint size limit in bytes (256KB)
1423
+ const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
1424
+ const determineChildReplayMode = (context, stepId) => {
1425
+ const stepData = context.getStepData(stepId);
1426
+ if (!stepData) {
1427
+ return DurableExecutionMode.ExecutionMode;
1638
1428
  }
1639
- // Validate operation name (including undefined)
1640
- if (checkpointData.Name !== currentOperation.name) {
1641
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation name mismatch for step "${stepId}". ` +
1642
- `Expected name "${checkpointData.Name ?? "undefined"}", but got "${currentOperation.name ?? "undefined"}". ` +
1643
- `This indicates non-deterministic control flow in your workflow code.`);
1644
- terminateForUnrecoverableError(context, error, stepId);
1429
+ if (stepData.Status === OperationStatus.SUCCEEDED &&
1430
+ stepData.ContextDetails?.ReplayChildren) {
1431
+ return DurableExecutionMode.ReplaySucceededContext;
1645
1432
  }
1646
- // Validate operation subtype
1647
- if (checkpointData.SubType !== currentOperation.subType) {
1648
- const error = new NonDeterministicExecutionError(`Non-deterministic execution detected: Operation subtype mismatch for step "${stepId}". ` +
1649
- `Expected subtype "${checkpointData.SubType}", but got "${currentOperation.subType}". ` +
1650
- `This indicates non-deterministic control flow in your workflow code.`);
1651
- terminateForUnrecoverableError(context, error, stepId);
1433
+ if (stepData.Status === OperationStatus.SUCCEEDED ||
1434
+ stepData.Status === OperationStatus.FAILED) {
1435
+ return DurableExecutionMode.ReplayMode;
1652
1436
  }
1437
+ return DurableExecutionMode.ExecutionMode;
1653
1438
  };
1654
-
1655
- // Special symbol to indicate that the main loop should continue
1656
- const CONTINUE_MAIN_LOOP$1 = Symbol("CONTINUE_MAIN_LOOP");
1657
- const waitForContinuation$1 = async (context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange) => {
1658
- const stepData = context.getStepData(stepId);
1659
- // Check if there are any ongoing operations
1660
- if (!hasRunningOperations()) {
1661
- // No ongoing operations - safe to terminate
1662
- return terminate(context, TerminationReason.RETRY_SCHEDULED, `Retry scheduled for ${name || stepId}`);
1663
- }
1664
- // There are ongoing operations - wait before continuing
1665
- await waitBeforeContinue({
1666
- checkHasRunningOperations: true,
1667
- checkStepStatus: true,
1668
- checkTimer: true,
1669
- scheduledEndTimestamp: stepData?.StepDetails?.NextAttemptTimestamp,
1670
- stepId,
1671
- context,
1672
- hasRunningOperations,
1673
- operationsEmitter: getOperationsEmitter(),
1674
- checkpoint,
1675
- onAwaitedChange,
1676
- });
1677
- // Return to let the main loop re-evaluate step status
1678
- };
1679
- /**
1680
- * Creates a step handler for executing durable steps with two-phase execution.
1681
- */
1682
- const createStepHandler = (context, checkpoint, parentContext, createStepId, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId) => {
1439
+ const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
1683
1440
  return (nameOrFn, fnOrOptions, maybeOptions) => {
1684
1441
  let name;
1685
1442
  let fn;
@@ -1693,480 +1450,31 @@ const createStepHandler = (context, checkpoint, parentContext, createStepId, log
1693
1450
  fn = nameOrFn;
1694
1451
  options = fnOrOptions;
1695
1452
  }
1696
- const stepId = createStepId();
1697
- log("▶️", "Running step:", { stepId, name, options });
1453
+ const entityId = createStepId();
1454
+ log("🔄", "Running child context:", {
1455
+ entityId,
1456
+ name,
1457
+ });
1458
+ const stepData = context.getStepData(entityId);
1459
+ // Validate replay consistency
1460
+ validateReplayConsistency(entityId, {
1461
+ type: OperationType.CONTEXT,
1462
+ name,
1463
+ subType: options?.subType ||
1464
+ OperationSubType.RUN_IN_CHILD_CONTEXT,
1465
+ }, stepData, context);
1698
1466
  // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
1699
- let isAwaited = false;
1700
- let waitingCallback;
1701
- const setWaitingCallback = (cb) => {
1702
- waitingCallback = cb;
1703
- };
1467
+ let phase1Result;
1468
+ let phase1Error;
1704
1469
  // Phase 1: Start execution immediately and capture result/error
1705
1470
  const phase1Promise = (async () => {
1706
- // Main step logic - can be re-executed if step status changes
1707
- while (true) {
1708
- try {
1709
- const stepData = context.getStepData(stepId);
1710
- // Validate replay consistency
1711
- validateReplayConsistency(stepId, {
1712
- type: OperationType.STEP,
1713
- name,
1714
- subType: OperationSubType.STEP,
1715
- }, stepData, context);
1716
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
1717
- return await handleCompletedStep(context, stepId, name, options?.serdes);
1718
- }
1719
- if (stepData?.Status === OperationStatus.FAILED) {
1720
- // Return an async rejected promise to ensure it's handled asynchronously
1721
- return (async () => {
1722
- // Reconstruct the original error from stored ErrorObject
1723
- if (stepData.StepDetails?.Error) {
1724
- throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1725
- }
1726
- else {
1727
- // Fallback for legacy data without Error field
1728
- const errorMessage = stepData?.StepDetails?.Result;
1729
- throw new StepError(errorMessage || "Unknown error");
1730
- }
1731
- })();
1732
- }
1733
- // If PENDING, wait for timer to complete
1734
- if (stepData?.Status === OperationStatus.PENDING) {
1735
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1736
- continue; // Re-evaluate step status after waiting
1737
- }
1738
- // Check for interrupted step with AT_MOST_ONCE_PER_RETRY semantics
1739
- if (stepData?.Status === OperationStatus.STARTED) {
1740
- const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
1741
- if (semantics === StepSemantics.AtMostOncePerRetry) {
1742
- log("⚠️", "Step was interrupted during execution:", {
1743
- stepId,
1744
- name,
1745
- });
1746
- const error = new StepInterruptedError(stepId, name);
1747
- // Handle the interrupted step as a failure
1748
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1749
- let retryDecision;
1750
- if (options?.retryStrategy !== undefined) {
1751
- retryDecision = options.retryStrategy(error, currentAttempt);
1752
- }
1753
- else {
1754
- retryDecision = retryPresets.default(error, currentAttempt);
1755
- }
1756
- log("⚠️", "Should Retry Interrupted Step:", {
1757
- stepId,
1758
- name,
1759
- currentAttempt,
1760
- shouldRetry: retryDecision.shouldRetry,
1761
- delayInSeconds: retryDecision.shouldRetry
1762
- ? retryDecision.delay
1763
- ? durationToSeconds(retryDecision.delay)
1764
- : undefined
1765
- : undefined,
1766
- });
1767
- if (!retryDecision.shouldRetry) {
1768
- // No retry, mark as failed
1769
- await checkpoint.checkpoint(stepId, {
1770
- Id: stepId,
1771
- ParentId: parentId,
1772
- Action: OperationAction.FAIL,
1773
- SubType: OperationSubType.STEP,
1774
- Type: OperationType.STEP,
1775
- Error: createErrorObjectFromError(error),
1776
- Name: name,
1777
- });
1778
- // Reconstruct error from ErrorObject for deterministic behavior
1779
- const errorObject = createErrorObjectFromError(error);
1780
- throw DurableOperationError.fromErrorObject(errorObject);
1781
- }
1782
- else {
1783
- // Retry
1784
- await checkpoint.checkpoint(stepId, {
1785
- Id: stepId,
1786
- ParentId: parentId,
1787
- Action: OperationAction.RETRY,
1788
- SubType: OperationSubType.STEP,
1789
- Type: OperationType.STEP,
1790
- Error: createErrorObjectFromError(error),
1791
- Name: name,
1792
- StepOptions: {
1793
- NextAttemptDelaySeconds: retryDecision.delay
1794
- ? durationToSeconds(retryDecision.delay)
1795
- : 1,
1796
- },
1797
- });
1798
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, isAwaited ? undefined : setWaitingCallback);
1799
- continue; // Re-evaluate step status after waiting
1800
- }
1801
- }
1802
- }
1803
- // Execute step function for READY, STARTED (AtLeastOncePerRetry), or first time (undefined)
1804
- const result = await executeStep(context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, isAwaited ? undefined : setWaitingCallback);
1805
- // If executeStep signals to continue the main loop, do so
1806
- if (result === CONTINUE_MAIN_LOOP$1) {
1807
- continue;
1808
- }
1809
- return result;
1810
- }
1811
- catch (error) {
1812
- // Preserve DurableOperationError instances (StepInterruptedError is handled specifically where it's thrown)
1813
- if (error instanceof DurableOperationError) {
1814
- throw error;
1815
- }
1816
- // For any other error from executeStep, wrap it in StepError for consistency
1817
- throw new StepError(error instanceof Error ? error.message : "Step failed", error instanceof Error ? error : undefined);
1818
- }
1819
- }
1820
- })();
1821
- // Attach catch handler to prevent unhandled promise rejections
1822
- // The error will still be thrown when the DurablePromise is awaited
1823
- phase1Promise.catch(() => { });
1824
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
1825
- return new DurablePromise(async () => {
1826
- // When promise is awaited, mark as awaited and invoke waiting callback
1827
- isAwaited = true;
1828
- if (waitingCallback) {
1829
- waitingCallback();
1830
- }
1831
- return await phase1Promise;
1832
- });
1833
- };
1834
- };
1835
- const handleCompletedStep = async (context, stepId, stepName, serdes = defaultSerdes) => {
1836
- log("⏭️", "Step already finished, returning cached result:", { stepId });
1837
- const stepData = context.getStepData(stepId);
1838
- const result = stepData?.StepDetails?.Result;
1839
- return await safeDeserialize(serdes, result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
1840
- };
1841
- const executeStep = async (context, checkpoint, stepId, name, fn, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, options, onAwaitedChange) => {
1842
- // Determine step semantics (default to AT_LEAST_ONCE_PER_RETRY if not specified)
1843
- const semantics = options?.semantics || StepSemantics.AtLeastOncePerRetry;
1844
- const serdes = options?.serdes || defaultSerdes;
1845
- // Checkpoint at start for both semantics (only if not already started)
1846
- const stepData = context.getStepData(stepId);
1847
- if (stepData?.Status !== OperationStatus.STARTED) {
1848
- if (semantics === StepSemantics.AtMostOncePerRetry) {
1849
- // Wait for checkpoint to complete
1850
- await checkpoint.checkpoint(stepId, {
1851
- Id: stepId,
1852
- ParentId: parentId,
1853
- Action: OperationAction.START,
1854
- SubType: OperationSubType.STEP,
1855
- Type: OperationType.STEP,
1856
- Name: name,
1857
- });
1858
- }
1859
- else {
1860
- // Fire and forget for AtLeastOncePerRetry
1861
- checkpoint.checkpoint(stepId, {
1862
- Id: stepId,
1863
- ParentId: parentId,
1864
- Action: OperationAction.START,
1865
- SubType: OperationSubType.STEP,
1866
- Type: OperationType.STEP,
1867
- Name: name,
1868
- });
1869
- }
1870
- }
1871
- try {
1872
- // Get current attempt number for logger enrichment
1873
- const stepData = context.getStepData(stepId);
1874
- const currentAttempt = stepData?.StepDetails?.Attempt || 0;
1875
- // Create step context with enriched logger
1876
- const stepContext = {
1877
- logger,
1878
- };
1879
- // Execute the step function with stepContext
1880
- addRunningOperation(stepId);
1881
- let result;
1882
- try {
1883
- result = await runWithContext(stepId, parentId, () => fn(stepContext),
1884
- // The attempt that is running is the attempt from the step data (previous step attempt) + 1
1885
- currentAttempt + 1,
1886
- // Always in execution mode when running step operations
1887
- DurableExecutionMode.ExecutionMode);
1888
- }
1889
- finally {
1890
- removeRunningOperation(stepId);
1891
- }
1892
- // Serialize the result for consistency
1893
- const serializedResult = await safeSerialize(serdes, result, stepId, name, context.terminationManager, context.durableExecutionArn);
1894
- // Always checkpoint on completion
1895
- await checkpoint.checkpoint(stepId, {
1896
- Id: stepId,
1897
- ParentId: parentId,
1898
- Action: OperationAction.SUCCEED,
1899
- SubType: OperationSubType.STEP,
1900
- Type: OperationType.STEP,
1901
- Payload: serializedResult,
1902
- Name: name,
1903
- });
1904
- log("✅", "Step completed successfully:", {
1905
- stepId,
1906
- name,
1907
- result,
1908
- semantics,
1909
- });
1910
- // Deserialize the result for consistency with replay behavior
1911
- return await safeDeserialize(serdes, serializedResult, stepId, name, context.terminationManager, context.durableExecutionArn);
1912
- }
1913
- catch (error) {
1914
- log("❌", "Step failed:", {
1915
- stepId,
1916
- name,
1917
- error,
1918
- semantics,
1919
- });
1920
- // Handle unrecoverable errors - these should not go through retry logic
1921
- if (isUnrecoverableError(error)) {
1922
- log("💥", "Unrecoverable error detected:", {
1923
- stepId,
1924
- name,
1925
- error: error.message,
1926
- });
1927
- return terminateForUnrecoverableError(context, error, name || stepId);
1928
- }
1929
- const stepData = context.getStepData(stepId);
1930
- const currentAttempt = (stepData?.StepDetails?.Attempt || 0) + 1;
1931
- let retryDecision;
1932
- if (options?.retryStrategy !== undefined) {
1933
- // Use provided retry configuration
1934
- retryDecision = options.retryStrategy(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1935
- }
1936
- else {
1937
- // Use default retry preset if no config provided
1938
- retryDecision = retryPresets.default(error instanceof Error ? error : new Error("Unknown Error"), currentAttempt);
1939
- }
1940
- log("⚠️", "Should Retry:", {
1941
- stepId,
1942
- name,
1943
- currentAttempt,
1944
- shouldRetry: retryDecision.shouldRetry,
1945
- delayInSeconds: retryDecision.shouldRetry
1946
- ? retryDecision.delay
1947
- ? durationToSeconds(retryDecision.delay)
1948
- : undefined
1949
- : undefined,
1950
- semantics,
1951
- });
1952
- if (!retryDecision.shouldRetry) {
1953
- // No retry
1954
- await checkpoint.checkpoint(stepId, {
1955
- Id: stepId,
1956
- ParentId: parentId,
1957
- Action: OperationAction.FAIL,
1958
- SubType: OperationSubType.STEP,
1959
- Type: OperationType.STEP,
1960
- Error: createErrorObjectFromError(error),
1961
- Name: name,
1962
- });
1963
- // Reconstruct error from ErrorObject for deterministic behavior
1964
- const errorObject = createErrorObjectFromError(error);
1965
- throw DurableOperationError.fromErrorObject(errorObject);
1966
- }
1967
- else {
1968
- // Retry
1969
- await checkpoint.checkpoint(stepId, {
1970
- Id: stepId,
1971
- ParentId: parentId,
1972
- Action: OperationAction.RETRY,
1973
- SubType: OperationSubType.STEP,
1974
- Type: OperationType.STEP,
1975
- Error: createErrorObjectFromError(error),
1976
- Name: name,
1977
- StepOptions: {
1978
- NextAttemptDelaySeconds: retryDecision.delay
1979
- ? durationToSeconds(retryDecision.delay)
1980
- : 1,
1981
- },
1982
- });
1983
- // Wait for continuation and signal main loop to continue
1984
- await waitForContinuation$1(context, stepId, name, hasRunningOperations, getOperationsEmitter, checkpoint, onAwaitedChange);
1985
- return CONTINUE_MAIN_LOOP$1;
1986
- }
1987
- }
1988
- };
1989
-
1990
- const createInvokeHandler = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, parentId, checkAndUpdateReplayMode) => {
1991
- function invokeHandler(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
1992
- const isNameFirst = typeof funcIdOrInput === "string";
1993
- const name = isNameFirst ? nameOrFuncId : undefined;
1994
- const funcId = isNameFirst ? funcIdOrInput : nameOrFuncId;
1995
- const input = isNameFirst
1996
- ? inputOrConfig
1997
- : funcIdOrInput;
1998
- const config = isNameFirst
1999
- ? maybeConfig
2000
- : inputOrConfig;
2001
- const stepId = createStepId();
2002
- // Phase 1: Only checkpoint if needed, don't execute full logic
2003
- const startInvokeOperation = async () => {
2004
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 1`);
2005
- // Check initial step data for replay consistency validation
2006
- const initialStepData = context.getStepData(stepId);
2007
- // Validate replay consistency once before any execution
2008
- validateReplayConsistency(stepId, {
2009
- type: OperationType.CHAINED_INVOKE,
2010
- name,
2011
- subType: OperationSubType.CHAINED_INVOKE,
2012
- }, initialStepData, context);
2013
- // If stepData already exists, phase 1 has nothing to do
2014
- if (initialStepData) {
2015
- log("⏸️", `Invoke ${name || funcId} already exists (phase 1)`);
2016
- return;
2017
- }
2018
- // No stepData exists - need to start the invoke operation
2019
- // Serialize the input payload
2020
- const serializedPayload = await safeSerialize(config?.payloadSerdes || defaultSerdes, input, stepId, name, context.terminationManager, context.durableExecutionArn);
2021
- // Create checkpoint for the invoke operation
2022
- await checkpoint.checkpoint(stepId, {
2023
- Id: stepId,
2024
- ParentId: parentId,
2025
- Action: OperationAction.START,
2026
- SubType: OperationSubType.CHAINED_INVOKE,
2027
- Type: OperationType.CHAINED_INVOKE,
2028
- Name: name,
2029
- Payload: serializedPayload,
2030
- ChainedInvokeOptions: {
2031
- FunctionName: funcId,
2032
- },
2033
- });
2034
- log("🚀", `Invoke ${name || funcId} started (phase 1)`);
2035
- };
2036
- // Phase 2: Execute full logic including waiting and termination
2037
- const continueInvokeOperation = async () => {
2038
- log("🔗", `Invoke ${name || funcId} (${stepId}) - phase 2`);
2039
- // Main invoke logic - can be re-executed if step status changes
2040
- while (true) {
2041
- // Check if we have existing step data
2042
- const stepData = context.getStepData(stepId);
2043
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2044
- // Return cached result - no need to check for errors in successful operations
2045
- const invokeDetails = stepData.ChainedInvokeDetails;
2046
- checkAndUpdateReplayMode?.();
2047
- return await safeDeserialize(config?.resultSerdes || defaultSerdes, invokeDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2048
- }
2049
- if (stepData?.Status === OperationStatus.FAILED ||
2050
- stepData?.Status === OperationStatus.TIMED_OUT ||
2051
- stepData?.Status === OperationStatus.STOPPED) {
2052
- // Operation failed, return async rejected promise
2053
- const invokeDetails = stepData.ChainedInvokeDetails;
2054
- return (async () => {
2055
- if (invokeDetails?.Error) {
2056
- throw new InvokeError(invokeDetails.Error.ErrorMessage || "Invoke failed", invokeDetails.Error.ErrorMessage
2057
- ? new Error(invokeDetails.Error.ErrorMessage)
2058
- : undefined, invokeDetails.Error.ErrorData);
2059
- }
2060
- else {
2061
- throw new InvokeError("Invoke failed");
2062
- }
2063
- })();
2064
- }
2065
- if (stepData?.Status === OperationStatus.STARTED) {
2066
- // Operation is still running
2067
- if (hasRunningOperations()) {
2068
- // Phase 2: Wait for other operations
2069
- log("⏳", `Invoke ${name || funcId} still in progress, waiting for other operations`);
2070
- await waitBeforeContinue({
2071
- checkHasRunningOperations: true,
2072
- checkStepStatus: true,
2073
- checkTimer: false,
2074
- stepId,
2075
- context,
2076
- hasRunningOperations,
2077
- operationsEmitter: getOperationsEmitter(),
2078
- });
2079
- continue; // Re-evaluate status after waiting
2080
- }
2081
- // No other operations running - terminate
2082
- log("⏳", `Invoke ${name || funcId} still in progress, terminating`);
2083
- return terminate(context, TerminationReason.OPERATION_TERMINATED, stepId);
2084
- }
2085
- // If stepData exists but has an unexpected status, break to avoid infinite loop
2086
- if (stepData && stepData.Status !== undefined) {
2087
- throw new InvokeError(`Unexpected operation status: ${stepData.Status}`);
2088
- }
2089
- // This should not happen in phase 2 since phase 1 creates stepData
2090
- throw new InvokeError("No step data found in phase 2 - this should not happen");
2091
- }
2092
- };
2093
- // Create a promise that tracks phase 1 completion
2094
- const startInvokePromise = startInvokeOperation()
2095
- .then(() => {
2096
- log("✅", "Invoke phase 1 complete:", { stepId, name: name || funcId });
2097
- })
2098
- .catch((error) => {
2099
- log("❌", "Invoke phase 1 error:", { stepId, error: error.message });
2100
- throw error; // Re-throw to fail phase 1
2101
- });
2102
- // Attach catch handler to prevent unhandled promise rejections
2103
- // The error will still be thrown when the DurablePromise is awaited
2104
- startInvokePromise.catch(() => { });
2105
- // Return DurablePromise that will execute phase 2 when awaited
2106
- return new DurablePromise(async () => {
2107
- // Wait for phase 1 to complete first
2108
- await startInvokePromise;
2109
- // Then execute phase 2
2110
- return await continueInvokeOperation();
2111
- });
2112
- }
2113
- return invokeHandler;
2114
- };
2115
-
2116
- // Checkpoint size limit in bytes (256KB)
2117
- const CHECKPOINT_SIZE_LIMIT = 256 * 1024;
2118
- const determineChildReplayMode = (context, stepId) => {
2119
- const stepData = context.getStepData(stepId);
2120
- if (!stepData) {
2121
- return DurableExecutionMode.ExecutionMode;
2122
- }
2123
- if (stepData.Status === OperationStatus.SUCCEEDED &&
2124
- stepData.ContextDetails?.ReplayChildren) {
2125
- return DurableExecutionMode.ReplaySucceededContext;
2126
- }
2127
- if (stepData.Status === OperationStatus.SUCCEEDED ||
2128
- stepData.Status === OperationStatus.FAILED) {
2129
- return DurableExecutionMode.ReplayMode;
2130
- }
2131
- return DurableExecutionMode.ExecutionMode;
2132
- };
2133
- const createRunInChildContextHandler = (context, checkpoint, parentContext, createStepId, getParentLogger, createChildContext, parentId) => {
2134
- return (nameOrFn, fnOrOptions, maybeOptions) => {
2135
- let name;
2136
- let fn;
2137
- let options;
2138
- if (typeof nameOrFn === "string" || nameOrFn === undefined) {
2139
- name = nameOrFn;
2140
- fn = fnOrOptions;
2141
- options = maybeOptions;
2142
- }
2143
- else {
2144
- fn = nameOrFn;
2145
- options = fnOrOptions;
2146
- }
2147
- const entityId = createStepId();
2148
- log("🔄", "Running child context:", {
2149
- entityId,
2150
- name,
2151
- });
2152
- const stepData = context.getStepData(entityId);
2153
- // Validate replay consistency
2154
- validateReplayConsistency(entityId, {
2155
- type: OperationType.CONTEXT,
2156
- name,
2157
- subType: options?.subType ||
2158
- OperationSubType.RUN_IN_CHILD_CONTEXT,
2159
- }, stepData, context);
2160
- // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
2161
- let phase1Result;
2162
- let phase1Error;
2163
- // Phase 1: Start execution immediately and capture result/error
2164
- const phase1Promise = (async () => {
2165
- const currentStepData = context.getStepData(entityId);
2166
- // If already completed, return cached result
2167
- if (currentStepData?.Status === OperationStatus.SUCCEEDED ||
2168
- currentStepData?.Status === OperationStatus.FAILED) {
2169
- return handleCompletedChildContext(context, parentContext, entityId, name, fn, options, getParentLogger, createChildContext);
1471
+ const currentStepData = context.getStepData(entityId);
1472
+ // If already completed, return cached result
1473
+ if (currentStepData?.Status === OperationStatus.SUCCEEDED ||
1474
+ currentStepData?.Status === OperationStatus.FAILED) {
1475
+ // Mark this run-in-child-context as finished to prevent descendant operations
1476
+ checkpoint.markAncestorFinished(entityId);
1477
+ return handleCompletedChildContext(context, parentContext, entityId, name, fn, options, getParentLogger, createChildContext);
2170
1478
  }
2171
1479
  // Execute if not completed
2172
1480
  return executeChildContext(context, checkpoint, parentContext, entityId, name, fn, options, getParentLogger, createChildContext, parentId);
@@ -2255,8 +1563,10 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2255
1563
  limit: CHECKPOINT_SIZE_LIMIT,
2256
1564
  });
2257
1565
  }
1566
+ // Mark this run-in-child-context as finished to prevent descendant operations
1567
+ checkpoint.markAncestorFinished(entityId);
2258
1568
  const subType = options?.subType || OperationSubType.RUN_IN_CHILD_CONTEXT;
2259
- await checkpoint.checkpoint(entityId, {
1569
+ checkpoint.checkpoint(entityId, {
2260
1570
  Id: entityId,
2261
1571
  ParentId: parentId,
2262
1572
  Action: OperationAction.SUCCEED,
@@ -2278,9 +1588,11 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2278
1588
  name,
2279
1589
  error,
2280
1590
  });
1591
+ // Mark this run-in-child-context as finished to prevent descendant operations
1592
+ checkpoint.markAncestorFinished(entityId);
2281
1593
  // Always checkpoint failures
2282
1594
  const subType = options?.subType || OperationSubType.RUN_IN_CHILD_CONTEXT;
2283
- await checkpoint.checkpoint(entityId, {
1595
+ checkpoint.checkpoint(entityId, {
2284
1596
  Id: entityId,
2285
1597
  ParentId: parentId,
2286
1598
  Action: OperationAction.FAIL,
@@ -2296,433 +1608,324 @@ const executeChildContext = async (context, checkpoint, parentContext, entityId,
2296
1608
  }
2297
1609
  };
2298
1610
 
2299
- const createWaitHandler = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, parentId, checkAndUpdateReplayMode) => {
1611
+ const createWaitHandler = (context, checkpoint, createStepId, parentId, checkAndUpdateReplayMode) => {
2300
1612
  function waitHandler(nameOrDuration, duration) {
2301
1613
  const isNameFirst = typeof nameOrDuration === "string";
2302
1614
  const actualName = isNameFirst ? nameOrDuration : undefined;
2303
1615
  const actualDuration = isNameFirst ? duration : nameOrDuration;
2304
1616
  const actualSeconds = durationToSeconds(actualDuration);
2305
1617
  const stepId = createStepId();
2306
- // Shared wait logic for both phases
2307
- const executeWaitLogic = async (canTerminate) => {
2308
- log("⏲️", `Wait executing (${canTerminate ? "phase 2" : "phase 1"}):`, {
1618
+ // Phase 1: Start wait operation
1619
+ let isCompleted = false;
1620
+ const phase1Promise = (async () => {
1621
+ log("⏲️", "Wait phase 1:", {
2309
1622
  stepId,
2310
1623
  name: actualName,
2311
- duration: actualDuration,
2312
1624
  seconds: actualSeconds,
2313
1625
  });
2314
1626
  let stepData = context.getStepData(stepId);
2315
- // Validate replay consistency once before loop
1627
+ // Validate replay consistency
2316
1628
  validateReplayConsistency(stepId, {
2317
1629
  type: OperationType.WAIT,
2318
1630
  name: actualName,
2319
1631
  subType: OperationSubType.WAIT,
2320
1632
  }, stepData, context);
2321
- // Main wait logic - can be re-executed if step data changes
2322
- while (true) {
2323
- stepData = context.getStepData(stepId);
2324
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2325
- log("⏭️", "Wait already completed:", { stepId });
2326
- checkAndUpdateReplayMode?.();
2327
- return;
2328
- }
2329
- // Only checkpoint START if we haven't started this wait before
2330
- if (!stepData) {
2331
- await checkpoint.checkpoint(stepId, {
2332
- Id: stepId,
2333
- ParentId: parentId,
2334
- Action: OperationAction.START,
2335
- SubType: OperationSubType.WAIT,
2336
- Type: OperationType.WAIT,
2337
- Name: actualName,
2338
- WaitOptions: {
2339
- WaitSeconds: actualSeconds,
2340
- },
2341
- });
2342
- }
2343
- // Always refresh stepData to ensure it's up-to-date before proceeding
2344
- stepData = context.getStepData(stepId);
2345
- // Check if there are any ongoing operations
2346
- if (!hasRunningOperations()) {
2347
- // Phase 1: Just return without terminating
2348
- // Phase 2: Terminate
2349
- if (canTerminate) {
2350
- return terminate(context, TerminationReason.WAIT_SCHEDULED, `Operation ${actualName || stepId} scheduled to wait`);
2351
- }
2352
- else {
2353
- log("⏸️", "Wait ready but not terminating (phase 1):", { stepId });
2354
- return;
2355
- }
2356
- }
2357
- // There are ongoing operations - wait before continuing
2358
- await waitBeforeContinue({
2359
- checkHasRunningOperations: true,
2360
- checkStepStatus: true,
2361
- checkTimer: true,
2362
- scheduledEndTimestamp: stepData?.WaitDetails?.ScheduledEndTimestamp,
2363
- stepId,
2364
- context,
2365
- hasRunningOperations,
2366
- operationsEmitter: getOperationsEmitter(),
2367
- checkpoint,
1633
+ // Check if already completed
1634
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1635
+ log("⏭️", "Wait already completed:", { stepId });
1636
+ checkAndUpdateReplayMode?.();
1637
+ // Mark as completed
1638
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1639
+ metadata: {
1640
+ stepId,
1641
+ name: actualName,
1642
+ type: OperationType.WAIT,
1643
+ subType: OperationSubType.WAIT,
1644
+ parentId,
1645
+ },
2368
1646
  });
2369
- // Continue the loop to re-evaluate all conditions from the beginning
1647
+ isCompleted = true;
1648
+ return;
2370
1649
  }
2371
- };
2372
- // Create a promise that tracks phase 1 completion
2373
- const phase1Promise = executeWaitLogic(false).then(() => {
2374
- log("✅", "Wait phase 1 complete:", { stepId, name: actualName });
2375
- });
2376
- // Attach catch handler to prevent unhandled promise rejections
2377
- // The error will still be thrown when the DurablePromise is awaited
1650
+ // Start wait if not already started
1651
+ if (!stepData) {
1652
+ await checkpoint.checkpoint(stepId, {
1653
+ Id: stepId,
1654
+ ParentId: parentId,
1655
+ Action: OperationAction.START,
1656
+ SubType: OperationSubType.WAIT,
1657
+ Type: OperationType.WAIT,
1658
+ Name: actualName,
1659
+ WaitOptions: {
1660
+ WaitSeconds: actualSeconds,
1661
+ },
1662
+ });
1663
+ }
1664
+ // Refresh stepData after checkpoint
1665
+ stepData = context.getStepData(stepId);
1666
+ // Mark as IDLE_NOT_AWAITED (phase 1 complete, not awaited yet)
1667
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
1668
+ metadata: {
1669
+ stepId,
1670
+ name: actualName,
1671
+ type: OperationType.WAIT,
1672
+ subType: OperationSubType.WAIT,
1673
+ parentId,
1674
+ },
1675
+ endTimestamp: stepData?.WaitDetails?.ScheduledEndTimestamp,
1676
+ });
1677
+ log("✅", "Wait phase 1 complete:", { stepId });
1678
+ })();
1679
+ // Prevent unhandled rejection
2378
1680
  phase1Promise.catch(() => { });
2379
- // Return DurablePromise that will execute phase 2 when awaited
1681
+ // Phase 2: Wait for completion
2380
1682
  return new DurablePromise(async () => {
2381
- // Wait for phase 1 to complete first
1683
+ // Wait for phase 1
2382
1684
  await phase1Promise;
2383
- // Then execute phase 2
2384
- await executeWaitLogic(true);
1685
+ // If already completed in phase 1, skip phase 2
1686
+ if (isCompleted) {
1687
+ return;
1688
+ }
1689
+ log("⏲️", "Wait phase 2:", { stepId });
1690
+ // Mark as awaited
1691
+ checkpoint.markOperationAwaited(stepId);
1692
+ // Wait for status change
1693
+ await checkpoint.waitForStatusChange(stepId);
1694
+ // Check final status
1695
+ const stepData = context.getStepData(stepId);
1696
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1697
+ log("✅", "Wait completed:", { stepId });
1698
+ checkAndUpdateReplayMode?.();
1699
+ // Mark as completed
1700
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1701
+ return;
1702
+ }
1703
+ // Should not reach here, but handle gracefully
1704
+ log("⚠️", "Wait ended with unexpected status:", {
1705
+ stepId,
1706
+ status: stepData?.Status,
1707
+ });
2385
1708
  });
2386
1709
  }
2387
1710
  return waitHandler;
2388
1711
  };
2389
1712
 
2390
- // Special symbol to indicate that the main loop should continue
2391
- const CONTINUE_MAIN_LOOP = Symbol("CONTINUE_MAIN_LOOP");
2392
- const waitForContinuation = async (context, stepId, name, hasRunningOperations, checkpoint, operationsEmitter, onAwaitedChange) => {
2393
- const stepData = context.getStepData(stepId);
2394
- // Check if there are any ongoing operations
2395
- if (!hasRunningOperations()) {
2396
- // No ongoing operations - safe to terminate
2397
- return terminate(context, TerminationReason.RETRY_SCHEDULED, `Retry scheduled for ${name || stepId}`);
2398
- }
2399
- // There are ongoing operations - wait before continuing
2400
- await waitBeforeContinue({
2401
- checkHasRunningOperations: true,
2402
- checkStepStatus: true,
2403
- checkTimer: true,
2404
- scheduledEndTimestamp: stepData?.StepDetails?.NextAttemptTimestamp,
2405
- stepId,
2406
- context,
2407
- hasRunningOperations,
2408
- operationsEmitter,
2409
- checkpoint,
2410
- onAwaitedChange,
2411
- });
2412
- // Return to let the main loop re-evaluate step status
2413
- };
2414
- const createWaitForConditionHandler = (context, checkpoint, createStepId, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId) => {
1713
+ const createWaitForConditionHandler = (context, checkpoint, createStepId, logger, parentId) => {
2415
1714
  return (nameOrCheck, checkOrConfig, maybeConfig) => {
2416
- // Two-phase execution: Phase 1 starts immediately, Phase 2 returns result when awaited
2417
- let isAwaited = false;
2418
- let waitingCallback;
2419
- const setWaitingCallback = (cb) => {
2420
- waitingCallback = cb;
2421
- };
2422
- // Phase 1: Start execution immediately and capture result/error
1715
+ let name;
1716
+ let check;
1717
+ let config;
1718
+ if (typeof nameOrCheck === "string" || nameOrCheck === undefined) {
1719
+ name = nameOrCheck;
1720
+ check = checkOrConfig;
1721
+ config = maybeConfig;
1722
+ }
1723
+ else {
1724
+ check = nameOrCheck;
1725
+ config = checkOrConfig;
1726
+ }
1727
+ if (!config?.waitStrategy || config.initialState === undefined) {
1728
+ throw new Error("waitForCondition requires config with waitStrategy and initialState");
1729
+ }
1730
+ const stepId = createStepId();
1731
+ const serdes = config.serdes || defaultSerdes;
2423
1732
  const phase1Promise = (async () => {
2424
- let name;
2425
- let check;
2426
- let config;
2427
- // Parse overloaded parameters - validation errors thrown here are async
2428
- if (typeof nameOrCheck === "string" || nameOrCheck === undefined) {
2429
- name = nameOrCheck;
2430
- check = checkOrConfig;
2431
- config = maybeConfig;
1733
+ let stepData = context.getStepData(stepId);
1734
+ // Check if already completed
1735
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1736
+ log("⏭️", "WaitForCondition already completed:", { stepId });
1737
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1738
+ metadata: {
1739
+ stepId,
1740
+ name,
1741
+ type: OperationType.STEP,
1742
+ subType: OperationSubType.WAIT_FOR_CONDITION,
1743
+ parentId,
1744
+ },
1745
+ });
1746
+ return await safeDeserialize(serdes, stepData.StepDetails?.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2432
1747
  }
2433
- else {
2434
- check = nameOrCheck;
2435
- config = checkOrConfig;
1748
+ // Check if already failed
1749
+ if (stepData?.Status === OperationStatus.FAILED) {
1750
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1751
+ metadata: {
1752
+ stepId,
1753
+ name,
1754
+ type: OperationType.STEP,
1755
+ subType: OperationSubType.WAIT_FOR_CONDITION,
1756
+ parentId,
1757
+ },
1758
+ });
1759
+ if (stepData.StepDetails?.Error) {
1760
+ throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
1761
+ }
1762
+ throw new WaitForConditionError("waitForCondition failed");
2436
1763
  }
2437
- if (!config ||
2438
- !config.waitStrategy ||
2439
- config.initialState === undefined) {
2440
- throw new Error("waitForCondition requires config with waitStrategy and initialState");
1764
+ // Check if pending retry
1765
+ if (stepData?.Status === OperationStatus.PENDING) {
1766
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1767
+ metadata: {
1768
+ stepId,
1769
+ name,
1770
+ type: OperationType.STEP,
1771
+ subType: OperationSubType.WAIT_FOR_CONDITION,
1772
+ parentId,
1773
+ },
1774
+ endTimestamp: stepData.StepDetails?.NextAttemptTimestamp,
1775
+ });
1776
+ return (async () => {
1777
+ await checkpoint.waitForRetryTimer(stepId);
1778
+ stepData = context.getStepData(stepId);
1779
+ return await executeCheckLogic();
1780
+ })();
2441
1781
  }
2442
- const stepId = createStepId();
2443
- log("🔄", "Running waitForCondition:", {
2444
- stepId,
2445
- name,
2446
- config,
2447
- });
2448
- // Main waitForCondition logic - can be re-executed if step status changes
2449
- while (true) {
2450
- try {
2451
- const stepData = context.getStepData(stepId);
2452
- // Check if already completed
2453
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2454
- return await handleCompletedWaitForCondition(context, stepId, name, config.serdes);
2455
- }
2456
- if (stepData?.Status === OperationStatus.FAILED) {
2457
- // Return an async rejected promise to ensure it's handled asynchronously
2458
- return (async () => {
2459
- // Reconstruct the original error from stored ErrorObject
2460
- if (stepData.StepDetails?.Error) {
2461
- throw DurableOperationError.fromErrorObject(stepData.StepDetails.Error);
2462
- }
2463
- else {
2464
- // Fallback for legacy data without Error field
2465
- const errorMessage = stepData?.StepDetails?.Result;
2466
- throw new WaitForConditionError(errorMessage || "waitForCondition failed");
2467
- }
2468
- })();
2469
- }
2470
- // If PENDING, wait for timer to complete
2471
- if (stepData?.Status === OperationStatus.PENDING) {
2472
- await waitForContinuation(context, stepId, name, hasRunningOperations, checkpoint, getOperationsEmitter(), isAwaited ? undefined : setWaitingCallback);
2473
- continue; // Re-evaluate step status after waiting
1782
+ return await executeCheckLogic();
1783
+ async function executeCheckLogic() {
1784
+ stepData = context.getStepData(stepId);
1785
+ // Get current state
1786
+ let currentState;
1787
+ if (stepData?.Status === OperationStatus.STARTED ||
1788
+ stepData?.Status === OperationStatus.READY) {
1789
+ const checkpointData = stepData.StepDetails?.Result;
1790
+ if (checkpointData) {
1791
+ try {
1792
+ const serdesContext = {
1793
+ entityId: stepId,
1794
+ durableExecutionArn: context.durableExecutionArn,
1795
+ };
1796
+ currentState = await serdes.deserialize(checkpointData, serdesContext);
1797
+ }
1798
+ catch {
1799
+ currentState = config.initialState;
1800
+ }
2474
1801
  }
2475
- // Execute check function for READY, STARTED, or first time (undefined)
2476
- const result = await executeWaitForCondition(context, checkpoint, stepId, name, check, config, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, isAwaited ? undefined : setWaitingCallback);
2477
- // If executeWaitForCondition signals to continue the main loop, do so
2478
- if (result === CONTINUE_MAIN_LOOP) {
2479
- continue;
1802
+ else {
1803
+ currentState = config.initialState;
2480
1804
  }
2481
- return result;
2482
1805
  }
2483
- catch (error) {
2484
- // For any error from executeWaitForCondition, re-throw it
2485
- throw error;
1806
+ else {
1807
+ currentState = config.initialState;
2486
1808
  }
2487
- }
2488
- })();
2489
- // Attach catch handler to prevent unhandled promise rejections
2490
- // The error will still be thrown when the DurablePromise is awaited
2491
- phase1Promise.catch(() => { });
2492
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
2493
- return new DurablePromise(async () => {
2494
- // When promise is awaited, mark as awaited and invoke waiting callback
2495
- isAwaited = true;
2496
- if (waitingCallback) {
2497
- waitingCallback();
2498
- }
1809
+ const currentAttempt = (stepData?.StepDetails?.Attempt ?? 0) + 1;
1810
+ // Checkpoint START if not already started
1811
+ if (stepData?.Status !== OperationStatus.STARTED) {
1812
+ checkpoint.checkpoint(stepId, {
1813
+ Id: stepId,
1814
+ ParentId: parentId,
1815
+ Action: OperationAction.START,
1816
+ SubType: OperationSubType.WAIT_FOR_CONDITION,
1817
+ Type: OperationType.STEP,
1818
+ Name: name,
1819
+ });
1820
+ }
1821
+ try {
1822
+ const waitForConditionContext = {
1823
+ logger,
1824
+ };
1825
+ // Mark operation as EXECUTING
1826
+ checkpoint.markOperationState(stepId, OperationLifecycleState.EXECUTING, {
1827
+ metadata: {
1828
+ stepId,
1829
+ name,
1830
+ type: OperationType.STEP,
1831
+ subType: OperationSubType.WAIT_FOR_CONDITION,
1832
+ parentId,
1833
+ },
1834
+ });
1835
+ const newState = await runWithContext(stepId, parentId, () => check(currentState, waitForConditionContext), currentAttempt, DurableExecutionMode.ExecutionMode);
1836
+ const serializedState = await safeSerialize(serdes, newState, stepId, name, context.terminationManager, context.durableExecutionArn);
1837
+ const deserializedState = await safeDeserialize(serdes, serializedState, stepId, name, context.terminationManager, context.durableExecutionArn);
1838
+ const decision = config.waitStrategy(deserializedState, currentAttempt);
1839
+ if (!decision.shouldContinue) {
1840
+ await checkpoint.checkpoint(stepId, {
1841
+ Id: stepId,
1842
+ ParentId: parentId,
1843
+ Action: OperationAction.SUCCEED,
1844
+ SubType: OperationSubType.WAIT_FOR_CONDITION,
1845
+ Type: OperationType.STEP,
1846
+ Payload: serializedState,
1847
+ Name: name,
1848
+ });
1849
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1850
+ return deserializedState;
1851
+ }
1852
+ await checkpoint.checkpoint(stepId, {
1853
+ Id: stepId,
1854
+ ParentId: parentId,
1855
+ Action: OperationAction.RETRY,
1856
+ SubType: OperationSubType.WAIT_FOR_CONDITION,
1857
+ Type: OperationType.STEP,
1858
+ Payload: serializedState,
1859
+ Name: name,
1860
+ StepOptions: {
1861
+ NextAttemptDelaySeconds: durationToSeconds(decision.delay),
1862
+ },
1863
+ });
1864
+ checkpoint.markOperationState(stepId, OperationLifecycleState.RETRY_WAITING, {
1865
+ metadata: {
1866
+ stepId,
1867
+ name,
1868
+ type: OperationType.STEP,
1869
+ subType: OperationSubType.WAIT_FOR_CONDITION,
1870
+ parentId,
1871
+ },
1872
+ endTimestamp: context.getStepData(stepId)?.StepDetails?.NextAttemptTimestamp,
1873
+ });
1874
+ await checkpoint.waitForRetryTimer(stepId);
1875
+ return await executeCheckLogic();
1876
+ }
1877
+ catch (error) {
1878
+ await checkpoint.checkpoint(stepId, {
1879
+ Id: stepId,
1880
+ ParentId: parentId,
1881
+ Action: OperationAction.FAIL,
1882
+ SubType: OperationSubType.WAIT_FOR_CONDITION,
1883
+ Type: OperationType.STEP,
1884
+ Error: createErrorObjectFromError(error),
1885
+ Name: name,
1886
+ });
1887
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1888
+ throw DurableOperationError.fromErrorObject(createErrorObjectFromError(error));
1889
+ }
1890
+ }
1891
+ })();
1892
+ phase1Promise.catch(() => { });
1893
+ return new DurablePromise(async () => {
1894
+ checkpoint.markOperationAwaited(stepId);
2499
1895
  return await phase1Promise;
2500
1896
  });
2501
1897
  };
2502
1898
  };
2503
- const handleCompletedWaitForCondition = async (context, stepId, stepName, serdes = defaultSerdes) => {
2504
- log("⏭️", "waitForCondition already finished, returning cached result:", {
2505
- stepId,
2506
- });
2507
- const stepData = context.getStepData(stepId);
2508
- const result = stepData?.StepDetails?.Result;
2509
- return await safeDeserialize(serdes, result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
2510
- };
2511
- const executeWaitForCondition = async (context, checkpoint, stepId, name, check, config, logger, addRunningOperation, removeRunningOperation, hasRunningOperations, getOperationsEmitter, parentId, onAwaitedChange) => {
2512
- const serdes = config.serdes || defaultSerdes;
2513
- // Get current state from previous checkpoint or use initial state
2514
- let currentState;
2515
- const existingOperation = context.getStepData(stepId);
2516
- if (existingOperation?.Status === OperationStatus.STARTED ||
2517
- existingOperation?.Status === OperationStatus.READY) {
2518
- // This is a retry - get state from previous checkpoint
2519
- const checkpointData = existingOperation.StepDetails?.Result;
2520
- if (checkpointData) {
2521
- try {
2522
- // Try to deserialize the checkpoint data directly
2523
- const serdesContext = {
2524
- entityId: stepId,
2525
- durableExecutionArn: context.durableExecutionArn,
2526
- };
2527
- currentState = await serdes.deserialize(checkpointData, serdesContext);
2528
- }
2529
- catch (error) {
2530
- log("⚠️", "Failed to deserialize checkpoint data, using initial state:", {
2531
- stepId,
2532
- name,
2533
- error,
2534
- });
2535
- currentState = config.initialState;
2536
- }
2537
- }
2538
- else {
2539
- currentState = config.initialState;
2540
- }
2541
- }
2542
- else {
2543
- // First execution
2544
- currentState = config.initialState;
2545
- }
2546
- // Get the current attempt number (1-based for wait strategy consistency)
2547
- const currentAttempt = existingOperation?.StepDetails?.Attempt || 1;
2548
- // Checkpoint START for observability (fire and forget) - only if not already started
2549
- const stepData = context.getStepData(stepId);
2550
- if (stepData?.Status !== OperationStatus.STARTED) {
2551
- checkpoint.checkpoint(stepId, {
2552
- Id: stepId,
2553
- ParentId: parentId,
2554
- Action: OperationAction.START,
2555
- SubType: OperationSubType.WAIT_FOR_CONDITION,
2556
- Type: OperationType.STEP,
2557
- Name: name,
2558
- });
2559
- }
2560
- try {
2561
- // Create WaitForConditionContext with enriched logger for the check function
2562
- const waitForConditionContext = {
2563
- logger,
2564
- };
2565
- // Execute the check function
2566
- addRunningOperation(stepId);
2567
- let newState;
2568
- try {
2569
- newState = await runWithContext(stepId, parentId, () => check(currentState, waitForConditionContext), currentAttempt + 1, DurableExecutionMode.ExecutionMode);
2570
- }
2571
- finally {
2572
- removeRunningOperation(stepId);
2573
- }
2574
- // Serialize the new state for consistency
2575
- const serializedState = await safeSerialize(serdes, newState, stepId, name, context.terminationManager, context.durableExecutionArn);
2576
- // Deserialize for consistency with replay behavior
2577
- const deserializedState = await safeDeserialize(serdes, serializedState, stepId, name, context.terminationManager, context.durableExecutionArn);
2578
- // Check if condition is met using the wait strategy
2579
- const decision = config.waitStrategy(deserializedState, currentAttempt);
2580
- log("🔍", "waitForCondition check completed:", {
2581
- stepId,
2582
- name,
2583
- currentAttempt: currentAttempt,
2584
- shouldContinue: decision.shouldContinue,
2585
- delayInSeconds: decision.shouldContinue
2586
- ? durationToSeconds(decision.delay)
2587
- : undefined,
2588
- });
2589
- if (!decision.shouldContinue) {
2590
- // Condition is met - complete successfully
2591
- await checkpoint.checkpoint(stepId, {
2592
- Id: stepId,
2593
- ParentId: parentId,
2594
- Action: OperationAction.SUCCEED,
2595
- SubType: OperationSubType.WAIT_FOR_CONDITION,
2596
- Type: OperationType.STEP,
2597
- Payload: serializedState,
2598
- Name: name,
2599
- });
2600
- log("✅", "waitForCondition completed successfully:", {
2601
- stepId,
2602
- name,
2603
- result: deserializedState,
2604
- totalAttempts: currentAttempt,
2605
- });
2606
- return deserializedState;
2607
- }
2608
- else {
2609
- // Condition not met - schedule retry
2610
- // Only checkpoint the state, not the attempt number (system handles that)
2611
- await checkpoint.checkpoint(stepId, {
2612
- Id: stepId,
2613
- ParentId: parentId,
2614
- Action: OperationAction.RETRY,
2615
- SubType: OperationSubType.WAIT_FOR_CONDITION,
2616
- Type: OperationType.STEP,
2617
- Payload: serializedState, // Just the state, not wrapped in an object
2618
- Name: name,
2619
- StepOptions: {
2620
- NextAttemptDelaySeconds: durationToSeconds(decision.delay),
2621
- },
2622
- });
2623
- // Wait for continuation and signal main loop to continue
2624
- await waitForContinuation(context, stepId, name, hasRunningOperations, checkpoint, getOperationsEmitter(), onAwaitedChange);
2625
- return CONTINUE_MAIN_LOOP;
2626
- }
2627
- }
2628
- catch (error) {
2629
- log("❌", "waitForCondition check function failed:", {
2630
- stepId,
2631
- name,
2632
- error,
2633
- currentAttempt: currentAttempt,
2634
- });
2635
- // Mark as failed - waitForCondition doesn't have its own retry logic for errors
2636
- // If the check function throws, it's considered a failure
2637
- await checkpoint.checkpoint(stepId, {
2638
- Id: stepId,
2639
- ParentId: parentId,
2640
- Action: OperationAction.FAIL,
2641
- SubType: OperationSubType.WAIT_FOR_CONDITION,
2642
- Type: OperationType.STEP,
2643
- Error: createErrorObjectFromError(error),
2644
- Name: name,
2645
- });
2646
- // Reconstruct error from ErrorObject for deterministic behavior
2647
- const errorObject = createErrorObjectFromError(error);
2648
- throw DurableOperationError.fromErrorObject(errorObject);
2649
- }
2650
- };
2651
1899
 
2652
- const createCallbackPromise = (context, stepId, stepName, serdes, hasRunningOperations, operationsEmitter, terminationMessage, checkAndUpdateReplayMode) => {
1900
+ const createCallbackPromise = (context, checkpoint, stepId, stepName, serdes, checkAndUpdateReplayMode) => {
2653
1901
  return new DurablePromise(async () => {
2654
- log("🔄", "Callback promise phase 2 executing:", { stepId, stepName });
2655
- // Main callback logic - can be re-executed if step status changes
2656
- while (true) {
2657
- const stepData = context.getStepData(stepId);
2658
- // Handle case where stepData doesn't exist yet
2659
- // While Phase 1 should create stepData via checkpoint before Phase 2 starts,
2660
- // this can be undefined in test scenarios
2661
- if (!stepData) {
2662
- log("⚠️", "Step data not found, waiting for callback creation:", {
2663
- stepId,
2664
- });
2665
- if (hasRunningOperations()) {
2666
- await waitBeforeContinue({
2667
- checkHasRunningOperations: true,
2668
- checkStepStatus: true,
2669
- checkTimer: false,
2670
- stepId,
2671
- context,
2672
- hasRunningOperations,
2673
- operationsEmitter,
2674
- });
2675
- continue; // Re-evaluate after waiting
2676
- }
2677
- // No other operations and no step data - terminate gracefully
2678
- log("⏳", "No step data found and no running operations, terminating");
2679
- return terminate(context, TerminationReason.CALLBACK_PENDING, terminationMessage);
2680
- }
2681
- if (stepData.Status === OperationStatus.SUCCEEDED) {
2682
- const callbackData = stepData.CallbackDetails;
2683
- if (!callbackData?.CallbackId) {
2684
- throw new CallbackError(`No callback ID found for completed callback: ${stepId}`);
2685
- }
2686
- const result = await safeDeserialize(serdes, callbackData.Result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
2687
- // Check and update replay mode after callback completion
2688
- checkAndUpdateReplayMode();
2689
- return result;
2690
- }
2691
- if (stepData.Status === OperationStatus.FAILED ||
2692
- stepData.Status === OperationStatus.TIMED_OUT) {
2693
- const callbackData = stepData.CallbackDetails;
2694
- const error = callbackData?.Error;
2695
- if (error) {
2696
- const cause = new Error(error.ErrorMessage);
2697
- cause.name = error.ErrorType || "Error";
2698
- cause.stack = error.StackTrace?.join("\n");
2699
- throw new CallbackError(error.ErrorMessage || "Callback failed", cause, error.ErrorData);
2700
- }
2701
- throw new CallbackError("Callback failed");
2702
- }
2703
- if (stepData.Status === OperationStatus.STARTED) {
2704
- // Callback is still pending
2705
- if (hasRunningOperations()) {
2706
- // Wait for other operations or callback completion
2707
- log("⏳", "Callback still pending, waiting for other operations");
2708
- await waitBeforeContinue({
2709
- checkHasRunningOperations: true,
2710
- checkStepStatus: true,
2711
- checkTimer: false,
2712
- stepId,
2713
- context,
2714
- hasRunningOperations,
2715
- operationsEmitter,
2716
- });
2717
- continue; // Re-evaluate status after waiting
2718
- }
2719
- // No other operations running - terminate
2720
- log("⏳", "Callback still pending, terminating");
2721
- return terminate(context, TerminationReason.CALLBACK_PENDING, terminationMessage);
1902
+ log("🔄", "Callback promise phase 2:", { stepId, stepName });
1903
+ checkpoint.markOperationAwaited(stepId);
1904
+ await checkpoint.waitForStatusChange(stepId);
1905
+ const stepData = context.getStepData(stepId);
1906
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
1907
+ log("✅", "Callback completed:", { stepId });
1908
+ checkAndUpdateReplayMode();
1909
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1910
+ const callbackData = stepData.CallbackDetails;
1911
+ if (!callbackData) {
1912
+ throw new CallbackError(`No callback data found for completed callback: ${stepId}`);
2722
1913
  }
2723
- // Should not reach here, but handle unexpected status
2724
- throw new CallbackError(`Unexpected callback status: ${stepData.Status}`);
1914
+ const result = await safeDeserialize(serdes, callbackData.Result, stepId, stepName, context.terminationManager, context.durableExecutionArn);
1915
+ return result;
1916
+ }
1917
+ // Handle failure
1918
+ log("❌", "Callback failed:", { stepId, status: stepData?.Status });
1919
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED);
1920
+ const callbackData = stepData?.CallbackDetails;
1921
+ const error = callbackData?.Error;
1922
+ if (error) {
1923
+ const cause = new Error(error.ErrorMessage);
1924
+ cause.name = error.ErrorType || "Error";
1925
+ cause.stack = error.StackTrace?.join("\n");
1926
+ throw new CallbackError(error.ErrorMessage || "Callback failed", cause, error.ErrorData);
2725
1927
  }
1928
+ throw new CallbackError("Callback failed");
2726
1929
  });
2727
1930
  };
2728
1931
 
@@ -2730,7 +1933,7 @@ const createPassThroughSerdes = () => ({
2730
1933
  serialize: async (value) => value,
2731
1934
  deserialize: async (data) => data,
2732
1935
  });
2733
- const createCallback = (context, checkpoint, createStepId, hasRunningOperations, getOperationsEmitter, checkAndUpdateReplayMode, parentId) => {
1936
+ const createCallback = (context, checkpoint, createStepId, checkAndUpdateReplayMode, parentId) => {
2734
1937
  return (nameOrConfig, maybeConfig) => {
2735
1938
  let name;
2736
1939
  let config;
@@ -2743,82 +1946,99 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2743
1946
  }
2744
1947
  const stepId = createStepId();
2745
1948
  const serdes = config?.serdes || createPassThroughSerdes();
2746
- // Validate replay consistency first
2747
- const stepData = context.getStepData(stepId);
2748
- validateReplayConsistency(stepId, {
2749
- type: OperationType.CALLBACK,
2750
- name,
2751
- subType: OperationSubType.CALLBACK,
2752
- }, stepData, context);
2753
- // Phase 1: Setup and checkpoint (immediate execution)
2754
- const setupPromise = (async () => {
2755
- log("📞", "Creating callback phase 1:", { stepId, name, config });
2756
- // Handle already completed callbacks
1949
+ // Phase 1: Setup and checkpoint
1950
+ let isCompleted = false;
1951
+ const phase1Promise = (async () => {
1952
+ log("📞", "Callback phase 1:", { stepId, name });
1953
+ let stepData = context.getStepData(stepId);
1954
+ // Validate replay consistency
1955
+ validateReplayConsistency(stepId, {
1956
+ type: OperationType.CALLBACK,
1957
+ name,
1958
+ subType: OperationSubType.CALLBACK,
1959
+ }, stepData, context);
1960
+ // Check if already completed
2757
1961
  if (stepData?.Status === OperationStatus.SUCCEEDED) {
2758
- log("⏭️", "Callback already completed in phase 1:", { stepId });
2759
- return { wasNewCallback: false };
1962
+ log("⏭️", "Callback already completed:", { stepId });
1963
+ checkAndUpdateReplayMode();
1964
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1965
+ metadata: {
1966
+ stepId,
1967
+ name,
1968
+ type: OperationType.CALLBACK,
1969
+ subType: OperationSubType.CALLBACK,
1970
+ parentId,
1971
+ },
1972
+ });
1973
+ isCompleted = true;
1974
+ return;
2760
1975
  }
1976
+ // Check if already failed
2761
1977
  if (stepData?.Status === OperationStatus.FAILED ||
2762
1978
  stepData?.Status === OperationStatus.TIMED_OUT) {
2763
- log("❌", "Callback already failed in phase 1:", { stepId });
2764
- return { wasNewCallback: false };
1979
+ log("❌", "Callback already failed:", { stepId });
1980
+ checkpoint.markOperationState(stepId, OperationLifecycleState.COMPLETED, {
1981
+ metadata: {
1982
+ stepId,
1983
+ name,
1984
+ type: OperationType.CALLBACK,
1985
+ subType: OperationSubType.CALLBACK,
1986
+ parentId,
1987
+ },
1988
+ });
1989
+ isCompleted = true;
1990
+ return;
2765
1991
  }
2766
- // Handle already started callbacks
2767
- if (stepData?.Status === OperationStatus.STARTED) {
2768
- log("⏳", "Callback already started in phase 1:", { stepId });
2769
- return { wasNewCallback: false };
1992
+ // Start callback if not already started
1993
+ if (!stepData) {
1994
+ await checkpoint.checkpoint(stepId, {
1995
+ Id: stepId,
1996
+ ParentId: parentId,
1997
+ Action: "START",
1998
+ SubType: OperationSubType.CALLBACK,
1999
+ Type: OperationType.CALLBACK,
2000
+ Name: name,
2001
+ CallbackOptions: {
2002
+ TimeoutSeconds: config?.timeout
2003
+ ? durationToSeconds(config.timeout)
2004
+ : undefined,
2005
+ HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2006
+ ? durationToSeconds(config.heartbeatTimeout)
2007
+ : undefined,
2008
+ },
2009
+ });
2010
+ // Refresh stepData after checkpoint
2011
+ stepData = context.getStepData(stepId);
2770
2012
  }
2771
- // Create new callback - checkpoint START operation
2772
- log("🆕", "Creating new callback in phase 1:", { stepId, name });
2773
- await checkpoint.checkpoint(stepId, {
2774
- Id: stepId,
2775
- ParentId: parentId,
2776
- Action: "START",
2777
- SubType: OperationSubType.CALLBACK,
2778
- Type: OperationType.CALLBACK,
2779
- Name: name,
2780
- CallbackOptions: {
2781
- TimeoutSeconds: config?.timeout
2782
- ? durationToSeconds(config.timeout)
2783
- : undefined,
2784
- HeartbeatTimeoutSeconds: config?.heartbeatTimeout
2785
- ? durationToSeconds(config.heartbeatTimeout)
2786
- : undefined,
2013
+ // Mark as IDLE_NOT_AWAITED
2014
+ checkpoint.markOperationState(stepId, OperationLifecycleState.IDLE_NOT_AWAITED, {
2015
+ metadata: {
2016
+ stepId,
2017
+ name,
2018
+ type: OperationType.CALLBACK,
2019
+ subType: OperationSubType.CALLBACK,
2020
+ parentId,
2787
2021
  },
2788
2022
  });
2789
- log("✅", "Callback checkpoint completed in phase 1:", { stepId });
2790
- return { wasNewCallback: true };
2791
- })().catch((error) => {
2792
- log("❌", "Callback phase 1 error:", { stepId, error: error.message });
2793
- throw error;
2794
- });
2795
- // Return DurablePromise that executes phase 2 when awaited
2023
+ log("✅", "Callback phase 1 complete:", { stepId });
2024
+ })();
2025
+ phase1Promise.catch(() => { });
2026
+ // Phase 2: Handle results and create callback promise
2796
2027
  return new DurablePromise(async () => {
2797
- // Wait for phase 1 to complete
2798
- const { wasNewCallback } = await setupPromise;
2799
- // Phase 2: Handle results and create callback promise
2800
- log("🔄", "Callback phase 2 executing:", { stepId, name });
2801
- const stepData = context.getStepData(stepId);
2802
- // Handle completed callbacks
2803
- if (stepData?.Status === OperationStatus.SUCCEEDED) {
2804
- const callbackData = stepData.CallbackDetails;
2028
+ await phase1Promise;
2029
+ if (isCompleted) {
2030
+ const stepData = context.getStepData(stepId);
2031
+ const callbackData = stepData?.CallbackDetails;
2805
2032
  if (!callbackData?.CallbackId) {
2806
- throw new CallbackError(`No callback ID found for completed callback: ${stepId}`);
2033
+ throw new CallbackError(`No callback ID found for callback: ${stepId}`);
2807
2034
  }
2808
- const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2809
- const resolvedPromise = new DurablePromise(async () => deserializedResult);
2810
- // Check and update replay mode after callback completion
2811
- checkAndUpdateReplayMode();
2812
- return [resolvedPromise, callbackData.CallbackId];
2813
- }
2814
- // Handle failed callbacks
2815
- if (stepData?.Status === OperationStatus.FAILED ||
2816
- stepData?.Status === OperationStatus.TIMED_OUT) {
2817
- const callbackData = stepData.CallbackDetails;
2818
- if (!callbackData?.CallbackId) {
2819
- throw new CallbackError(`No callback ID found for failed callback: ${stepId}`);
2035
+ if (stepData?.Status === OperationStatus.SUCCEEDED) {
2036
+ const deserializedResult = await safeDeserialize(serdes, callbackData.Result, stepId, name, context.terminationManager, context.durableExecutionArn);
2037
+ const resolvedPromise = new DurablePromise(async () => deserializedResult);
2038
+ return [resolvedPromise, callbackData.CallbackId];
2820
2039
  }
2821
- const error = stepData.CallbackDetails?.Error;
2040
+ // Handle failure
2041
+ const error = stepData?.CallbackDetails?.Error;
2822
2042
  const callbackError = error
2823
2043
  ? (() => {
2824
2044
  const cause = new Error(error.ErrorMessage);
@@ -2832,25 +2052,15 @@ const createCallback = (context, checkpoint, createStepId, hasRunningOperations,
2832
2052
  });
2833
2053
  return [rejectedPromise, callbackData.CallbackId];
2834
2054
  }
2835
- // Handle started or new callbacks
2055
+ log("📞", "Callback phase 2:", { stepId });
2056
+ const stepData = context.getStepData(stepId);
2836
2057
  const callbackData = stepData?.CallbackDetails;
2837
2058
  if (!callbackData?.CallbackId) {
2838
- const errorMessage = wasNewCallback
2839
- ? `Callback ID not found in stepData after checkpoint: ${stepId}`
2840
- : `No callback ID found for started callback: ${stepId}`;
2841
- throw new CallbackError(errorMessage);
2059
+ throw new CallbackError(`No callback ID found for started callback: ${stepId}`);
2842
2060
  }
2843
2061
  const callbackId = callbackData.CallbackId;
2844
- // Create callback promise that handles completion
2845
- const terminationMessage = wasNewCallback
2846
- ? `Callback ${name || stepId} created and pending external completion`
2847
- : `Callback ${name || stepId} is pending external completion`;
2848
- const callbackPromise = createCallbackPromise(context, stepId, name, serdes, hasRunningOperations, getOperationsEmitter(), terminationMessage, checkAndUpdateReplayMode);
2849
- log("✅", "Callback created successfully in phase 2:", {
2850
- stepId,
2851
- name,
2852
- callbackId,
2853
- });
2062
+ const callbackPromise = createCallbackPromise(context, checkpoint, stepId, name, serdes, checkAndUpdateReplayMode);
2063
+ log("✅", "Callback created:", { stepId, name, callbackId });
2854
2064
  return [callbackPromise, callbackId];
2855
2065
  });
2856
2066
  };
@@ -3307,6 +2517,42 @@ class ConcurrencyController {
3307
2517
  (childStepData.Status === OperationStatus.SUCCEEDED ||
3308
2518
  childStepData.Status === OperationStatus.FAILED));
3309
2519
  }
2520
+ getCompletionReason(failureCount, successCount, completedCount, items, config) {
2521
+ // Check tolerance first, before checking if all completed
2522
+ const completion = config.completionConfig;
2523
+ // Handle fail-fast behavior (no completion config or empty completion config)
2524
+ if (!completion) {
2525
+ if (failureCount > 0)
2526
+ return "FAILURE_TOLERANCE_EXCEEDED";
2527
+ }
2528
+ else {
2529
+ const hasAnyCompletionCriteria = Object.values(completion).some((value) => value !== undefined);
2530
+ if (!hasAnyCompletionCriteria) {
2531
+ if (failureCount > 0)
2532
+ return "FAILURE_TOLERANCE_EXCEEDED";
2533
+ }
2534
+ else {
2535
+ // Check specific tolerance thresholds
2536
+ if (completion.toleratedFailureCount !== undefined &&
2537
+ failureCount > completion.toleratedFailureCount) {
2538
+ return "FAILURE_TOLERANCE_EXCEEDED";
2539
+ }
2540
+ if (completion.toleratedFailurePercentage !== undefined) {
2541
+ const failurePercentage = (failureCount / items.length) * 100;
2542
+ if (failurePercentage > completion.toleratedFailurePercentage) {
2543
+ return "FAILURE_TOLERANCE_EXCEEDED";
2544
+ }
2545
+ }
2546
+ }
2547
+ }
2548
+ // Check other completion reasons
2549
+ if (completedCount === items.length)
2550
+ return "ALL_COMPLETED";
2551
+ if (config.completionConfig?.minSuccessful !== undefined &&
2552
+ successCount >= config.completionConfig.minSuccessful)
2553
+ return "MIN_SUCCESSFUL_REACHED";
2554
+ return "ALL_COMPLETED";
2555
+ }
3310
2556
  async executeItems(items, executor, parentContext, config, durableExecutionMode = DurableExecutionMode.ExecutionMode, entityId, executionContext) {
3311
2557
  // In replay mode, we're reconstructing the result from child contexts
3312
2558
  if (durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
@@ -3420,17 +2666,9 @@ class ConcurrencyController {
3420
2666
  completedCount,
3421
2667
  totalCount: resultItems.length,
3422
2668
  });
3423
- // Reconstruct the completion reason based on replay results
3424
2669
  const successCount = resultItems.filter((item) => item.status === BatchItemStatus.SUCCEEDED).length;
3425
- const getCompletionReason = () => {
3426
- if (completedCount === items.length)
3427
- return "ALL_COMPLETED";
3428
- if (config.completionConfig?.minSuccessful !== undefined &&
3429
- successCount >= config.completionConfig.minSuccessful)
3430
- return "MIN_SUCCESSFUL_REACHED";
3431
- return "FAILURE_TOLERANCE_EXCEEDED";
3432
- };
3433
- return new BatchResultImpl(resultItems, getCompletionReason());
2670
+ const failureCount = completedCount - successCount;
2671
+ return new BatchResultImpl(resultItems, this.getCompletionReason(failureCount, successCount, completedCount, items, config));
3434
2672
  }
3435
2673
  async executeItemsConcurrently(items, executor, parentContext, config) {
3436
2674
  const maxConcurrency = config.maxConcurrency || Infinity;
@@ -3477,13 +2715,8 @@ class ConcurrencyController {
3477
2715
  }
3478
2716
  return false;
3479
2717
  };
3480
- const getCompletionReason = () => {
3481
- if (completedCount === items.length)
3482
- return "ALL_COMPLETED";
3483
- if (config.completionConfig?.minSuccessful !== undefined &&
3484
- successCount >= config.completionConfig.minSuccessful)
3485
- return "MIN_SUCCESSFUL_REACHED";
3486
- return "FAILURE_TOLERANCE_EXCEEDED";
2718
+ const getCompletionReason = (failureCount) => {
2719
+ return this.getCompletionReason(failureCount, successCount, completedCount, items, config);
3487
2720
  };
3488
2721
  const tryStartNext = () => {
3489
2722
  while (activeCount < maxConcurrency &&
@@ -3554,7 +2787,7 @@ class ConcurrencyController {
3554
2787
  startedCount: finalBatchItems.filter((item) => item.status === BatchItemStatus.STARTED).length,
3555
2788
  totalCount: finalBatchItems.length,
3556
2789
  });
3557
- const result = new BatchResultImpl(finalBatchItems, getCompletionReason());
2790
+ const result = new BatchResultImpl(finalBatchItems, getCompletionReason(failureCount));
3558
2791
  resolve(result);
3559
2792
  }
3560
2793
  else {
@@ -3626,347 +2859,971 @@ const createConcurrentExecutionHandler = (context, runInChildContext, skipNextOp
3626
2859
  Array.isArray(result.all)) {
3627
2860
  return restoreBatchResult(result);
3628
2861
  }
3629
- return result;
3630
- })();
3631
- // Attach catch handler to prevent unhandled promise rejections
3632
- // The error will still be thrown when the DurablePromise is awaited
3633
- phase1Promise.catch(() => { });
3634
- // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
3635
- return new DurablePromise(async () => {
3636
- return await phase1Promise;
2862
+ return result;
2863
+ })();
2864
+ // Attach catch handler to prevent unhandled promise rejections
2865
+ // The error will still be thrown when the DurablePromise is awaited
2866
+ phase1Promise.catch(() => { });
2867
+ // Phase 2: Return DurablePromise that returns Phase 1 result when awaited
2868
+ return new DurablePromise(async () => {
2869
+ return await phase1Promise;
2870
+ });
2871
+ };
2872
+ };
2873
+
2874
+ class ModeManagement {
2875
+ captureExecutionState;
2876
+ checkAndUpdateReplayMode;
2877
+ checkForNonResolvingPromise;
2878
+ getDurableExecutionMode;
2879
+ setDurableExecutionMode;
2880
+ constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
2881
+ this.captureExecutionState = captureExecutionState;
2882
+ this.checkAndUpdateReplayMode = checkAndUpdateReplayMode;
2883
+ this.checkForNonResolvingPromise = checkForNonResolvingPromise;
2884
+ this.getDurableExecutionMode = getDurableExecutionMode;
2885
+ this.setDurableExecutionMode = setDurableExecutionMode;
2886
+ }
2887
+ withModeManagement(operation) {
2888
+ const shouldSwitchToExecutionMode = this.captureExecutionState();
2889
+ this.checkAndUpdateReplayMode();
2890
+ const nonResolvingPromise = this.checkForNonResolvingPromise();
2891
+ if (nonResolvingPromise)
2892
+ return nonResolvingPromise;
2893
+ try {
2894
+ return operation();
2895
+ }
2896
+ finally {
2897
+ if (shouldSwitchToExecutionMode) {
2898
+ this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
2899
+ }
2900
+ }
2901
+ }
2902
+ withDurableModeManagement(operation) {
2903
+ const shouldSwitchToExecutionMode = this.captureExecutionState();
2904
+ this.checkAndUpdateReplayMode();
2905
+ const nonResolvingPromise = this.checkForNonResolvingPromise();
2906
+ if (nonResolvingPromise) {
2907
+ return new DurablePromise(async () => {
2908
+ await nonResolvingPromise;
2909
+ // This will never be reached
2910
+ throw new Error("Unreachable code");
2911
+ });
2912
+ }
2913
+ try {
2914
+ return operation();
2915
+ }
2916
+ finally {
2917
+ if (shouldSwitchToExecutionMode) {
2918
+ this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
2919
+ }
2920
+ }
2921
+ }
2922
+ }
2923
+
2924
+ const HASH_LENGTH = 16;
2925
+ /**
2926
+ * Creates an MD5 hash of the input string for better performance than SHA-256
2927
+ * @param input - The string to hash
2928
+ * @returns The truncated hexadecimal hash string
2929
+ */
2930
+ const hashId = (input) => {
2931
+ return createHash("md5")
2932
+ .update(input)
2933
+ .digest("hex")
2934
+ .substring(0, HASH_LENGTH);
2935
+ };
2936
+ /**
2937
+ * Helper function to get step data using the original stepId
2938
+ * This function handles the hashing internally so callers don't need to worry about it
2939
+ * @param stepData - The stepData record from context
2940
+ * @param stepId - The original stepId (will be hashed internally)
2941
+ * @returns The operation data or undefined if not found
2942
+ */
2943
+ const getStepData = (stepData, stepId) => {
2944
+ const hashedId = hashId(stepId);
2945
+ return stepData[hashedId];
2946
+ };
2947
+
2948
+ class DurableContextImpl {
2949
+ executionContext;
2950
+ lambdaContext;
2951
+ _stepPrefix;
2952
+ _stepCounter = 0;
2953
+ durableLogger;
2954
+ modeAwareLoggingEnabled = true;
2955
+ checkpoint;
2956
+ durableExecutionMode;
2957
+ _parentId;
2958
+ modeManagement;
2959
+ durableExecution;
2960
+ logger;
2961
+ constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
2962
+ this.executionContext = executionContext;
2963
+ this.lambdaContext = lambdaContext;
2964
+ this._stepPrefix = stepPrefix;
2965
+ this._parentId = parentId;
2966
+ this.durableExecution = durableExecution;
2967
+ this.durableLogger = inheritedLogger;
2968
+ this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
2969
+ this.logger = this.createModeAwareLogger(inheritedLogger);
2970
+ this.durableExecutionMode = durableExecutionMode;
2971
+ this.checkpoint = durableExecution.checkpointManager;
2972
+ this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), () => this.durableExecutionMode, (mode) => {
2973
+ this.durableExecutionMode = mode;
2974
+ });
2975
+ }
2976
+ getDurableLoggingContext() {
2977
+ return {
2978
+ getDurableLogData: () => {
2979
+ const activeContext = getActiveContext();
2980
+ const result = {
2981
+ executionArn: this.executionContext.durableExecutionArn,
2982
+ requestId: this.executionContext.requestId,
2983
+ tenantId: this.executionContext.tenantId,
2984
+ operationId: !activeContext || activeContext?.contextId === "root"
2985
+ ? undefined
2986
+ : hashId(activeContext.contextId),
2987
+ };
2988
+ if (activeContext?.attempt !== undefined) {
2989
+ result.attempt = activeContext.attempt;
2990
+ }
2991
+ return result;
2992
+ },
2993
+ };
2994
+ }
2995
+ shouldLog() {
2996
+ const activeContext = getActiveContext();
2997
+ if (!this.modeAwareLoggingEnabled || !activeContext) {
2998
+ return true;
2999
+ }
3000
+ if (activeContext.contextId === "root") {
3001
+ return this.durableExecutionMode === DurableExecutionMode.ExecutionMode;
3002
+ }
3003
+ return (activeContext.durableExecutionMode === DurableExecutionMode.ExecutionMode);
3004
+ }
3005
+ createModeAwareLogger(logger) {
3006
+ const durableContextLogger = {
3007
+ warn: (...args) => {
3008
+ if (this.shouldLog()) {
3009
+ return logger.warn(...args);
3010
+ }
3011
+ },
3012
+ debug: (...args) => {
3013
+ if (this.shouldLog()) {
3014
+ return logger.debug(...args);
3015
+ }
3016
+ },
3017
+ info: (...args) => {
3018
+ if (this.shouldLog()) {
3019
+ return logger.info(...args);
3020
+ }
3021
+ },
3022
+ error: (...args) => {
3023
+ if (this.shouldLog()) {
3024
+ return logger.error(...args);
3025
+ }
3026
+ },
3027
+ };
3028
+ if ("log" in logger) {
3029
+ durableContextLogger.log = (level, ...args) => {
3030
+ if (this.shouldLog()) {
3031
+ return logger.log?.(level, ...args);
3032
+ }
3033
+ };
3034
+ }
3035
+ return durableContextLogger;
3036
+ }
3037
+ createStepId() {
3038
+ this._stepCounter++;
3039
+ return this._stepPrefix
3040
+ ? `${this._stepPrefix}-${this._stepCounter}`
3041
+ : `${this._stepCounter}`;
3042
+ }
3043
+ getNextStepId() {
3044
+ const nextCounter = this._stepCounter + 1;
3045
+ return this._stepPrefix
3046
+ ? `${this._stepPrefix}-${nextCounter}`
3047
+ : `${nextCounter}`;
3048
+ }
3049
+ /**
3050
+ * Skips the next operation by incrementing the step counter.
3051
+ * Used internally by concurrent execution handler during replay to skip incomplete items.
3052
+ * @internal
3053
+ */
3054
+ skipNextOperation() {
3055
+ this._stepCounter++;
3056
+ }
3057
+ checkAndUpdateReplayMode() {
3058
+ if (this.durableExecutionMode === DurableExecutionMode.ReplayMode) {
3059
+ const nextStepId = this.getNextStepId();
3060
+ const nextStepData = this.executionContext.getStepData(nextStepId);
3061
+ if (!nextStepData) {
3062
+ this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
3063
+ }
3064
+ }
3065
+ }
3066
+ captureExecutionState() {
3067
+ const wasInReplayMode = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
3068
+ const nextStepId = this.getNextStepId();
3069
+ const stepData = this.executionContext.getStepData(nextStepId);
3070
+ const wasNotFinished = !!(stepData &&
3071
+ stepData.Status !== OperationStatus.SUCCEEDED &&
3072
+ stepData.Status !== OperationStatus.FAILED);
3073
+ return wasInReplayMode && wasNotFinished;
3074
+ }
3075
+ checkForNonResolvingPromise() {
3076
+ if (this.durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
3077
+ const nextStepId = this.getNextStepId();
3078
+ const nextStepData = this.executionContext.getStepData(nextStepId);
3079
+ if (nextStepData &&
3080
+ nextStepData.Status !== OperationStatus.SUCCEEDED &&
3081
+ nextStepData.Status !== OperationStatus.FAILED) {
3082
+ return new Promise(() => { }); // Non-resolving promise
3083
+ }
3084
+ }
3085
+ return null;
3086
+ }
3087
+ withModeManagement(operation) {
3088
+ return this.modeManagement.withModeManagement(operation);
3089
+ }
3090
+ withDurableModeManagement(operation) {
3091
+ return this.modeManagement.withDurableModeManagement(operation);
3092
+ }
3093
+ step(nameOrFn, fnOrOptions, maybeOptions) {
3094
+ validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
3095
+ return this.withDurableModeManagement(() => {
3096
+ const stepHandler = createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this._parentId);
3097
+ return stepHandler(nameOrFn, fnOrOptions, maybeOptions);
3098
+ });
3099
+ }
3100
+ invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
3101
+ validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
3102
+ return this.withDurableModeManagement(() => {
3103
+ const invokeHandler = createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3104
+ return invokeHandler(...[
3105
+ nameOrFuncId,
3106
+ funcIdOrInput,
3107
+ inputOrConfig,
3108
+ maybeConfig,
3109
+ ]);
3110
+ });
3111
+ }
3112
+ runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
3113
+ validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
3114
+ return this.withDurableModeManagement(() => {
3115
+ const blockHandler = createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger,
3116
+ // Adapter function to maintain compatibility
3117
+ (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, this.durableExecution, parentId), this._parentId);
3118
+ return blockHandler(nameOrFn, fnOrOptions, maybeOptions);
3119
+ });
3120
+ }
3121
+ wait(nameOrDuration, maybeDuration) {
3122
+ validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
3123
+ return this.withDurableModeManagement(() => {
3124
+ const waitHandler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3125
+ return typeof nameOrDuration === "string"
3126
+ ? waitHandler(nameOrDuration, maybeDuration)
3127
+ : waitHandler(nameOrDuration);
3128
+ });
3129
+ }
3130
+ /**
3131
+ * Configure logger behavior for this context
3132
+ *
3133
+ * This method allows partial configuration - only the properties provided will be updated.
3134
+ * For example, calling configureLogger(\{ modeAware: false \}) will only change the modeAware
3135
+ * setting without affecting any previously configured custom logger.
3136
+ *
3137
+ * @param config - Logger configuration options including customLogger and modeAware settings (default: modeAware=true)
3138
+ * @example
3139
+ * // Set custom logger and enable mode-aware logging
3140
+ * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
3141
+ *
3142
+ * // Later, disable mode-aware logging without changing the custom logger
3143
+ * context.configureLogger(\{ modeAware: false \});
3144
+ */
3145
+ configureLogger(config) {
3146
+ if (config.customLogger !== undefined) {
3147
+ this.durableLogger = config.customLogger;
3148
+ this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3149
+ this.logger = this.createModeAwareLogger(this.durableLogger);
3150
+ }
3151
+ if (config.modeAware !== undefined) {
3152
+ this.modeAwareLoggingEnabled = config.modeAware;
3153
+ }
3154
+ }
3155
+ createCallback(nameOrConfig, maybeConfig) {
3156
+ validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
3157
+ return this.withDurableModeManagement(() => {
3158
+ const callbackFactory = createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId);
3159
+ return callbackFactory(nameOrConfig, maybeConfig);
3160
+ });
3161
+ }
3162
+ waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
3163
+ validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
3164
+ return this.withDurableModeManagement(() => {
3165
+ const waitForCallbackHandler = createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this));
3166
+ return waitForCallbackHandler(nameOrSubmitter, submitterOrConfig, maybeConfig);
3167
+ });
3168
+ }
3169
+ waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
3170
+ validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
3171
+ return this.withDurableModeManagement(() => {
3172
+ const waitForConditionHandler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this._parentId);
3173
+ return typeof nameOrCheckFunc === "string" ||
3174
+ nameOrCheckFunc === undefined
3175
+ ? waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig)
3176
+ : waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig);
3177
+ });
3178
+ }
3179
+ map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
3180
+ validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
3181
+ return this.withDurableModeManagement(() => {
3182
+ const mapHandler = createMapHandler(this.executionContext, this._executeConcurrently.bind(this));
3183
+ return mapHandler(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
3184
+ });
3185
+ }
3186
+ parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
3187
+ validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
3188
+ return this.withDurableModeManagement(() => {
3189
+ const parallelHandler = createParallelHandler(this.executionContext, this._executeConcurrently.bind(this));
3190
+ return parallelHandler(nameOrBranches, branchesOrConfig, maybeConfig);
3191
+ });
3192
+ }
3193
+ _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
3194
+ validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
3195
+ return this.withDurableModeManagement(() => {
3196
+ const concurrentExecutionHandler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
3197
+ const promise = concurrentExecutionHandler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
3198
+ // Prevent unhandled promise rejections
3199
+ promise?.catch(() => { });
3200
+ return promise;
3201
+ });
3202
+ }
3203
+ get promise() {
3204
+ return createPromiseHandler(this.step.bind(this));
3205
+ }
3206
+ }
3207
+ const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => {
3208
+ return new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3209
+ };
3210
+
3211
+ /**
3212
+ * Error thrown when a checkpoint operation fails due to invocation-level issues
3213
+ * (e.g., 5xx errors, invalid checkpoint token)
3214
+ * This will terminate the current Lambda invocation, but the execution can continue with a new invocation
3215
+ */
3216
+ class CheckpointUnrecoverableInvocationError extends UnrecoverableInvocationError {
3217
+ terminationReason = TerminationReason.CHECKPOINT_FAILED;
3218
+ constructor(message, originalError) {
3219
+ super(message || "Checkpoint operation failed", originalError);
3220
+ }
3221
+ }
3222
+ /**
3223
+ * Error thrown when a checkpoint operation fails due to execution-level issues
3224
+ * (e.g., 4xx errors other than invalid checkpoint token)
3225
+ * This will terminate the entire execution and cannot be recovered
3226
+ */
3227
+ class CheckpointUnrecoverableExecutionError extends UnrecoverableExecutionError {
3228
+ terminationReason = TerminationReason.CHECKPOINT_FAILED;
3229
+ constructor(message, originalError) {
3230
+ super(message || "Checkpoint operation failed", originalError);
3231
+ }
3232
+ }
3233
+
3234
+ const STEP_DATA_UPDATED_EVENT = "stepDataUpdated";
3235
+ class CheckpointManager {
3236
+ durableExecutionArn;
3237
+ stepData;
3238
+ storage;
3239
+ terminationManager;
3240
+ stepDataEmitter;
3241
+ logger;
3242
+ finishedAncestors;
3243
+ queue = [];
3244
+ isProcessing = false;
3245
+ currentTaskToken;
3246
+ forceCheckpointPromises = [];
3247
+ queueCompletionResolver = null;
3248
+ MAX_PAYLOAD_SIZE = 750 * 1024; // 750KB in bytes
3249
+ isTerminating = false;
3250
+ static textEncoder = new TextEncoder();
3251
+ // Operation lifecycle tracking
3252
+ operations = new Map();
3253
+ // Termination cooldown
3254
+ terminationTimer = null;
3255
+ terminationReason = null;
3256
+ TERMINATION_COOLDOWN_MS = 50;
3257
+ constructor(durableExecutionArn, stepData, storage, terminationManager, initialTaskToken, stepDataEmitter, logger, finishedAncestors) {
3258
+ this.durableExecutionArn = durableExecutionArn;
3259
+ this.stepData = stepData;
3260
+ this.storage = storage;
3261
+ this.terminationManager = terminationManager;
3262
+ this.stepDataEmitter = stepDataEmitter;
3263
+ this.logger = logger;
3264
+ this.finishedAncestors = finishedAncestors;
3265
+ this.currentTaskToken = initialTaskToken;
3266
+ }
3267
+ setTerminating() {
3268
+ this.isTerminating = true;
3269
+ log("🛑", "Checkpoint manager marked as terminating");
3270
+ }
3271
+ /**
3272
+ * Mark an ancestor as finished (for run-in-child-context operations)
3273
+ */
3274
+ markAncestorFinished(stepId) {
3275
+ this.finishedAncestors.add(stepId);
3276
+ }
3277
+ /**
3278
+ * Extract parent ID from hierarchical stepId (e.g., "1-2-3" -\> "1-2")
3279
+ */
3280
+ getParentId(stepId) {
3281
+ const lastDashIndex = stepId.lastIndexOf("-");
3282
+ return lastDashIndex > 0 ? stepId.substring(0, lastDashIndex) : undefined;
3283
+ }
3284
+ /**
3285
+ * Checks if any ancestor of the given stepId is finished
3286
+ * Only applies to operations that are descendants of run-in-child-context operations
3287
+ */
3288
+ hasFinishedAncestor(stepId) {
3289
+ // Only use getParentId to avoid mixing hashed and original stepIds
3290
+ let currentParentId = this.getParentId(stepId);
3291
+ while (currentParentId) {
3292
+ // Check if this ancestor is finished
3293
+ if (this.finishedAncestors.has(currentParentId)) {
3294
+ return true;
3295
+ }
3296
+ // Move up to the next ancestor using hierarchical stepId
3297
+ currentParentId = this.getParentId(currentParentId);
3298
+ }
3299
+ return false;
3300
+ }
3301
+ async forceCheckpoint() {
3302
+ if (this.isTerminating) {
3303
+ log("⚠️", "Force checkpoint skipped - termination in progress");
3304
+ return new Promise(() => { }); // Never resolves during termination
3305
+ }
3306
+ return new Promise((resolve, reject) => {
3307
+ this.forceCheckpointPromises.push({ resolve, reject });
3308
+ if (!this.isProcessing) {
3309
+ setImmediate(() => {
3310
+ this.processQueue();
3311
+ });
3312
+ }
3313
+ });
3314
+ }
3315
+ async waitForQueueCompletion() {
3316
+ if (this.queue.length === 0 && !this.isProcessing) {
3317
+ return;
3318
+ }
3319
+ return new Promise((resolve) => {
3320
+ this.queueCompletionResolver = resolve;
3321
+ });
3322
+ }
3323
+ clearQueue() {
3324
+ // Silently clear queue - we're terminating so no need to reject promises
3325
+ this.queue = [];
3326
+ this.forceCheckpointPromises = [];
3327
+ // Resolve any waiting queue completion promises since we're clearing
3328
+ this.notifyQueueCompletion();
3329
+ }
3330
+ // Alias for backward compatibility with Checkpoint interface
3331
+ async force() {
3332
+ return this.forceCheckpoint();
3333
+ }
3334
+ async checkpoint(stepId, data) {
3335
+ if (this.isTerminating) {
3336
+ log("⚠️", "Checkpoint skipped - termination in progress:", { stepId });
3337
+ return new Promise(() => { }); // Never resolves during termination
3338
+ }
3339
+ // Check if any ancestor is finished - if so, don't queue and don't resolve
3340
+ if (this.hasFinishedAncestor(stepId)) {
3341
+ log("⚠️", "Checkpoint skipped - ancestor already finished:", { stepId });
3342
+ return new Promise(() => { }); // Never resolves when ancestor is finished
3343
+ }
3344
+ return new Promise((resolve, reject) => {
3345
+ const queuedItem = {
3346
+ stepId,
3347
+ data,
3348
+ resolve: () => {
3349
+ resolve();
3350
+ },
3351
+ reject: (error) => {
3352
+ reject(error);
3353
+ },
3354
+ };
3355
+ this.queue.push(queuedItem);
3356
+ log("📥", "Checkpoint queued:", {
3357
+ stepId,
3358
+ queueLength: this.queue.length,
3359
+ isProcessing: this.isProcessing,
3360
+ });
3361
+ if (!this.isProcessing) {
3362
+ setImmediate(() => {
3363
+ this.processQueue();
3364
+ });
3365
+ }
3637
3366
  });
3638
- };
3639
- };
3640
-
3641
- class ModeManagement {
3642
- captureExecutionState;
3643
- checkAndUpdateReplayMode;
3644
- checkForNonResolvingPromise;
3645
- getDurableExecutionMode;
3646
- setDurableExecutionMode;
3647
- constructor(captureExecutionState, checkAndUpdateReplayMode, checkForNonResolvingPromise, getDurableExecutionMode, setDurableExecutionMode) {
3648
- this.captureExecutionState = captureExecutionState;
3649
- this.checkAndUpdateReplayMode = checkAndUpdateReplayMode;
3650
- this.checkForNonResolvingPromise = checkForNonResolvingPromise;
3651
- this.getDurableExecutionMode = getDurableExecutionMode;
3652
- this.setDurableExecutionMode = setDurableExecutionMode;
3653
3367
  }
3654
- withModeManagement(operation) {
3655
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3656
- this.checkAndUpdateReplayMode();
3657
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3658
- if (nonResolvingPromise)
3659
- return nonResolvingPromise;
3660
- try {
3661
- return operation();
3368
+ classifyCheckpointError(error) {
3369
+ const originalError = error instanceof Error ? error : new Error(String(error));
3370
+ const awsError = error;
3371
+ const statusCode = awsError.$metadata?.httpStatusCode;
3372
+ const errorName = awsError.name;
3373
+ const errorMessage = awsError.message || originalError.message;
3374
+ log("🔍", "Classifying checkpoint error:", {
3375
+ statusCode,
3376
+ errorName,
3377
+ errorMessage,
3378
+ });
3379
+ if (statusCode &&
3380
+ statusCode >= 400 &&
3381
+ statusCode < 500 &&
3382
+ errorName === "InvalidParameterValueException" &&
3383
+ errorMessage.startsWith("Invalid Checkpoint Token")) {
3384
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3662
3385
  }
3663
- finally {
3664
- if (shouldSwitchToExecutionMode) {
3665
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3666
- }
3386
+ if (statusCode &&
3387
+ statusCode >= 400 &&
3388
+ statusCode < 500 &&
3389
+ statusCode !== 429) {
3390
+ return new CheckpointUnrecoverableExecutionError(`Checkpoint failed: ${errorMessage}`, originalError);
3667
3391
  }
3392
+ return new CheckpointUnrecoverableInvocationError(`Checkpoint failed: ${errorMessage}`, originalError);
3668
3393
  }
3669
- withDurableModeManagement(operation) {
3670
- const shouldSwitchToExecutionMode = this.captureExecutionState();
3671
- this.checkAndUpdateReplayMode();
3672
- const nonResolvingPromise = this.checkForNonResolvingPromise();
3673
- if (nonResolvingPromise) {
3674
- return new DurablePromise(async () => {
3675
- await nonResolvingPromise;
3676
- // This will never be reached
3677
- throw new Error("Unreachable code");
3678
- });
3394
+ async processQueue() {
3395
+ if (this.isProcessing) {
3396
+ return;
3397
+ }
3398
+ const hasQueuedItems = this.queue.length > 0;
3399
+ const hasForceRequests = this.forceCheckpointPromises.length > 0;
3400
+ if (!hasQueuedItems && !hasForceRequests) {
3401
+ return;
3402
+ }
3403
+ this.isProcessing = true;
3404
+ const batch = [];
3405
+ const baseSize = this.currentTaskToken.length + 100;
3406
+ let currentSize = baseSize;
3407
+ while (this.queue.length > 0) {
3408
+ const nextItem = this.queue[0];
3409
+ const itemSize = CheckpointManager.textEncoder.encode(JSON.stringify(nextItem)).length;
3410
+ if (currentSize + itemSize > this.MAX_PAYLOAD_SIZE && batch.length > 0) {
3411
+ break;
3412
+ }
3413
+ this.queue.shift();
3414
+ batch.push(nextItem);
3415
+ currentSize += itemSize;
3679
3416
  }
3417
+ log("🔄", "Processing checkpoint batch:", {
3418
+ batchSize: batch.length,
3419
+ remainingInQueue: this.queue.length,
3420
+ estimatedSize: currentSize,
3421
+ maxSize: this.MAX_PAYLOAD_SIZE,
3422
+ });
3680
3423
  try {
3681
- return operation();
3424
+ if (batch.length > 0 || this.forceCheckpointPromises.length > 0) {
3425
+ await this.processBatch(batch);
3426
+ }
3427
+ batch.forEach((item) => {
3428
+ item.resolve();
3429
+ });
3430
+ const forcePromises = this.forceCheckpointPromises.splice(0);
3431
+ forcePromises.forEach((promise) => {
3432
+ promise.resolve();
3433
+ });
3434
+ log("✅", "Checkpoint batch processed successfully:", {
3435
+ batchSize: batch.length,
3436
+ forceRequests: forcePromises.length,
3437
+ newTaskToken: this.currentTaskToken,
3438
+ });
3439
+ }
3440
+ catch (error) {
3441
+ log("❌", "Checkpoint batch failed:", {
3442
+ batchSize: batch.length,
3443
+ error,
3444
+ });
3445
+ const checkpointError = this.classifyCheckpointError(error);
3446
+ // Clear remaining queue silently - we're terminating
3447
+ this.clearQueue();
3448
+ this.terminationManager.terminate({
3449
+ reason: TerminationReason.CHECKPOINT_FAILED,
3450
+ message: checkpointError.message,
3451
+ error: checkpointError,
3452
+ });
3682
3453
  }
3683
3454
  finally {
3684
- if (shouldSwitchToExecutionMode) {
3685
- this.setDurableExecutionMode(DurableExecutionMode.ExecutionMode);
3455
+ this.isProcessing = false;
3456
+ if (this.queue.length > 0) {
3457
+ setImmediate(() => {
3458
+ this.processQueue();
3459
+ });
3460
+ }
3461
+ else {
3462
+ // Queue is empty and processing is done - notify all waiting promises
3463
+ this.notifyQueueCompletion();
3686
3464
  }
3687
3465
  }
3688
3466
  }
3689
- }
3690
-
3691
- class DurableContextImpl {
3692
- executionContext;
3693
- lambdaContext;
3694
- _stepPrefix;
3695
- _stepCounter = 0;
3696
- durableLogger;
3697
- modeAwareLoggingEnabled = true;
3698
- runningOperations = new Set();
3699
- operationsEmitter = new EventEmitter();
3700
- checkpoint;
3701
- durableExecutionMode;
3702
- _parentId;
3703
- modeManagement;
3704
- durableExecution;
3705
- logger;
3706
- constructor(executionContext, lambdaContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) {
3707
- this.executionContext = executionContext;
3708
- this.lambdaContext = lambdaContext;
3709
- this._stepPrefix = stepPrefix;
3710
- this._parentId = parentId;
3711
- this.durableExecution = durableExecution;
3712
- this.durableLogger = inheritedLogger;
3713
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3714
- this.logger = this.createModeAwareLogger(inheritedLogger);
3715
- this.durableExecutionMode = durableExecutionMode;
3716
- this.checkpoint = durableExecution.checkpointManager;
3717
- this.modeManagement = new ModeManagement(this.captureExecutionState.bind(this), this.checkAndUpdateReplayMode.bind(this), this.checkForNonResolvingPromise.bind(this), () => this.durableExecutionMode, (mode) => {
3718
- this.durableExecutionMode = mode;
3719
- });
3467
+ notifyQueueCompletion() {
3468
+ if (this.queueCompletionResolver) {
3469
+ this.queueCompletionResolver();
3470
+ this.queueCompletionResolver = null;
3471
+ }
3720
3472
  }
3721
- getDurableLoggingContext() {
3722
- return {
3723
- getDurableLogData: () => {
3724
- const activeContext = getActiveContext();
3725
- const result = {
3726
- executionArn: this.executionContext.durableExecutionArn,
3727
- requestId: this.executionContext.requestId,
3728
- tenantId: this.executionContext.tenantId,
3729
- operationId: !activeContext || activeContext?.contextId === "root"
3730
- ? undefined
3731
- : hashId(activeContext.contextId),
3732
- };
3733
- if (activeContext?.attempt !== undefined) {
3734
- result.attempt = activeContext.attempt;
3735
- }
3736
- return result;
3737
- },
3473
+ async processBatch(batch) {
3474
+ const updates = batch.map((item) => {
3475
+ const hashedStepId = hashId(item.stepId);
3476
+ const update = {
3477
+ Type: item.data.Type || "STEP",
3478
+ Action: item.data.Action || "START",
3479
+ ...item.data,
3480
+ Id: hashedStepId,
3481
+ ...(item.data.ParentId && { ParentId: hashId(item.data.ParentId) }),
3482
+ };
3483
+ return update;
3484
+ });
3485
+ const checkpointData = {
3486
+ DurableExecutionArn: this.durableExecutionArn,
3487
+ CheckpointToken: this.currentTaskToken,
3488
+ Updates: updates,
3738
3489
  };
3739
- }
3740
- shouldLog() {
3741
- const activeContext = getActiveContext();
3742
- if (!this.modeAwareLoggingEnabled || !activeContext) {
3743
- return true;
3490
+ log("⏺️", "Creating checkpoint batch:", {
3491
+ batchSize: updates.length,
3492
+ checkpointToken: this.currentTaskToken,
3493
+ updates: updates.map((u) => ({
3494
+ Id: u.Id,
3495
+ Action: u.Action,
3496
+ Type: u.Type,
3497
+ })),
3498
+ });
3499
+ const response = await this.storage.checkpoint(checkpointData, this.logger);
3500
+ if (response.CheckpointToken) {
3501
+ this.currentTaskToken = response.CheckpointToken;
3744
3502
  }
3745
- if (activeContext.contextId === "root") {
3746
- return this.durableExecutionMode === DurableExecutionMode.ExecutionMode;
3503
+ if (response.NewExecutionState?.Operations) {
3504
+ this.updateStepDataFromCheckpointResponse(response.NewExecutionState.Operations);
3747
3505
  }
3748
- return (activeContext.durableExecutionMode === DurableExecutionMode.ExecutionMode);
3749
3506
  }
3750
- createModeAwareLogger(logger) {
3751
- const durableContextLogger = {
3752
- warn: (...args) => {
3753
- if (this.shouldLog()) {
3754
- return logger.warn(...args);
3755
- }
3756
- },
3757
- debug: (...args) => {
3758
- if (this.shouldLog()) {
3759
- return logger.debug(...args);
3760
- }
3761
- },
3762
- info: (...args) => {
3763
- if (this.shouldLog()) {
3764
- return logger.info(...args);
3765
- }
3766
- },
3767
- error: (...args) => {
3768
- if (this.shouldLog()) {
3769
- return logger.error(...args);
3507
+ updateStepDataFromCheckpointResponse(operations) {
3508
+ log("🔄", "Updating stepData from checkpoint response:", {
3509
+ operationCount: operations.length,
3510
+ operationIds: operations.map((op) => op.Id).filter(Boolean),
3511
+ });
3512
+ operations.forEach((operation) => {
3513
+ if (operation.Id) {
3514
+ // Check if status changed
3515
+ const oldStatus = this.stepData[operation.Id]?.Status;
3516
+ const newStatus = operation.Status;
3517
+ this.stepData[operation.Id] = operation;
3518
+ log("📝", "Updated stepData entry:", operation);
3519
+ this.stepDataEmitter.emit(STEP_DATA_UPDATED_EVENT, operation.Id);
3520
+ // If status changed and we have a waiting promise, resolve it
3521
+ if (oldStatus !== newStatus) {
3522
+ this.resolveWaitingOperation(operation.Id);
3770
3523
  }
3771
- },
3772
- };
3773
- if ("log" in logger) {
3774
- durableContextLogger.log = (level, ...args) => {
3775
- if (this.shouldLog()) {
3776
- return logger.log?.(level, ...args);
3524
+ }
3525
+ });
3526
+ log("", "StepData update completed:", {
3527
+ totalStepDataEntries: Object.keys(this.stepData).length,
3528
+ });
3529
+ }
3530
+ resolveWaitingOperation(hashedStepId) {
3531
+ // Find operation by hashed ID in our operations map
3532
+ for (const [stepId, op] of this.operations.entries()) {
3533
+ if (hashId(stepId) === hashedStepId && op.resolver) {
3534
+ log("✅", `Resolving waiting operation ${stepId} due to status change`);
3535
+ op.resolver();
3536
+ op.resolver = undefined;
3537
+ if (op.timer) {
3538
+ clearTimeout(op.timer);
3539
+ op.timer = undefined;
3777
3540
  }
3778
- };
3541
+ break;
3542
+ }
3779
3543
  }
3780
- return durableContextLogger;
3781
3544
  }
3782
- createStepId() {
3783
- this._stepCounter++;
3784
- return this._stepPrefix
3785
- ? `${this._stepPrefix}-${this._stepCounter}`
3786
- : `${this._stepCounter}`;
3787
- }
3788
- getNextStepId() {
3789
- const nextCounter = this._stepCounter + 1;
3790
- return this._stepPrefix
3791
- ? `${this._stepPrefix}-${nextCounter}`
3792
- : `${nextCounter}`;
3793
- }
3794
- /**
3795
- * Skips the next operation by incrementing the step counter.
3796
- * Used internally by concurrent execution handler during replay to skip incomplete items.
3797
- * @internal
3798
- */
3799
- skipNextOperation() {
3800
- this._stepCounter++;
3545
+ getQueueStatus() {
3546
+ return {
3547
+ queueLength: this.queue.length,
3548
+ isProcessing: this.isProcessing,
3549
+ };
3801
3550
  }
3802
- checkAndUpdateReplayMode() {
3803
- if (this.durableExecutionMode === DurableExecutionMode.ReplayMode) {
3804
- const nextStepId = this.getNextStepId();
3805
- const nextStepData = this.executionContext.getStepData(nextStepId);
3806
- if (!nextStepData) {
3807
- this.durableExecutionMode = DurableExecutionMode.ExecutionMode;
3551
+ // ===== New Lifecycle & Termination Methods =====
3552
+ markOperationState(stepId, state, options) {
3553
+ let op = this.operations.get(stepId);
3554
+ if (!op) {
3555
+ // First call - create operation
3556
+ if (!options?.metadata) {
3557
+ throw new Error(`metadata required on first call for ${stepId}`);
3808
3558
  }
3559
+ op = {
3560
+ stepId,
3561
+ state,
3562
+ metadata: options.metadata,
3563
+ endTimestamp: options.endTimestamp,
3564
+ };
3565
+ this.operations.set(stepId, op);
3809
3566
  }
3810
- }
3811
- captureExecutionState() {
3812
- const wasInReplayMode = this.durableExecutionMode === DurableExecutionMode.ReplayMode;
3813
- const nextStepId = this.getNextStepId();
3814
- const stepData = this.executionContext.getStepData(nextStepId);
3815
- const wasNotFinished = !!(stepData &&
3816
- stepData.Status !== OperationStatus.SUCCEEDED &&
3817
- stepData.Status !== OperationStatus.FAILED);
3818
- return wasInReplayMode && wasNotFinished;
3819
- }
3820
- checkForNonResolvingPromise() {
3821
- if (this.durableExecutionMode === DurableExecutionMode.ReplaySucceededContext) {
3822
- const nextStepId = this.getNextStepId();
3823
- const nextStepData = this.executionContext.getStepData(nextStepId);
3824
- if (nextStepData &&
3825
- nextStepData.Status !== OperationStatus.SUCCEEDED &&
3826
- nextStepData.Status !== OperationStatus.FAILED) {
3827
- return new Promise(() => { }); // Non-resolving promise
3567
+ else {
3568
+ // Update existing operation
3569
+ op.state = state;
3570
+ if (options?.endTimestamp !== undefined) {
3571
+ op.endTimestamp = options.endTimestamp;
3828
3572
  }
3829
3573
  }
3830
- return null;
3831
- }
3832
- addRunningOperation(stepId) {
3833
- this.runningOperations.add(stepId);
3834
- }
3835
- removeRunningOperation(stepId) {
3836
- this.runningOperations.delete(stepId);
3837
- if (this.runningOperations.size === 0) {
3838
- this.operationsEmitter.emit(OPERATIONS_COMPLETE_EVENT);
3574
+ // Cleanup if transitioning to COMPLETED
3575
+ if (state === OperationLifecycleState.COMPLETED) {
3576
+ this.cleanupOperation(stepId);
3577
+ }
3578
+ // Check if we should terminate
3579
+ // Don't check for IDLE_NOT_AWAITED - operation might be awaited later or intentionally not awaited
3580
+ if (state !== OperationLifecycleState.IDLE_NOT_AWAITED) {
3581
+ this.checkAndTerminate();
3839
3582
  }
3840
3583
  }
3841
- hasRunningOperations() {
3842
- return this.runningOperations.size > 0;
3843
- }
3844
- getOperationsEmitter() {
3845
- return this.operationsEmitter;
3846
- }
3847
- withModeManagement(operation) {
3848
- return this.modeManagement.withModeManagement(operation);
3849
- }
3850
- withDurableModeManagement(operation) {
3851
- return this.modeManagement.withDurableModeManagement(operation);
3852
- }
3853
- step(nameOrFn, fnOrOptions, maybeOptions) {
3854
- validateContextUsage(this._stepPrefix, "step", this.executionContext.terminationManager);
3855
- return this.withDurableModeManagement(() => {
3856
- const stepHandler = createStepHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3857
- return stepHandler(nameOrFn, fnOrOptions, maybeOptions);
3858
- });
3859
- }
3860
- invoke(nameOrFuncId, funcIdOrInput, inputOrConfig, maybeConfig) {
3861
- validateContextUsage(this._stepPrefix, "invoke", this.executionContext.terminationManager);
3862
- return this.withDurableModeManagement(() => {
3863
- const invokeHandler = createInvokeHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3864
- return invokeHandler(...[
3865
- nameOrFuncId,
3866
- funcIdOrInput,
3867
- inputOrConfig,
3868
- maybeConfig,
3869
- ]);
3870
- });
3871
- }
3872
- runInChildContext(nameOrFn, fnOrOptions, maybeOptions) {
3873
- validateContextUsage(this._stepPrefix, "runInChildContext", this.executionContext.terminationManager);
3874
- return this.withDurableModeManagement(() => {
3875
- const blockHandler = createRunInChildContextHandler(this.executionContext, this.checkpoint, this.lambdaContext, this.createStepId.bind(this), () => this.durableLogger,
3876
- // Adapter function to maintain compatibility
3877
- (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, _checkpointToken, parentId) => createDurableContext(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, this.durableExecution, parentId), this._parentId);
3878
- return blockHandler(nameOrFn, fnOrOptions, maybeOptions);
3584
+ waitForRetryTimer(stepId) {
3585
+ const op = this.operations.get(stepId);
3586
+ if (!op) {
3587
+ throw new Error(`Operation ${stepId} not found`);
3588
+ }
3589
+ if (op.state !== OperationLifecycleState.RETRY_WAITING) {
3590
+ throw new Error(`Operation ${stepId} must be in RETRY_WAITING state, got ${op.state}`);
3591
+ }
3592
+ // Start timer with polling
3593
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3594
+ // Return promise that resolves when status changes
3595
+ return new Promise((resolve) => {
3596
+ op.resolver = resolve;
3879
3597
  });
3880
3598
  }
3881
- wait(nameOrDuration, maybeDuration) {
3882
- validateContextUsage(this._stepPrefix, "wait", this.executionContext.terminationManager);
3883
- return this.withDurableModeManagement(() => {
3884
- const waitHandler = createWaitHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId, this.checkAndUpdateReplayMode.bind(this));
3885
- return typeof nameOrDuration === "string"
3886
- ? waitHandler(nameOrDuration, maybeDuration)
3887
- : waitHandler(nameOrDuration);
3599
+ waitForStatusChange(stepId) {
3600
+ const op = this.operations.get(stepId);
3601
+ if (!op) {
3602
+ throw new Error(`Operation ${stepId} not found`);
3603
+ }
3604
+ if (op.state !== OperationLifecycleState.IDLE_AWAITED) {
3605
+ throw new Error(`Operation ${stepId} must be in IDLE_AWAITED state, got ${op.state}`);
3606
+ }
3607
+ // Start timer with polling
3608
+ this.startTimerWithPolling(stepId, op.endTimestamp);
3609
+ // Return promise that resolves when status changes
3610
+ return new Promise((resolve) => {
3611
+ op.resolver = resolve;
3888
3612
  });
3889
3613
  }
3890
- /**
3891
- * Configure logger behavior for this context
3892
- *
3893
- * This method allows partial configuration - only the properties provided will be updated.
3894
- * For example, calling configureLogger(\{ modeAware: false \}) will only change the modeAware
3895
- * setting without affecting any previously configured custom logger.
3896
- *
3897
- * @param config - Logger configuration options including customLogger and modeAware settings (default: modeAware=true)
3898
- * @example
3899
- * // Set custom logger and enable mode-aware logging
3900
- * context.configureLogger(\{ customLogger: myLogger, modeAware: true \});
3901
- *
3902
- * // Later, disable mode-aware logging without changing the custom logger
3903
- * context.configureLogger(\{ modeAware: false \});
3904
- */
3905
- configureLogger(config) {
3906
- if (config.customLogger !== undefined) {
3907
- this.durableLogger = config.customLogger;
3908
- this.durableLogger.configureDurableLoggingContext?.(this.getDurableLoggingContext());
3909
- this.logger = this.createModeAwareLogger(this.durableLogger);
3614
+ markOperationAwaited(stepId) {
3615
+ const op = this.operations.get(stepId);
3616
+ if (!op) {
3617
+ log("⚠️", `Cannot mark operation as awaited: ${stepId} not found`);
3618
+ return;
3910
3619
  }
3911
- if (config.modeAware !== undefined) {
3912
- this.modeAwareLoggingEnabled = config.modeAware;
3620
+ // Transition IDLE_NOT_AWAITED → IDLE_AWAITED
3621
+ if (op.state === OperationLifecycleState.IDLE_NOT_AWAITED) {
3622
+ op.state = OperationLifecycleState.IDLE_AWAITED;
3623
+ log("📍", `Operation marked as awaited: ${stepId}`);
3624
+ // Check if we should terminate now that operation is awaited
3625
+ this.checkAndTerminate();
3913
3626
  }
3914
3627
  }
3915
- createCallback(nameOrConfig, maybeConfig) {
3916
- validateContextUsage(this._stepPrefix, "createCallback", this.executionContext.terminationManager);
3917
- return this.withDurableModeManagement(() => {
3918
- const callbackFactory = createCallback(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this.checkAndUpdateReplayMode.bind(this), this._parentId);
3919
- return callbackFactory(nameOrConfig, maybeConfig);
3920
- });
3628
+ getOperationState(stepId) {
3629
+ return this.operations.get(stepId)?.state;
3921
3630
  }
3922
- waitForCallback(nameOrSubmitter, submitterOrConfig, maybeConfig) {
3923
- validateContextUsage(this._stepPrefix, "waitForCallback", this.executionContext.terminationManager);
3924
- return this.withDurableModeManagement(() => {
3925
- const waitForCallbackHandler = createWaitForCallbackHandler(this.executionContext, this.getNextStepId.bind(this), this.runInChildContext.bind(this));
3926
- return waitForCallbackHandler(nameOrSubmitter, submitterOrConfig, maybeConfig);
3927
- });
3631
+ getAllOperations() {
3632
+ return new Map(this.operations);
3928
3633
  }
3929
- waitForCondition(nameOrCheckFunc, checkFuncOrConfig, maybeConfig) {
3930
- validateContextUsage(this._stepPrefix, "waitForCondition", this.executionContext.terminationManager);
3931
- return this.withDurableModeManagement(() => {
3932
- const waitForConditionHandler = createWaitForConditionHandler(this.executionContext, this.checkpoint, this.createStepId.bind(this), this.durableLogger, this.addRunningOperation.bind(this), this.removeRunningOperation.bind(this), this.hasRunningOperations.bind(this), this.getOperationsEmitter.bind(this), this._parentId);
3933
- return typeof nameOrCheckFunc === "string" ||
3934
- nameOrCheckFunc === undefined
3935
- ? waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig, maybeConfig)
3936
- : waitForConditionHandler(nameOrCheckFunc, checkFuncOrConfig);
3937
- });
3634
+ // ===== Private Helper Methods =====
3635
+ cleanupOperation(stepId) {
3636
+ const op = this.operations.get(stepId);
3637
+ if (!op)
3638
+ return;
3639
+ // Clear timer
3640
+ if (op.timer) {
3641
+ clearTimeout(op.timer);
3642
+ op.timer = undefined;
3643
+ }
3644
+ // Clear resolver
3645
+ op.resolver = undefined;
3646
+ }
3647
+ cleanupAllOperations() {
3648
+ for (const op of this.operations.values()) {
3649
+ if (op.timer) {
3650
+ clearTimeout(op.timer);
3651
+ op.timer = undefined;
3652
+ }
3653
+ op.resolver = undefined;
3654
+ }
3938
3655
  }
3939
- map(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig) {
3940
- validateContextUsage(this._stepPrefix, "map", this.executionContext.terminationManager);
3941
- return this.withDurableModeManagement(() => {
3942
- const mapHandler = createMapHandler(this.executionContext, this._executeConcurrently.bind(this));
3943
- return mapHandler(nameOrItems, itemsOrMapFunc, mapFuncOrConfig, maybeConfig);
3944
- });
3656
+ checkAndTerminate() {
3657
+ // Rule 1: Can't terminate if checkpoint queue is not empty
3658
+ if (this.queue.length > 0) {
3659
+ this.abortTermination();
3660
+ return;
3661
+ }
3662
+ // Rule 2: Can't terminate if checkpoint is currently processing
3663
+ if (this.isProcessing) {
3664
+ this.abortTermination();
3665
+ return;
3666
+ }
3667
+ // Rule 3: Can't terminate if there are pending force checkpoint promises
3668
+ if (this.forceCheckpointPromises.length > 0) {
3669
+ this.abortTermination();
3670
+ return;
3671
+ }
3672
+ const allOps = Array.from(this.operations.values());
3673
+ // Rule 4: Can't terminate if any operation is EXECUTING
3674
+ const hasExecuting = allOps.some((op) => op.state === OperationLifecycleState.EXECUTING);
3675
+ if (hasExecuting) {
3676
+ this.abortTermination();
3677
+ return;
3678
+ }
3679
+ // Rule 5: Clean up operations whose ancestors are complete or pending completion
3680
+ for (const op of allOps) {
3681
+ if (op.state === OperationLifecycleState.RETRY_WAITING ||
3682
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3683
+ op.state === OperationLifecycleState.IDLE_AWAITED) {
3684
+ // Use the original stepId from metadata, not the potentially hashed op.stepId
3685
+ const originalStepId = op.metadata.stepId;
3686
+ if (this.hasFinishedAncestor(originalStepId)) {
3687
+ log("🧹", `Cleaning up operation with completed ancestor: ${originalStepId}`);
3688
+ this.cleanupOperation(op.stepId);
3689
+ this.operations.delete(op.stepId);
3690
+ }
3691
+ }
3692
+ }
3693
+ // Re-check operations after cleanup
3694
+ const remainingOps = Array.from(this.operations.values());
3695
+ // Determine if we should terminate
3696
+ const hasWaiting = remainingOps.some((op) => op.state === OperationLifecycleState.RETRY_WAITING ||
3697
+ op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3698
+ op.state === OperationLifecycleState.IDLE_AWAITED);
3699
+ if (hasWaiting) {
3700
+ const reason = this.determineTerminationReason(remainingOps);
3701
+ this.scheduleTermination(reason);
3702
+ }
3703
+ else {
3704
+ this.abortTermination();
3705
+ }
3945
3706
  }
3946
- parallel(nameOrBranches, branchesOrConfig, maybeConfig) {
3947
- validateContextUsage(this._stepPrefix, "parallel", this.executionContext.terminationManager);
3948
- return this.withDurableModeManagement(() => {
3949
- const parallelHandler = createParallelHandler(this.executionContext, this._executeConcurrently.bind(this));
3950
- return parallelHandler(nameOrBranches, branchesOrConfig, maybeConfig);
3951
- });
3707
+ abortTermination() {
3708
+ if (this.terminationTimer) {
3709
+ clearTimeout(this.terminationTimer);
3710
+ this.terminationTimer = null;
3711
+ this.terminationReason = null;
3712
+ log("🔄", "Termination aborted - conditions changed");
3713
+ }
3952
3714
  }
3953
- _executeConcurrently(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig) {
3954
- validateContextUsage(this._stepPrefix, "_executeConcurrently", this.executionContext.terminationManager);
3955
- return this.withDurableModeManagement(() => {
3956
- const concurrentExecutionHandler = createConcurrentExecutionHandler(this.executionContext, this.runInChildContext.bind(this), this.skipNextOperation.bind(this));
3957
- const promise = concurrentExecutionHandler(nameOrItems, itemsOrExecutor, executorOrConfig, maybeConfig);
3958
- // Prevent unhandled promise rejections
3959
- promise?.catch(() => { });
3960
- return promise;
3715
+ scheduleTermination(reason) {
3716
+ // If already scheduled with same reason, don't reschedule
3717
+ if (this.terminationTimer && this.terminationReason === reason) {
3718
+ return;
3719
+ }
3720
+ // Clear any existing timer
3721
+ this.abortTermination();
3722
+ // Schedule new termination
3723
+ this.terminationReason = reason;
3724
+ log("⏱️", "Scheduling termination", {
3725
+ reason,
3726
+ cooldownMs: this.TERMINATION_COOLDOWN_MS,
3961
3727
  });
3728
+ this.terminationTimer = setTimeout(() => {
3729
+ this.executeTermination(reason);
3730
+ }, this.TERMINATION_COOLDOWN_MS);
3731
+ }
3732
+ executeTermination(reason) {
3733
+ log("🛑", "Executing termination after cooldown", { reason });
3734
+ // Clear timer
3735
+ this.terminationTimer = null;
3736
+ this.terminationReason = null;
3737
+ // Cleanup all operations before terminating
3738
+ this.cleanupAllOperations();
3739
+ // Call termination manager directly
3740
+ this.terminationManager.terminate({ reason });
3741
+ }
3742
+ determineTerminationReason(ops) {
3743
+ // Priority: RETRY_SCHEDULED > WAIT_SCHEDULED > CALLBACK_PENDING
3744
+ if (ops.some((op) => op.state === OperationLifecycleState.RETRY_WAITING &&
3745
+ op.metadata.subType === "Step")) {
3746
+ return TerminationReason.RETRY_SCHEDULED;
3747
+ }
3748
+ if (ops.some((op) => (op.state === OperationLifecycleState.IDLE_NOT_AWAITED ||
3749
+ op.state === OperationLifecycleState.IDLE_AWAITED) &&
3750
+ op.metadata.subType === "Wait")) {
3751
+ return TerminationReason.WAIT_SCHEDULED;
3752
+ }
3753
+ return TerminationReason.CALLBACK_PENDING;
3962
3754
  }
3963
- get promise() {
3964
- return createPromiseHandler(this.step.bind(this));
3755
+ startTimerWithPolling(stepId, endTimestamp) {
3756
+ const op = this.operations.get(stepId);
3757
+ if (!op)
3758
+ return;
3759
+ let delay;
3760
+ if (endTimestamp) {
3761
+ // Ensure endTimestamp is a Date object
3762
+ const timestamp = endTimestamp instanceof Date ? endTimestamp : new Date(endTimestamp);
3763
+ // Wait until endTimestamp
3764
+ delay = Math.max(0, timestamp.getTime() - Date.now());
3765
+ }
3766
+ else {
3767
+ // No timestamp, start polling immediately (1 second delay)
3768
+ delay = 1000;
3769
+ }
3770
+ // Initialize poll count and start time for this operation
3771
+ if (!op.pollCount) {
3772
+ op.pollCount = 0;
3773
+ op.pollStartTime = Date.now();
3774
+ }
3775
+ op.timer = setTimeout(() => {
3776
+ this.forceRefreshAndCheckStatus(stepId);
3777
+ }, delay);
3778
+ }
3779
+ async forceRefreshAndCheckStatus(stepId) {
3780
+ const op = this.operations.get(stepId);
3781
+ if (!op)
3782
+ return;
3783
+ // Check if we've exceeded max polling duration (15 minutes)
3784
+ const MAX_POLL_DURATION_MS = 15 * 60 * 1000; // 15 minutes
3785
+ if (op.pollStartTime &&
3786
+ Date.now() - op.pollStartTime > MAX_POLL_DURATION_MS) {
3787
+ // Stop polling after 15 minutes to prevent indefinite resource consumption.
3788
+ // We don't resolve or reject the promise because the handler cannot continue
3789
+ // without a status change. The execution will remain suspended until the
3790
+ // operation completes or the Lambda times out.
3791
+ log("⏱️", `Max polling duration (15 min) exceeded for ${stepId}, stopping poll`);
3792
+ if (op.timer) {
3793
+ clearTimeout(op.timer);
3794
+ op.timer = undefined;
3795
+ }
3796
+ return;
3797
+ }
3798
+ // Get old status before refresh
3799
+ const oldStatus = this.stepData[hashId(stepId)]?.Status;
3800
+ // Force checkpoint to refresh state from backend
3801
+ await this.forceCheckpoint();
3802
+ // Get new status after refresh
3803
+ const newStatus = this.stepData[hashId(stepId)]?.Status;
3804
+ // Check if status changed
3805
+ if (newStatus !== oldStatus) {
3806
+ // Status changed, resolve the waiting promise
3807
+ log("✅", `Status changed for ${stepId}: ${oldStatus} → ${newStatus}`);
3808
+ op.resolver?.();
3809
+ op.resolver = undefined;
3810
+ // Clear timer
3811
+ if (op.timer) {
3812
+ clearTimeout(op.timer);
3813
+ op.timer = undefined;
3814
+ }
3815
+ }
3816
+ else {
3817
+ // Status not changed yet, poll again with incremental backoff
3818
+ // Start at 1s, increase by 1s each poll, max 10s
3819
+ op.pollCount = (op.pollCount || 0) + 1;
3820
+ const nextDelay = Math.min(op.pollCount * 1000, 10000);
3821
+ op.timer = setTimeout(() => {
3822
+ this.forceRefreshAndCheckStatus(stepId);
3823
+ }, nextDelay);
3824
+ }
3965
3825
  }
3966
3826
  }
3967
- const createDurableContext = (executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId) => {
3968
- return new DurableContextImpl(executionContext, parentContext, durableExecutionMode, inheritedLogger, stepPrefix, durableExecution, parentId);
3969
- };
3970
3827
 
3971
3828
  /*
3972
3829
  Second Approach (Promise-based):
@@ -4260,43 +4117,6 @@ const createDefaultLogger = (executionContext) => {
4260
4117
  return new DefaultLogger(executionContext);
4261
4118
  };
4262
4119
 
4263
- /**
4264
- * Tracks active async operations to prevent premature termination
4265
- */
4266
- class ActiveOperationsTracker {
4267
- activeCount = 0;
4268
- /**
4269
- * Increment the counter when starting an async operation
4270
- */
4271
- increment() {
4272
- this.activeCount++;
4273
- }
4274
- /**
4275
- * Decrement the counter when an async operation completes
4276
- */
4277
- decrement() {
4278
- this.activeCount = Math.max(0, this.activeCount - 1);
4279
- }
4280
- /**
4281
- * Check if there are any active operations
4282
- */
4283
- hasActive() {
4284
- return this.activeCount > 0;
4285
- }
4286
- /**
4287
- * Get the current count of active operations
4288
- */
4289
- getCount() {
4290
- return this.activeCount;
4291
- }
4292
- /**
4293
- * Reset the counter (useful for testing)
4294
- */
4295
- reset() {
4296
- this.activeCount = 0;
4297
- }
4298
- }
4299
-
4300
4120
  let defaultLambdaClient;
4301
4121
  /**
4302
4122
  * Durable execution client which uses an API-based LambdaClient
@@ -4416,6 +4236,20 @@ class DurableExecutionInvocationInputWithClient {
4416
4236
  this.DurableExecutionArn = params.DurableExecutionArn;
4417
4237
  this.CheckpointToken = params.CheckpointToken;
4418
4238
  }
4239
+ static isInstance(event) {
4240
+ if (event instanceof DurableExecutionInvocationInputWithClient) {
4241
+ return true;
4242
+ }
4243
+ return !!(typeof event === "object" &&
4244
+ event &&
4245
+ event.toString() ===
4246
+ "[object DurableExecutionInvocationInputWithClient]" &&
4247
+ "durableExecutionClient" in event &&
4248
+ event.constructor.name === "DurableExecutionInvocationInputWithClient");
4249
+ }
4250
+ get [Symbol.toStringTag]() {
4251
+ return "DurableExecutionInvocationInputWithClient";
4252
+ }
4419
4253
  }
4420
4254
 
4421
4255
  const initializeExecutionContext = async (event, context, lambdaClient) => {
@@ -4425,7 +4259,7 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4425
4259
  const durableExecutionArn = event.DurableExecutionArn;
4426
4260
  const durableExecutionClient =
4427
4261
  // Allow passing arbitrary durable clients if the input is a custom class
4428
- event instanceof DurableExecutionInvocationInputWithClient
4262
+ DurableExecutionInvocationInputWithClient.isInstance(event)
4429
4263
  ? event.durableExecutionClient
4430
4264
  : new DurableExecutionApiClient(lambdaClient);
4431
4265
  // Create logger for initialization errors using existing logger factory
@@ -4464,7 +4298,6 @@ const initializeExecutionContext = async (event, context, lambdaClient) => {
4464
4298
  durableExecutionClient,
4465
4299
  _stepData: stepData,
4466
4300
  terminationManager: new TerminationManager(),
4467
- activeOperationsTracker: new ActiveOperationsTracker(),
4468
4301
  durableExecutionArn,
4469
4302
  pendingCompletions: new Set(),
4470
4303
  getStepData(stepId) {
@@ -4483,7 +4316,7 @@ const LAMBDA_RESPONSE_SIZE_LIMIT = 6 * 1024 * 1024 - 50; // 6MB in bytes, minus
4483
4316
  async function runHandler(event, context, executionContext, durableExecutionMode, checkpointToken, handler) {
4484
4317
  // Create checkpoint manager and step data emitter
4485
4318
  const stepDataEmitter = new EventEmitter();
4486
- const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, executionContext.activeOperationsTracker, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), executionContext.pendingCompletions);
4319
+ const checkpointManager = new CheckpointManager(executionContext.durableExecutionArn, executionContext._stepData, executionContext.durableExecutionClient, executionContext.terminationManager, checkpointToken, stepDataEmitter, createDefaultLogger(executionContext), new Set());
4487
4320
  // Set the checkpoint terminating callback on the termination manager
4488
4321
  executionContext.terminationManager.setCheckpointTerminatingCallback(() => {
4489
4322
  checkpointManager.setTerminating();
@@ -4585,6 +4418,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4585
4418
  Payload: serializedResult, // Reuse the already serialized result
4586
4419
  });
4587
4420
  log("✅", "Large result successfully checkpointed");
4421
+ // Wait for any pending checkpoints to complete before returning
4422
+ try {
4423
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4424
+ }
4425
+ catch (waitError) {
4426
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4427
+ // Continue anyway - the checkpoint will be retried on next invocation
4428
+ }
4588
4429
  // Return a response indicating the result was checkpointed
4589
4430
  return {
4590
4431
  Status: InvocationStatus.SUCCEEDED,
@@ -4598,6 +4439,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4598
4439
  }
4599
4440
  }
4600
4441
  // If response size is acceptable, return the response
4442
+ // Wait for any pending checkpoints to complete before returning
4443
+ try {
4444
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4445
+ }
4446
+ catch (waitError) {
4447
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4448
+ // Continue anyway - the checkpoint will be retried on next invocation
4449
+ }
4601
4450
  return {
4602
4451
  Status: InvocationStatus.SUCCEEDED,
4603
4452
  Result: serializedResult,
@@ -4610,6 +4459,14 @@ async function runHandler(event, context, executionContext, durableExecutionMode
4610
4459
  log("🛑", "Unrecoverable invocation error - terminating Lambda execution");
4611
4460
  throw error; // Re-throw the error to terminate Lambda execution
4612
4461
  }
4462
+ // Wait for any pending checkpoints to complete before returning error
4463
+ try {
4464
+ await durableExecution.checkpointManager.waitForQueueCompletion();
4465
+ }
4466
+ catch (waitError) {
4467
+ log("⚠️", "Error waiting for checkpoint queue completion:", waitError);
4468
+ // Continue anyway - the checkpoint will be retried on next invocation
4469
+ }
4613
4470
  return {
4614
4471
  Status: InvocationStatus.FAILED,
4615
4472
  Error: createErrorObjectFromError(error),