@temporalio/core-bridge 0.23.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/Cargo.lock +118 -15
  2. package/Cargo.toml +2 -1
  3. package/LICENSE.md +1 -1
  4. package/README.md +1 -1
  5. package/index.d.ts +47 -18
  6. package/package.json +7 -7
  7. package/releases/aarch64-apple-darwin/index.node +0 -0
  8. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  9. package/releases/x86_64-apple-darwin/index.node +0 -0
  10. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  11. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  12. package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
  13. package/sdk-core/ARCHITECTURE.md +9 -7
  14. package/sdk-core/README.md +5 -1
  15. package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -3
  17. package/sdk-core/client/src/lib.rs +26 -8
  18. package/sdk-core/client/src/raw.rs +166 -54
  19. package/sdk-core/client/src/retry.rs +9 -4
  20. package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
  21. package/sdk-core/core/Cargo.toml +2 -0
  22. package/sdk-core/core/src/abstractions.rs +137 -16
  23. package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
  24. package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
  25. package/sdk-core/core/src/core_tests/determinism.rs +2 -2
  26. package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
  27. package/sdk-core/core/src/core_tests/queries.rs +146 -60
  28. package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
  29. package/sdk-core/core/src/core_tests/workers.rs +39 -23
  30. package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  31. package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
  32. package/sdk-core/core/src/lib.rs +6 -4
  33. package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
  34. package/sdk-core/core/src/protosext/mod.rs +6 -6
  35. package/sdk-core/core/src/retry_logic.rs +1 -1
  36. package/sdk-core/core/src/telemetry/metrics.rs +21 -7
  37. package/sdk-core/core/src/telemetry/mod.rs +18 -4
  38. package/sdk-core/core/src/test_help/mod.rs +341 -109
  39. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
  40. package/sdk-core/core/src/worker/activities/local_activities.rs +19 -16
  41. package/sdk-core/core/src/worker/activities.rs +156 -29
  42. package/sdk-core/core/src/worker/client.rs +1 -0
  43. package/sdk-core/core/src/worker/mod.rs +132 -659
  44. package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
  45. package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
  46. package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
  47. package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
  48. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
  49. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
  50. package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
  51. package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
  52. package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
  53. package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
  54. package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
  55. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
  56. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  57. package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
  58. package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
  59. package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
  60. package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
  61. package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
  62. package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
  63. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
  64. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
  65. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
  66. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  67. package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
  68. package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
  69. package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
  70. package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  71. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
  72. package/sdk-core/core-api/src/errors.rs +3 -10
  73. package/sdk-core/core-api/src/lib.rs +2 -1
  74. package/sdk-core/core-api/src/worker.rs +26 -2
  75. package/sdk-core/etc/dynamic-config.yaml +2 -0
  76. package/sdk-core/integ-with-otel.sh +1 -1
  77. package/sdk-core/protos/api_upstream/Makefile +4 -4
  78. package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
  79. package/sdk-core/protos/api_upstream/buf.yaml +8 -9
  80. package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  81. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  83. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
  85. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
  87. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
  88. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
  89. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
  90. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
  91. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
  92. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  93. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
  94. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
  95. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
  96. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
  97. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
  98. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -1
  99. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
  100. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
  101. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
  102. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
  103. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
  104. package/sdk-core/sdk/src/activity_context.rs +12 -5
  105. package/sdk-core/sdk/src/app_data.rs +37 -0
  106. package/sdk-core/sdk/src/lib.rs +76 -43
  107. package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
  108. package/sdk-core/sdk/src/workflow_context.rs +14 -19
  109. package/sdk-core/sdk/src/workflow_future.rs +11 -6
  110. package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
  111. package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
  112. package/sdk-core/sdk-core-protos/src/lib.rs +74 -176
  113. package/sdk-core/test-utils/src/lib.rs +85 -72
  114. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
  115. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
  116. package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
  117. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
  118. package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  119. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
  120. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
  121. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
  122. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  123. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
  124. package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
  125. package/sdk-core/tests/load_tests.rs +8 -3
  126. package/sdk-core/tests/main.rs +2 -1
  127. package/src/conversions.rs +47 -39
  128. package/src/errors.rs +10 -21
  129. package/src/lib.rs +342 -325
  130. package/sdk-core/core/src/pending_activations.rs +0 -173
  131. package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
  132. package/sdk-core/core/src/workflow/mod.rs +0 -478
  133. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
  134. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
  135. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
@@ -1,24 +1,29 @@
1
1
  use crate::{
2
- errors::PollWfError,
3
- job_assert,
2
+ advance_fut, job_assert,
4
3
  replay::TestHistoryBuilder,
5
4
  test_help::{
6
5
  build_fake_worker, build_mock_pollers, build_multihist_mock_sg, canned_histories,
7
- gen_assert_and_fail, gen_assert_and_reply, hist_to_poll_resp, mock_worker, poll_and_reply,
8
- poll_and_reply_clears_outstanding_evicts, single_hist_mock_sg, FakeWfResponses,
9
- MockPollCfg, MocksHolder, ResponseType, NO_MORE_WORK_ERROR_MSG, TEST_Q,
6
+ gen_assert_and_fail, gen_assert_and_reply, hist_to_poll_resp, mock_sdk, mock_sdk_cfg,
7
+ mock_worker, poll_and_reply, poll_and_reply_clears_outstanding_evicts, single_hist_mock_sg,
8
+ test_worker_cfg, FakeWfResponses, MockPollCfg, MocksHolder, ResponseType,
9
+ WorkflowCachingPolicy::{self, AfterEveryReply, NonSticky},
10
+ TEST_Q,
10
11
  },
11
- worker::client::mocks::mock_workflow_client,
12
- workflow::WorkflowCachingPolicy::{self, AfterEveryReply, NonSticky},
12
+ worker::client::mocks::{mock_manual_workflow_client, mock_workflow_client},
13
13
  Worker,
14
14
  };
15
+ use futures::{stream, FutureExt};
15
16
  use rstest::{fixture, rstest};
16
17
  use std::{
17
18
  collections::VecDeque,
18
- sync::atomic::{AtomicU64, AtomicUsize, Ordering},
19
+ sync::{
20
+ atomic::{AtomicU64, Ordering},
21
+ Arc,
22
+ },
19
23
  time::Duration,
20
24
  };
21
- use temporal_sdk_core_api::Worker as WorkerTrait;
25
+ use temporal_sdk::{ActivityOptions, CancellableFuture, WfContext};
26
+ use temporal_sdk_core_api::{errors::PollWfError, Worker as WorkerTrait};
22
27
  use temporal_sdk_core_protos::{
23
28
  coresdk::{
24
29
  activity_result::{self as ar, activity_resolution, ActivityResolution},
@@ -35,13 +40,18 @@ use temporal_sdk_core_protos::{
35
40
  temporal::api::{
36
41
  enums::v1::{EventType, WorkflowTaskFailedCause},
37
42
  failure::v1::Failure,
38
- history::v1::{history_event, History, TimerFiredEventAttributes},
43
+ history::v1::{history_event, TimerFiredEventAttributes},
39
44
  workflowservice::v1::{
40
45
  GetWorkflowExecutionHistoryResponse, RespondWorkflowTaskCompletedResponse,
41
46
  },
42
47
  },
48
+ DEFAULT_WORKFLOW_TYPE,
43
49
  };
44
50
  use temporal_sdk_core_test_utils::{fanout_tasks, start_timer_cmd};
51
+ use tokio::{
52
+ join,
53
+ sync::{Barrier, Semaphore},
54
+ };
45
55
 
46
56
  #[fixture(hist_batches = &[])]
47
57
  fn single_timer_setup(hist_batches: &'static [usize]) -> Worker {
@@ -433,6 +443,54 @@ async fn started_activity_cancellation_abandon(hist_batches: &'static [usize]) {
433
443
  verify_activity_cancellation(&core, activity_id, ActivityCancellationType::Abandon).await;
434
444
  }
435
445
 
446
+ #[rstest(hist_batches, case::incremental(&[1, 2, 3, 4]), case::replay(&[4]))]
447
+ #[tokio::test]
448
+ async fn abandoned_activities_ignore_start_and_complete(hist_batches: &'static [usize]) {
449
+ let wfid = "fake_wf_id";
450
+ let wf_type = DEFAULT_WORKFLOW_TYPE;
451
+ let activity_id = "1";
452
+
453
+ let mut t = TestHistoryBuilder::default();
454
+ t.add_by_type(EventType::WorkflowExecutionStarted);
455
+ t.add_full_wf_task();
456
+ let act_scheduled_event_id = t.add_activity_task_scheduled(activity_id);
457
+ let timer_started_event_id = t.add_get_event_id(EventType::TimerStarted, None);
458
+ t.add_timer_fired(timer_started_event_id, "1".to_string());
459
+ t.add_full_wf_task();
460
+ let timer_started_event_id = t.add_get_event_id(EventType::TimerStarted, None);
461
+ let act_started_event_id = t.add_activity_task_started(act_scheduled_event_id);
462
+ t.add_activity_task_completed(
463
+ act_scheduled_event_id,
464
+ act_started_event_id,
465
+ Default::default(),
466
+ );
467
+ t.add_full_wf_task();
468
+ t.add_timer_fired(timer_started_event_id, "2".to_string());
469
+ t.add_full_wf_task();
470
+ t.add_workflow_execution_completed();
471
+ let mock = mock_workflow_client();
472
+ let mut worker = mock_sdk(MockPollCfg::from_resp_batches(wfid, t, hist_batches, mock));
473
+
474
+ worker.register_wf(wf_type.to_owned(), |ctx: WfContext| async move {
475
+ let act_fut = ctx.activity(ActivityOptions {
476
+ activity_type: "echo_activity".to_string(),
477
+ start_to_close_timeout: Some(Duration::from_secs(5)),
478
+ cancellation_type: ActivityCancellationType::Abandon,
479
+ ..Default::default()
480
+ });
481
+ ctx.timer(Duration::from_secs(1)).await;
482
+ act_fut.cancel(&ctx);
483
+ ctx.timer(Duration::from_secs(3)).await;
484
+ act_fut.await;
485
+ Ok(().into())
486
+ });
487
+ worker
488
+ .submit_wf(wfid, wf_type, vec![], Default::default())
489
+ .await
490
+ .unwrap();
491
+ worker.run_until_done().await.unwrap();
492
+ }
493
+
436
494
  #[rstest(hist_batches, case::incremental(&[1, 3]), case::replay(&[3]))]
437
495
  #[tokio::test]
438
496
  async fn scheduled_activity_cancellation_try_cancel_task_canceled(hist_batches: &'static [usize]) {
@@ -684,7 +742,7 @@ async fn complete_activation_with_failure(
684
742
  response_batches: batches.iter().map(Into::into).collect(),
685
743
  }],
686
744
  true,
687
- Some(1),
745
+ 1,
688
746
  );
689
747
  let core = mock_worker(mock_sg);
690
748
 
@@ -792,7 +850,7 @@ async fn workflow_failures_only_reported_once() {
792
850
  }],
793
851
  true,
794
852
  // We should only call the server to say we failed twice (once after each success)
795
- Some(2),
853
+ 2,
796
854
  );
797
855
  let omap = mocks.outstanding_task_map.clone();
798
856
  let core = mock_worker(mocks);
@@ -827,93 +885,43 @@ async fn workflow_failures_only_reported_once() {
827
885
  }
828
886
 
829
887
  #[tokio::test]
830
- async fn max_concurrent_wft_respected() {
831
- // Create long histories for three workflows
832
- let t1 = canned_histories::long_sequential_timers(20);
833
- let t2 = canned_histories::long_sequential_timers(20);
834
- let mh = MockPollCfg::new(
835
- vec![
836
- FakeWfResponses {
837
- wf_id: "wf1".to_string(),
838
- hist: t1,
839
- response_batches: vec![ResponseType::AllHistory],
840
- },
841
- FakeWfResponses {
842
- wf_id: "wf2".to_string(),
843
- hist: t2,
844
- response_batches: vec![ResponseType::AllHistory],
845
- },
846
- ],
847
- true,
848
- None,
849
- );
850
- let mut mock = build_mock_pollers(mh);
851
- // Limit the core to two outstanding workflow tasks, hence we should only see polling
852
- // happen twice, since we will not actually finish the two workflows
853
- mock.worker_cfg(|cfg| {
854
- cfg.max_cached_workflows = 2;
855
- cfg.max_outstanding_workflow_tasks = 2;
856
- });
857
- let core = mock_worker(mock);
858
-
859
- // Poll twice in a row before completing -- we should be at limit
860
- let r1 = core.poll_workflow_activation().await.unwrap();
861
- let r1_run_id = r1.run_id.clone();
862
- let r2 = core.poll_workflow_activation().await.unwrap();
863
- // Now we immediately poll for new work, and complete the r1 activation. The poll must not
864
- // unblock until the completion goes through.
865
- let last_finisher = AtomicUsize::new(0);
866
- let (_, mut r1) = tokio::join! {
867
- async {
868
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
869
- r1.run_id,
870
- start_timer_cmd(1, Duration::from_secs(1)))
871
- ).await.unwrap();
872
- last_finisher.store(1, Ordering::SeqCst);
873
- },
874
- async {
875
- let r = core.poll_workflow_activation().await.unwrap();
876
- last_finisher.store(2, Ordering::SeqCst);
877
- r
888
+ async fn max_wft_respected() {
889
+ let total_wfs = 100;
890
+ let wf_ids: Vec<_> = (0..total_wfs)
891
+ .into_iter()
892
+ .map(|i| format!("fake-wf-{}", i))
893
+ .collect();
894
+ let hists = wf_ids.iter().map(|wf_id| {
895
+ let hist = canned_histories::single_timer("1");
896
+ FakeWfResponses {
897
+ wf_id: wf_id.to_string(),
898
+ hist,
899
+ response_batches: vec![1.into(), 2.into()],
878
900
  }
879
- };
880
- // So that we know we blocked
881
- assert_eq!(last_finisher.load(Ordering::Acquire), 2);
901
+ });
902
+ let mh = MockPollCfg::new(hists.into_iter().collect(), true, 0);
903
+ let mut worker = mock_sdk_cfg(mh, |cfg| {
904
+ cfg.max_cached_workflows = total_wfs as usize;
905
+ cfg.max_outstanding_workflow_tasks = 1;
906
+ });
907
+ let active_count: &'static _ = Box::leak(Box::new(Semaphore::new(1)));
908
+ worker.register_wf(DEFAULT_WORKFLOW_TYPE, move |ctx: WfContext| async move {
909
+ drop(
910
+ active_count
911
+ .try_acquire()
912
+ .expect("No multiple concurrent workflow tasks!"),
913
+ );
914
+ ctx.timer(Duration::from_secs(1)).await;
915
+ Ok(().into())
916
+ });
882
917
 
883
- // Since we never did anything with r2, all subsequent activations should be for wf1
884
- for i in 2..=20 {
885
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
886
- r1.run_id,
887
- start_timer_cmd(i, Duration::from_secs(1)),
888
- ))
889
- .await
890
- .unwrap();
891
- r1 = core.poll_workflow_activation().await.unwrap();
892
- assert_eq!(r1.run_id, r1_run_id);
918
+ for wf_id in wf_ids {
919
+ worker
920
+ .submit_wf(wf_id, DEFAULT_WORKFLOW_TYPE, vec![], Default::default())
921
+ .await
922
+ .unwrap();
893
923
  }
894
- // Finish the tasks so we can shut down
895
- core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
896
- r1.run_id,
897
- CompleteWorkflowExecution { result: None }.into(),
898
- ))
899
- .await
900
- .unwrap();
901
- // Evict r2
902
- core.request_workflow_eviction(&r2.run_id);
903
- // We have to properly complete the outstanding task (or the mock will be confused why a task
904
- // failure was reported)
905
- let _ = core
906
- .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
907
- r2.run_id,
908
- start_timer_cmd(1, Duration::from_secs(1)),
909
- ))
910
- .await;
911
- // Get and complete eviction
912
- let r2 = core.poll_workflow_activation().await.unwrap();
913
- let _ = core
914
- .complete_workflow_activation(WorkflowActivationCompletion::empty(r2.run_id))
915
- .await;
916
- core.shutdown().await;
924
+ worker.run_until_done().await.unwrap();
917
925
  }
918
926
 
919
927
  #[rstest(hist_batches, case::incremental(&[1, 2]), case::replay(&[3]))]
@@ -1009,7 +1017,8 @@ async fn activity_not_canceled_when_also_completed_repro(hist_batches: &'static
1009
1017
 
1010
1018
  #[tokio::test]
1011
1019
  async fn lots_of_workflows() {
1012
- let hists = (0..500).into_iter().map(|i| {
1020
+ let total_wfs = 500;
1021
+ let hists = (0..total_wfs).into_iter().map(|i| {
1013
1022
  let wf_id = format!("fake-wf-{}", i);
1014
1023
  let hist = canned_histories::single_timer("1");
1015
1024
  FakeWfResponses {
@@ -1018,37 +1027,48 @@ async fn lots_of_workflows() {
1018
1027
  response_batches: vec![1.into(), 2.into()],
1019
1028
  }
1020
1029
  });
1021
- let mock = build_multihist_mock_sg(hists, false, None);
1030
+ let mut mock = build_multihist_mock_sg(hists, false, 0);
1031
+ mock.make_wft_stream_interminable();
1022
1032
  let worker = &mock_worker(mock);
1023
-
1024
- fanout_tasks(5, |_| async move {
1025
- while let Ok(wft) = worker.poll_workflow_activation().await {
1026
- let job = &wft.jobs[0];
1027
- let reply = match job.variant {
1028
- Some(workflow_activation_job::Variant::StartWorkflow(_)) => {
1029
- start_timer_cmd(1, Duration::from_secs(1))
1030
- }
1031
- Some(workflow_activation_job::Variant::RemoveFromCache(_)) => {
1032
- worker
1033
- .complete_workflow_activation(WorkflowActivationCompletion::empty(
1034
- wft.run_id,
1035
- ))
1036
- .await
1037
- .unwrap();
1038
- continue;
1039
- }
1040
- _ => CompleteWorkflowExecution { result: None }.into(),
1041
- };
1042
- worker
1043
- .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1044
- wft.run_id, reply,
1045
- ))
1046
- .await
1047
- .unwrap();
1033
+ let completed_count = Arc::new(Semaphore::new(0));
1034
+ let killer = async {
1035
+ let _ = completed_count.acquire_many(total_wfs).await.unwrap();
1036
+ dbg!("Shutdown initted");
1037
+ worker.initiate_shutdown();
1038
+ };
1039
+ let poller = fanout_tasks(5, |_| {
1040
+ let completed_count = completed_count.clone();
1041
+ async move {
1042
+ while let Ok(wft) = worker.poll_workflow_activation().await {
1043
+ let job = &wft.jobs[0];
1044
+ let reply = match job.variant {
1045
+ Some(workflow_activation_job::Variant::StartWorkflow(_)) => {
1046
+ start_timer_cmd(1, Duration::from_secs(1))
1047
+ }
1048
+ Some(workflow_activation_job::Variant::RemoveFromCache(_)) => {
1049
+ worker
1050
+ .complete_workflow_activation(WorkflowActivationCompletion::empty(
1051
+ wft.run_id,
1052
+ ))
1053
+ .await
1054
+ .unwrap();
1055
+ continue;
1056
+ }
1057
+ _ => {
1058
+ completed_count.add_permits(1);
1059
+ CompleteWorkflowExecution { result: None }.into()
1060
+ }
1061
+ };
1062
+ worker
1063
+ .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1064
+ wft.run_id, reply,
1065
+ ))
1066
+ .await
1067
+ .unwrap();
1068
+ }
1048
1069
  }
1049
- })
1050
- .await;
1051
- assert_eq!(worker.outstanding_workflow_tasks(), 0);
1070
+ });
1071
+ join!(killer, poller);
1052
1072
  worker.shutdown().await;
1053
1073
  }
1054
1074
 
@@ -1104,34 +1124,27 @@ async fn complete_after_eviction() {
1104
1124
  let activation = core.poll_workflow_activation().await.unwrap();
1105
1125
  // We just got start workflow, immediately evict
1106
1126
  core.request_workflow_eviction(&activation.run_id);
1107
- // Original task must be completed before we get the eviction
1127
+ // Since we got whole history, we must finish replay before eviction will appear
1108
1128
  core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1109
1129
  activation.run_id,
1110
1130
  start_timer_cmd(1, Duration::from_secs(1)),
1111
1131
  ))
1112
1132
  .await
1113
1133
  .unwrap();
1114
- let eviction_activation = core.poll_workflow_activation().await.unwrap();
1134
+ let next_activation = core.poll_workflow_activation().await.unwrap();
1115
1135
  assert_matches!(
1116
- eviction_activation.jobs.as_slice(),
1136
+ next_activation.jobs.as_slice(),
1117
1137
  [WorkflowActivationJob {
1118
1138
  variant: Some(workflow_activation_job::Variant::FireTimer(_)),
1119
1139
  },]
1120
1140
  );
1121
- // Complete the activation containing the eviction, the way we normally would have
1122
1141
  core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
1123
- eviction_activation.run_id,
1142
+ next_activation.run_id,
1124
1143
  vec![CompleteWorkflowExecution { result: None }.into()],
1125
1144
  ))
1126
1145
  .await
1127
1146
  .unwrap();
1128
- let eviction = core.poll_workflow_activation().await.unwrap();
1129
- assert_matches!(
1130
- eviction.jobs.as_slice(),
1131
- [WorkflowActivationJob {
1132
- variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1133
- }]
1134
- );
1147
+
1135
1148
  core.shutdown().await;
1136
1149
  }
1137
1150
 
@@ -1166,6 +1179,7 @@ async fn new_server_work_while_eviction_outstanding_doesnt_overwrite_activation(
1166
1179
  let wfid = "fake_wf_id";
1167
1180
  let t = canned_histories::single_timer("1");
1168
1181
  let mock = single_hist_mock_sg(wfid, t, &[1, 2], mock_workflow_client(), false);
1182
+ let taskmap = mock.outstanding_task_map.clone().unwrap();
1169
1183
  let core = mock_worker(mock);
1170
1184
 
1171
1185
  // Poll for and complete first workflow task
@@ -1176,19 +1190,25 @@ async fn new_server_work_while_eviction_outstanding_doesnt_overwrite_activation(
1176
1190
  ))
1177
1191
  .await
1178
1192
  .unwrap();
1179
- let eviction_activation = core.poll_workflow_activation().await.unwrap();
1193
+ let evict_act = core.poll_workflow_activation().await.unwrap();
1180
1194
  assert_matches!(
1181
- eviction_activation.jobs.as_slice(),
1195
+ evict_act.jobs.as_slice(),
1182
1196
  [WorkflowActivationJob {
1183
1197
  variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1184
1198
  }]
1185
1199
  );
1186
- // Poll again. We should not overwrite the eviction with the new work from the server to fire
1187
- // the timer, so polling will try again, and run into the mock being out of responses.
1188
- let act = core.poll_workflow_activation().await;
1189
- assert_matches!(act, Err(PollWfError::TonicError(err))
1190
- if err.message() == NO_MORE_WORK_ERROR_MSG);
1191
- core.shutdown().await;
1200
+ // Ensure mock has delivered both tasks
1201
+ assert!(taskmap.all_work_delivered());
1202
+ // Now we can complete the evict
1203
+ core.complete_workflow_activation(WorkflowActivationCompletion::empty(evict_act.run_id))
1204
+ .await
1205
+ .unwrap();
1206
+ // The task buffered during eviction is applied and we start over
1207
+ let start_again = core.poll_workflow_activation().await.unwrap();
1208
+ assert_matches!(
1209
+ start_again.jobs[0].variant,
1210
+ Some(workflow_activation_job::Variant::StartWorkflow(_))
1211
+ );
1192
1212
  }
1193
1213
 
1194
1214
  #[tokio::test]
@@ -1200,7 +1220,7 @@ async fn buffered_work_drained_on_shutdown() {
1200
1220
  t.add_workflow_task_scheduled_and_started();
1201
1221
  // Need to build the first response before adding the timeout events b/c otherwise the history
1202
1222
  // builder will include them in the first task
1203
- let resp_1 = hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string());
1223
+ let resp_1 = hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()).resp;
1204
1224
  t.add_workflow_task_timed_out();
1205
1225
  t.add_full_wf_task();
1206
1226
  let timer_started_event_id = t.add_get_event_id(EventType::TimerStarted, None);
@@ -1218,18 +1238,15 @@ async fn buffered_work_drained_on_shutdown() {
1218
1238
  // Extend the task list with the now timeout-included version of the task. We add a bunch of
1219
1239
  // them because the poll loop will spin while new tasks are available and it is buffering them
1220
1240
  tasks.extend(
1221
- std::iter::repeat(hist_to_poll_resp(
1222
- &t,
1223
- wfid.to_owned(),
1224
- 2.into(),
1225
- TEST_Q.to_string(),
1226
- ))
1241
+ std::iter::repeat_with(|| {
1242
+ hist_to_poll_resp(&t, wfid.to_owned(), 2.into(), TEST_Q.to_string()).resp
1243
+ })
1227
1244
  .take(50),
1228
1245
  );
1229
1246
  let mut mock = mock_workflow_client();
1230
1247
  mock.expect_complete_workflow_task()
1231
1248
  .returning(|_| Ok(RespondWorkflowTaskCompletedResponse::default()));
1232
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1249
+ let mut mock = MocksHolder::from_wft_stream(mock, stream::iter(tasks));
1233
1250
  // Cache on to avoid being super repetitive
1234
1251
  mock.worker_cfg(|wc| wc.max_cached_workflows = 10);
1235
1252
  let core = &mock_worker(mock);
@@ -1255,7 +1272,7 @@ async fn buffered_work_drained_on_shutdown() {
1255
1272
  .await
1256
1273
  .unwrap();
1257
1274
  };
1258
- tokio::join!(poll_fut, complete_first, async {
1275
+ join!(poll_fut, complete_first, async {
1259
1276
  // If the shutdown is sent too too fast, we might not have got a chance to even buffer work
1260
1277
  tokio::time::sleep(Duration::from_millis(5)).await;
1261
1278
  core.shutdown().await;
@@ -1272,7 +1289,7 @@ async fn fail_wft_then_recover() {
1272
1289
  [ResponseType::AllHistory, ResponseType::AllHistory],
1273
1290
  mock_workflow_client(),
1274
1291
  );
1275
- mh.num_expected_fails = Some(1);
1292
+ mh.num_expected_fails = 1;
1276
1293
  mh.expect_fail_wft_matcher =
1277
1294
  Box::new(|_, cause, _| matches!(cause, WorkflowTaskFailedCause::NonDeterministicError));
1278
1295
  let mut mock = build_mock_pollers(mh);
@@ -1339,21 +1356,22 @@ async fn poll_response_triggers_wf_error() {
1339
1356
  t.add_full_wf_task();
1340
1357
  t.add_workflow_execution_completed();
1341
1358
 
1342
- let mut mh = MockPollCfg::from_resp_batches(
1359
+ let mh = MockPollCfg::from_resp_batches(
1343
1360
  "fake_wf_id",
1344
1361
  t,
1345
1362
  [ResponseType::AllHistory],
1346
1363
  mock_workflow_client(),
1347
1364
  );
1348
- // Since applying the poll response immediately generates an error core will start polling again
1349
- // Rather than panic on bad expectation we want to return the magic "no more work" error
1350
- mh.enforce_correct_number_of_polls = false;
1351
1365
  let mock = build_mock_pollers(mh);
1352
1366
  let core = mock_worker(mock);
1353
1367
  // Poll for first WFT, which is immediately an eviction
1354
- let act = core.poll_workflow_activation().await;
1355
- assert_matches!(act, Err(PollWfError::TonicError(err))
1356
- if err.message() == NO_MORE_WORK_ERROR_MSG);
1368
+ let act = core.poll_workflow_activation().await.unwrap();
1369
+ assert_matches!(
1370
+ act.jobs.as_slice(),
1371
+ [WorkflowActivationJob {
1372
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1373
+ }]
1374
+ );
1357
1375
  }
1358
1376
 
1359
1377
  // Verifies we can handle multiple wft timeouts in a row if lang is being very slow in responding
@@ -1367,11 +1385,6 @@ async fn lang_slower_than_wft_timeouts() {
1367
1385
  t.add_full_wf_task();
1368
1386
  t.add_workflow_execution_completed();
1369
1387
 
1370
- let tasks = [
1371
- hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1372
- hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1373
- hist_to_poll_resp(&t, wfid.to_owned(), 1.into(), TEST_Q.to_string()),
1374
- ];
1375
1388
  let mut mock = mock_workflow_client();
1376
1389
  mock.expect_complete_workflow_task()
1377
1390
  .times(1)
@@ -1379,22 +1392,19 @@ async fn lang_slower_than_wft_timeouts() {
1379
1392
  mock.expect_complete_workflow_task()
1380
1393
  .times(1)
1381
1394
  .returning(|_| Ok(Default::default()));
1382
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1395
+ let mut mock = single_hist_mock_sg(wfid, t, [1, 1], mock, true);
1396
+ let tasksmap = mock.outstanding_task_map.clone().unwrap();
1383
1397
  mock.worker_cfg(|wc| {
1384
1398
  wc.max_cached_workflows = 2;
1385
1399
  });
1386
1400
  let core = mock_worker(mock);
1387
1401
 
1402
+ // This completion runs into the workflow task not found error
1388
1403
  let wf_task = core.poll_workflow_activation().await.unwrap();
1389
- let poll_until_no_work = core.poll_workflow_activation().await;
1390
- assert_matches!(poll_until_no_work, Err(PollWfError::TonicError(err))
1391
- if err.message() == NO_MORE_WORK_ERROR_MSG);
1392
- // This completion runs into a workflow task not found error, since it's completing a stale
1393
- // task.
1394
1404
  core.complete_workflow_activation(WorkflowActivationCompletion::empty(wf_task.run_id))
1395
1405
  .await
1396
1406
  .unwrap();
1397
- // Now we should get an eviction
1407
+ // It will get an eviction
1398
1408
  let wf_task = core.poll_workflow_activation().await.unwrap();
1399
1409
  assert_matches!(
1400
1410
  wf_task.jobs.as_slice(),
@@ -1402,10 +1412,12 @@ async fn lang_slower_than_wft_timeouts() {
1402
1412
  variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1403
1413
  }]
1404
1414
  );
1415
+ // Before we complete, unlock the next task from the mock so that we'll see it get buffered.
1416
+ tasksmap.release_run(&wf_task.run_id);
1405
1417
  core.complete_workflow_activation(WorkflowActivationCompletion::empty(wf_task.run_id))
1406
1418
  .await
1407
1419
  .unwrap();
1408
- // The last WFT buffered should be applied now
1420
+ // The buffered WFT should be applied now
1409
1421
  let start_again = core.poll_workflow_activation().await.unwrap();
1410
1422
  assert_matches!(
1411
1423
  start_again.jobs[0].variant,
@@ -1480,7 +1492,9 @@ async fn failing_wft_doesnt_eat_permit_forever() {
1480
1492
  t.add_workflow_task_scheduled_and_started();
1481
1493
 
1482
1494
  let mock = mock_workflow_client();
1483
- let mut mock = single_hist_mock_sg("fake_wf_id", t, [1, 1, 1], mock, true);
1495
+ let mut mock = MockPollCfg::from_resp_batches("fake_wf_id", t, [1, 1, 1], mock);
1496
+ mock.num_expected_fails = 1;
1497
+ let mut mock = build_mock_pollers(mock);
1484
1498
  mock.worker_cfg(|cfg| {
1485
1499
  cfg.max_cached_workflows = 2;
1486
1500
  cfg.max_outstanding_workflow_tasks = 2;
@@ -1489,8 +1503,8 @@ async fn failing_wft_doesnt_eat_permit_forever() {
1489
1503
  let worker = mock_worker(mock);
1490
1504
 
1491
1505
  let mut run_id = "".to_string();
1492
- // Fail twice, verifying a permit is eaten. We cannot fail the same run more than twice in a row
1493
- // because we purposefully time out rather than spamming.
1506
+ // Fail twice, verifying a permit is not eaten. We cannot fail the same run more than twice in a
1507
+ // row because we purposefully time out rather than spamming.
1494
1508
  for _ in 1..=2 {
1495
1509
  let activation = worker.poll_workflow_activation().await.unwrap();
1496
1510
  // Issue a nonsense completion that will trigger a WFT failure
@@ -1513,19 +1527,19 @@ async fn failing_wft_doesnt_eat_permit_forever() {
1513
1527
  .complete_workflow_activation(WorkflowActivationCompletion::empty(activation.run_id))
1514
1528
  .await
1515
1529
  .unwrap();
1516
- assert_eq!(worker.outstanding_workflow_tasks(), 0);
1517
- assert_eq!(worker.available_wft_permits(), 2);
1518
1530
  }
1531
+ assert_eq!(worker.outstanding_workflow_tasks().await, 0);
1532
+ // 1 permit is in use because the next task is buffered and has re-used the permit
1533
+ assert_eq!(worker.available_wft_permits().await, 1);
1519
1534
  // We should be "out of work" because the mock service thinks we didn't complete the last task,
1520
1535
  // which we didn't, because we don't spam failures. The real server would eventually time out
1521
1536
  // the task. Mock doesn't understand that, so the WFT permit is released because eventually a
1522
1537
  // new one will be generated. We manually clear the mock's outstanding task list so the next
1523
1538
  // poll will work.
1524
- outstanding_mock_tasks
1525
- .unwrap()
1526
- .write()
1527
- .remove_by_left(&run_id);
1539
+ outstanding_mock_tasks.unwrap().release_run(&run_id);
1528
1540
  let activation = worker.poll_workflow_activation().await.unwrap();
1541
+ // There should be no change in permits, since this just unbuffered the buffered task
1542
+ assert_eq!(worker.available_wft_permits().await, 1);
1529
1543
  worker
1530
1544
  .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1531
1545
  activation.run_id,
@@ -1533,6 +1547,7 @@ async fn failing_wft_doesnt_eat_permit_forever() {
1533
1547
  ))
1534
1548
  .await
1535
1549
  .unwrap();
1550
+ assert_eq!(worker.available_wft_permits().await, 2);
1536
1551
 
1537
1552
  worker.shutdown().await;
1538
1553
  }
@@ -1553,7 +1568,6 @@ async fn cache_miss_will_fetch_history() {
1553
1568
  [ResponseType::ToTaskNum(1), ResponseType::OneTask(2)],
1554
1569
  mock_workflow_client(),
1555
1570
  );
1556
- mh.num_expected_fails = Some(0);
1557
1571
  mh.mock_client
1558
1572
  .expect_get_workflow_execution_history()
1559
1573
  .times(1)
@@ -1565,31 +1579,50 @@ async fn cache_miss_will_fetch_history() {
1565
1579
  let worker = mock_worker(mock);
1566
1580
 
1567
1581
  let activation = worker.poll_workflow_activation().await.unwrap();
1582
+ assert_eq!(activation.history_length, 3);
1568
1583
  assert_matches!(
1569
1584
  activation.jobs.as_slice(),
1570
1585
  [WorkflowActivationJob {
1571
1586
  variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
1572
1587
  }]
1573
1588
  );
1574
- worker
1575
- .complete_workflow_activation(WorkflowActivationCompletion::empty(&activation.run_id))
1576
- .await
1577
- .unwrap();
1578
- // Force an eviction
1589
+ // Force an eviction (before complete matters, so that we will be sure the eviction is queued
1590
+ // up before the next fake WFT is unlocked)
1579
1591
  worker.request_wf_eviction(
1580
1592
  &activation.run_id,
1581
1593
  "whatever",
1582
1594
  EvictionReason::LangRequested,
1583
1595
  );
1596
+ worker
1597
+ .complete_workflow_activation(WorkflowActivationCompletion::empty(&activation.run_id))
1598
+ .await
1599
+ .unwrap();
1584
1600
  // Handle the eviction, and the restart
1585
- for _ in 1..=2 {
1601
+ for i in 1..=2 {
1586
1602
  let activation = worker.poll_workflow_activation().await.unwrap();
1603
+ assert_eq!(activation.history_length, 3);
1604
+ if i == 1 {
1605
+ assert_matches!(
1606
+ activation.jobs.as_slice(),
1607
+ [WorkflowActivationJob {
1608
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1609
+ }]
1610
+ );
1611
+ } else {
1612
+ assert_matches!(
1613
+ activation.jobs.as_slice(),
1614
+ [WorkflowActivationJob {
1615
+ variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
1616
+ }]
1617
+ );
1618
+ }
1587
1619
  worker
1588
1620
  .complete_workflow_activation(WorkflowActivationCompletion::empty(activation.run_id))
1589
1621
  .await
1590
1622
  .unwrap();
1591
1623
  }
1592
1624
  let activation = worker.poll_workflow_activation().await.unwrap();
1625
+ assert_eq!(activation.history_length, 7);
1593
1626
  assert_matches!(
1594
1627
  activation.jobs.as_slice(),
1595
1628
  [WorkflowActivationJob {
@@ -1603,7 +1636,7 @@ async fn cache_miss_will_fetch_history() {
1603
1636
  ))
1604
1637
  .await
1605
1638
  .unwrap();
1606
- assert_eq!(worker.outstanding_workflow_tasks(), 0);
1639
+ assert_eq!(worker.outstanding_workflow_tasks().await, 0);
1607
1640
  worker.shutdown().await;
1608
1641
  }
1609
1642
 
@@ -1619,29 +1652,20 @@ async fn tasks_from_completion_are_delivered() {
1619
1652
  t.add_full_wf_task();
1620
1653
  t.add_workflow_execution_completed();
1621
1654
 
1622
- let tasks = [hist_to_poll_resp(
1623
- &t,
1624
- wfid.to_owned(),
1625
- 1.into(),
1626
- TEST_Q.to_string(),
1627
- )];
1628
1655
  let mut mock = mock_workflow_client();
1656
+ let complete_resp = RespondWorkflowTaskCompletedResponse {
1657
+ workflow_task: Some(
1658
+ hist_to_poll_resp(&t, wfid.to_owned(), 2.into(), TEST_Q.to_string()).resp,
1659
+ ),
1660
+ activity_tasks: vec![],
1661
+ };
1629
1662
  mock.expect_complete_workflow_task()
1630
1663
  .times(1)
1631
- .returning(move |_| {
1632
- Ok(RespondWorkflowTaskCompletedResponse {
1633
- workflow_task: Some(hist_to_poll_resp(
1634
- &t,
1635
- wfid.to_owned(),
1636
- 2.into(),
1637
- TEST_Q.to_string(),
1638
- )),
1639
- })
1640
- });
1664
+ .returning(move |_| Ok(complete_resp.clone()));
1641
1665
  mock.expect_complete_workflow_task()
1642
1666
  .times(1)
1643
1667
  .returning(|_| Ok(Default::default()));
1644
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1668
+ let mut mock = single_hist_mock_sg(wfid, t, [1], mock, true);
1645
1669
  mock.worker_cfg(|wc| wc.max_cached_workflows = 2);
1646
1670
  let core = mock_worker(mock);
1647
1671
 
@@ -1665,68 +1689,24 @@ async fn tasks_from_completion_are_delivered() {
1665
1689
  core.shutdown().await;
1666
1690
  }
1667
1691
 
1668
- #[tokio::test]
1669
- async fn evict_missing_wf_during_poll_doesnt_eat_permit() {
1670
- let wfid = "fake_wf_id";
1671
- let mut t = TestHistoryBuilder::default();
1672
- t.add_by_type(EventType::WorkflowExecutionStarted);
1673
- t.add_full_wf_task();
1674
- t.add_we_signaled("sig", vec![]);
1675
- t.add_full_wf_task();
1676
- t.add_workflow_execution_completed();
1677
-
1678
- let tasks = [hist_to_poll_resp(
1679
- &t,
1680
- wfid.to_owned(),
1681
- // Use a partial task so that we'll fetch history
1682
- ResponseType::OneTask(2),
1683
- TEST_Q.to_string(),
1684
- )];
1685
- let mut mock = mock_workflow_client();
1686
- mock.expect_get_workflow_execution_history()
1687
- .times(1)
1688
- .returning(move |_, _, _| {
1689
- Ok(GetWorkflowExecutionHistoryResponse {
1690
- // Empty history so we error applying it (no jobs)
1691
- history: Some(History { events: vec![] }),
1692
- raw_history: vec![],
1693
- next_page_token: vec![],
1694
- archived: false,
1695
- })
1696
- });
1697
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1698
- mock.worker_cfg(|wc| {
1699
- wc.max_cached_workflows = 1;
1700
- wc.max_outstanding_workflow_tasks = 1;
1701
- });
1702
- let core = mock_worker(mock);
1703
-
1704
- // Should error because mock is out of work
1705
- assert_matches!(core.poll_workflow_activation().await, Err(_));
1706
- assert_eq!(core.available_wft_permits(), 1);
1707
-
1708
- core.shutdown().await;
1709
- }
1710
-
1711
1692
  #[tokio::test]
1712
1693
  async fn poll_faster_than_complete_wont_overflow_cache() {
1713
1694
  // Make workflow tasks for 5 different runs
1714
1695
  let tasks: Vec<_> = (1..=5)
1715
- .map(|i| {
1716
- hist_to_poll_resp(
1717
- // New hist each time for new run ids
1718
- &canned_histories::single_timer("1"),
1719
- format!("wf-{}", i),
1720
- ResponseType::ToTaskNum(1),
1721
- TEST_Q.to_string(),
1722
- )
1696
+ .map(|i| FakeWfResponses {
1697
+ wf_id: format!("wf-{}", i),
1698
+ hist: canned_histories::single_timer("1"),
1699
+ response_batches: vec![ResponseType::ToTaskNum(1)],
1723
1700
  })
1724
1701
  .collect();
1725
- let mut mock = mock_workflow_client();
1726
- mock.expect_complete_workflow_task()
1702
+ let mut mock_client = mock_workflow_client();
1703
+ mock_client
1704
+ .expect_complete_workflow_task()
1727
1705
  .times(3)
1728
1706
  .returning(|_| Ok(Default::default()));
1729
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1707
+ let mut mock_cfg = MockPollCfg::new(tasks, true, 0);
1708
+ mock_cfg.mock_client = mock_client;
1709
+ let mut mock = build_mock_pollers(mock_cfg);
1730
1710
  mock.worker_cfg(|wc| {
1731
1711
  wc.max_cached_workflows = 3;
1732
1712
  wc.max_outstanding_workflow_tasks = 3;
@@ -1754,8 +1734,12 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1754
1734
  // an eviction, and buffer the new run task. However, the run we're trying to evict has pending
1755
1735
  // activations! Thus, we must complete them first before this poll will unblock, and then it
1756
1736
  // will unblock with the eviction.
1737
+ let p4 = core.poll_workflow_activation();
1738
+ // Make sure the task gets buffered before we start the complete, so the LRU list is in the
1739
+ // expected order and what we expect to evict will be evicted.
1740
+ advance_fut!(p4);
1757
1741
  let p4 = async {
1758
- let p4 = core.poll_workflow_activation().await.unwrap();
1742
+ let p4 = p4.await.unwrap();
1759
1743
  assert_matches!(
1760
1744
  &p4.jobs.as_slice(),
1761
1745
  [WorkflowActivationJob {
@@ -1765,9 +1749,6 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1765
1749
  p4
1766
1750
  };
1767
1751
  let p2_pending_completer = async {
1768
- // Sleep needed because otherwise the complete unblocks waiting for the cache to free a slot
1769
- // before we have a chance to actually... wait for it.
1770
- tokio::time::sleep(Duration::from_millis(100)).await;
1771
1752
  core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1772
1753
  p2.run_id,
1773
1754
  start_timer_cmd(1, Duration::from_secs(1)),
@@ -1775,8 +1756,8 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1775
1756
  .await
1776
1757
  .unwrap();
1777
1758
  };
1778
- let (p4, _) = tokio::join!(p4, p2_pending_completer);
1779
- assert_eq!(core.cached_workflows(), 3);
1759
+ let (p4, _) = join!(p4, p2_pending_completer);
1760
+ assert_eq!(core.cached_workflows().await, 3);
1780
1761
 
1781
1762
  // This poll should also block until the eviction is actually completed
1782
1763
  let blocking_poll = async {
@@ -1794,8 +1775,8 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1794
1775
  .unwrap();
1795
1776
  };
1796
1777
 
1797
- let (_p5, _) = tokio::join!(blocking_poll, complete_evict);
1798
- assert_eq!(core.cached_workflows(), 3);
1778
+ let (_p5, _) = join!(blocking_poll, complete_evict);
1779
+ assert_eq!(core.cached_workflows().await, 3);
1799
1780
  // The next poll will get and buffer a task for a new run, and generate an eviction for p3 but
1800
1781
  // that eviction cannot be obtained until we complete the existing outstanding task.
1801
1782
  let p6 = async {
@@ -1816,7 +1797,7 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1816
1797
  .await
1817
1798
  .unwrap();
1818
1799
  };
1819
- let (p6, _) = tokio::join!(p6, completer);
1800
+ let (p6, _) = join!(p6, completer);
1820
1801
  let complete_evict = async {
1821
1802
  core.complete_workflow_activation(WorkflowActivationCompletion::empty(p6.run_id))
1822
1803
  .await
@@ -1833,10 +1814,10 @@ async fn poll_faster_than_complete_wont_overflow_cache() {
1833
1814
  );
1834
1815
  };
1835
1816
 
1836
- tokio::join!(blocking_poll, complete_evict);
1817
+ join!(blocking_poll, complete_evict);
1837
1818
  // p5 outstanding and final poll outstanding -- hence one permit available
1838
- assert_eq!(core.available_wft_permits(), 1);
1839
- assert_eq!(core.cached_workflows(), 3);
1819
+ assert_eq!(core.available_wft_permits().await, 1);
1820
+ assert_eq!(core.cached_workflows().await, 3);
1840
1821
  }
1841
1822
 
1842
1823
  #[tokio::test]
@@ -1848,6 +1829,7 @@ async fn eviction_waits_until_replay_finished() {
1848
1829
  let core = mock_worker(mock);
1849
1830
 
1850
1831
  let activation = core.poll_workflow_activation().await.unwrap();
1832
+ assert_eq!(activation.history_length, 3);
1851
1833
  // Immediately request eviction after getting start workflow
1852
1834
  core.request_workflow_eviction(&activation.run_id);
1853
1835
  core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
@@ -1857,6 +1839,7 @@ async fn eviction_waits_until_replay_finished() {
1857
1839
  .await
1858
1840
  .unwrap();
1859
1841
  let t1_fired = core.poll_workflow_activation().await.unwrap();
1842
+ assert_eq!(t1_fired.history_length, 8);
1860
1843
  assert_matches!(
1861
1844
  t1_fired.jobs.as_slice(),
1862
1845
  [WorkflowActivationJob {
@@ -1870,6 +1853,7 @@ async fn eviction_waits_until_replay_finished() {
1870
1853
  .await
1871
1854
  .unwrap();
1872
1855
  let t2_fired = core.poll_workflow_activation().await.unwrap();
1856
+ assert_eq!(t2_fired.history_length, 13);
1873
1857
  assert_matches!(
1874
1858
  t2_fired.jobs.as_slice(),
1875
1859
  [WorkflowActivationJob {
@@ -1882,14 +1866,137 @@ async fn eviction_waits_until_replay_finished() {
1882
1866
  ))
1883
1867
  .await
1884
1868
  .unwrap();
1885
- // The first two WFTs were replay, and now that we've caught up, the eviction will be sent
1886
- let eviction = core.poll_workflow_activation().await.unwrap();
1869
+
1870
+ core.shutdown().await;
1871
+ }
1872
+
1873
+ #[tokio::test]
1874
+ async fn autocompletes_wft_no_work() {
1875
+ let wfid = "fake_wf_id";
1876
+ let activity_id = "1";
1877
+
1878
+ let mut t = TestHistoryBuilder::default();
1879
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1880
+ t.add_full_wf_task();
1881
+ let scheduled_event_id = t.add_activity_task_scheduled(activity_id);
1882
+ t.add_full_wf_task();
1883
+ t.add_we_signaled("sig1", vec![]);
1884
+ t.add_full_wf_task();
1885
+ let started_event_id = t.add_activity_task_started(scheduled_event_id);
1886
+ t.add_activity_task_completed(scheduled_event_id, started_event_id, Default::default());
1887
+ t.add_full_wf_task();
1888
+ let mock = mock_workflow_client();
1889
+ let mut mock = single_hist_mock_sg(wfid, t, &[1, 2, 3, 4], mock, true);
1890
+ mock.worker_cfg(|w| w.max_cached_workflows = 1);
1891
+ let core = mock_worker(mock);
1892
+
1893
+ let act = core.poll_workflow_activation().await.unwrap();
1887
1894
  assert_matches!(
1888
- eviction.jobs.as_slice(),
1895
+ act.jobs.as_slice(),
1889
1896
  [WorkflowActivationJob {
1890
- variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1897
+ variant: Some(workflow_activation_job::Variant::StartWorkflow(_)),
1898
+ }]
1899
+ );
1900
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1901
+ act.run_id,
1902
+ ScheduleActivity {
1903
+ seq: 1,
1904
+ activity_id: activity_id.to_string(),
1905
+ cancellation_type: ActivityCancellationType::Abandon as i32,
1906
+ ..Default::default()
1907
+ }
1908
+ .into(),
1909
+ ))
1910
+ .await
1911
+ .unwrap();
1912
+ let act = core.poll_workflow_activation().await.unwrap();
1913
+ assert_matches!(
1914
+ act.jobs.as_slice(),
1915
+ [WorkflowActivationJob {
1916
+ variant: Some(workflow_activation_job::Variant::SignalWorkflow(_)),
1891
1917
  }]
1892
1918
  );
1919
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1920
+ act.run_id,
1921
+ RequestCancelActivity { seq: 1 }.into(),
1922
+ ))
1923
+ .await
1924
+ .unwrap();
1925
+ let act = core.poll_workflow_activation().await.unwrap();
1926
+ core.complete_workflow_activation(WorkflowActivationCompletion::empty(act.run_id))
1927
+ .await
1928
+ .unwrap();
1929
+ // The last task will autocomplete, and thus this will return shutdown since there is no more
1930
+ // work
1931
+ assert_matches!(
1932
+ core.poll_workflow_activation().await.unwrap_err(),
1933
+ PollWfError::ShutDown
1934
+ );
1893
1935
 
1894
1936
  core.shutdown().await;
1895
1937
  }
1938
+
1939
+ #[tokio::test]
1940
+ async fn no_race_acquiring_permits() {
1941
+ let wfid = "fake_wf_id";
1942
+ let mut mock_client = mock_manual_workflow_client();
1943
+ // We need to allow two polls to happen by triggering two processing events in the workflow
1944
+ // stream, but then delivering the actual tasks after that
1945
+ let task_barr: &'static Barrier = Box::leak(Box::new(Barrier::new(2)));
1946
+ mock_client
1947
+ .expect_poll_workflow_task()
1948
+ .returning(move |_, _| {
1949
+ let t = canned_histories::single_timer("1");
1950
+ let poll_resp =
1951
+ hist_to_poll_resp(&t, wfid.to_owned(), 2.into(), TEST_Q.to_string()).resp;
1952
+ async move {
1953
+ task_barr.wait().await;
1954
+ Ok(poll_resp.clone())
1955
+ }
1956
+ .boxed()
1957
+ });
1958
+ mock_client
1959
+ .expect_complete_workflow_task()
1960
+ .returning(|_| async move { Ok(Default::default()) }.boxed());
1961
+
1962
+ let worker = Worker::new_test(
1963
+ test_worker_cfg()
1964
+ .max_outstanding_workflow_tasks(1_usize)
1965
+ .max_cached_workflows(10_usize)
1966
+ .build()
1967
+ .unwrap(),
1968
+ mock_client,
1969
+ );
1970
+
1971
+ // Two polls in a row, both of which will get stuck on the barrier and are only allowed to
1972
+ // proceed after a call which will cause the workflow stream to process an event. Without the
1973
+ // fix, this would've meant the stream thought it was OK to poll twice, but once the tasks
1974
+ // are received, it would find there was only one permit.
1975
+ let poll_1_f = async {
1976
+ let r = worker.poll_workflow_activation().await.unwrap();
1977
+ worker
1978
+ .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1979
+ r.run_id,
1980
+ start_timer_cmd(1, Duration::from_secs(1)),
1981
+ ))
1982
+ .await
1983
+ .unwrap();
1984
+ };
1985
+ let poll_2_f = async {
1986
+ let r = worker.poll_workflow_activation().await.unwrap();
1987
+ worker
1988
+ .complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1989
+ r.run_id,
1990
+ start_timer_cmd(1, Duration::from_secs(1)),
1991
+ ))
1992
+ .await
1993
+ .unwrap();
1994
+ };
1995
+ let other_f = async {
1996
+ worker.cached_workflows().await;
1997
+ task_barr.wait().await;
1998
+ worker.cached_workflows().await;
1999
+ task_barr.wait().await;
2000
+ };
2001
+ join!(poll_1_f, poll_2_f, other_f);
2002
+ }