@temporalio/core-bridge 0.20.2 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/Cargo.lock +137 -127
  2. package/index.d.ts +7 -2
  3. package/package.json +3 -3
  4. package/releases/aarch64-apple-darwin/index.node +0 -0
  5. package/releases/x86_64-apple-darwin/index.node +0 -0
  6. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  7. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  8. package/sdk-core/.buildkite/docker/docker-compose.yaml +5 -4
  9. package/sdk-core/client/Cargo.toml +1 -0
  10. package/sdk-core/client/src/lib.rs +52 -9
  11. package/sdk-core/client/src/raw.rs +9 -1
  12. package/sdk-core/client/src/retry.rs +12 -1
  13. package/sdk-core/client/src/workflow_handle/mod.rs +183 -0
  14. package/sdk-core/core/src/abstractions.rs +10 -3
  15. package/sdk-core/core/src/core_tests/child_workflows.rs +7 -9
  16. package/sdk-core/core/src/core_tests/determinism.rs +8 -19
  17. package/sdk-core/core/src/core_tests/local_activities.rs +22 -32
  18. package/sdk-core/core/src/core_tests/queries.rs +272 -5
  19. package/sdk-core/core/src/core_tests/workers.rs +4 -34
  20. package/sdk-core/core/src/core_tests/workflow_tasks.rs +197 -41
  21. package/sdk-core/core/src/pending_activations.rs +11 -0
  22. package/sdk-core/core/src/telemetry/mod.rs +1 -1
  23. package/sdk-core/core/src/test_help/mod.rs +57 -7
  24. package/sdk-core/core/src/worker/mod.rs +64 -15
  25. package/sdk-core/core/src/workflow/machines/mod.rs +1 -1
  26. package/sdk-core/core/src/workflow/machines/timer_state_machine.rs +2 -2
  27. package/sdk-core/core/src/workflow/machines/workflow_machines.rs +14 -3
  28. package/sdk-core/core/src/workflow/mod.rs +5 -2
  29. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +47 -2
  30. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +16 -2
  31. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +252 -125
  32. package/sdk-core/core-api/src/worker.rs +9 -0
  33. package/sdk-core/sdk/Cargo.toml +1 -0
  34. package/sdk-core/sdk/src/activity_context.rs +223 -0
  35. package/sdk-core/sdk/src/interceptors.rs +8 -2
  36. package/sdk-core/sdk/src/lib.rs +167 -122
  37. package/sdk-core/sdk-core-protos/src/history_info.rs +3 -7
  38. package/sdk-core/test-utils/Cargo.toml +1 -0
  39. package/sdk-core/test-utils/src/lib.rs +78 -37
  40. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +11 -4
  41. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +0 -1
  42. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +0 -3
  43. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +33 -17
  44. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +10 -1
  45. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +0 -1
  46. package/sdk-core/tests/integ_tests/workflow_tests.rs +71 -3
  47. package/sdk-core/tests/load_tests.rs +80 -6
  48. package/src/errors.rs +9 -2
  49. package/src/lib.rs +39 -16
  50. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
@@ -71,7 +71,7 @@ fn single_activity_failure_setup(hist_batches: &'static [usize]) -> Worker {
71
71
  #[case::incremental(single_timer_setup(&[1, 2]), NonSticky)]
72
72
  #[case::replay(single_timer_setup(&[2]), NonSticky)]
73
73
  #[case::incremental_evict(single_timer_setup(&[1, 2]), AfterEveryReply)]
74
- #[case::replay_evict(single_timer_setup(&[2, 2]), AfterEveryReply)]
74
+ #[case::replay_evict(single_timer_setup(&[2]), AfterEveryReply)]
75
75
  #[tokio::test]
76
76
  async fn single_timer(#[case] worker: Worker, #[case] evict: WorkflowCachingPolicy) {
77
77
  poll_and_reply(
@@ -1114,14 +1114,9 @@ async fn complete_after_eviction() {
1114
1114
  let eviction_activation = core.poll_workflow_activation().await.unwrap();
1115
1115
  assert_matches!(
1116
1116
  eviction_activation.jobs.as_slice(),
1117
- [
1118
- WorkflowActivationJob {
1119
- variant: Some(workflow_activation_job::Variant::FireTimer(_)),
1120
- },
1121
- WorkflowActivationJob {
1122
- variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1123
- }
1124
- ]
1117
+ [WorkflowActivationJob {
1118
+ variant: Some(workflow_activation_job::Variant::FireTimer(_)),
1119
+ },]
1125
1120
  );
1126
1121
  // Complete the activation containing the eviction, the way we normally would have
1127
1122
  core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
@@ -1130,6 +1125,13 @@ async fn complete_after_eviction() {
1130
1125
  ))
1131
1126
  .await
1132
1127
  .unwrap();
1128
+ let eviction = core.poll_workflow_activation().await.unwrap();
1129
+ assert_matches!(
1130
+ eviction.jobs.as_slice(),
1131
+ [WorkflowActivationJob {
1132
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1133
+ }]
1134
+ );
1133
1135
  core.shutdown().await;
1134
1136
  }
1135
1137
 
@@ -1260,38 +1262,6 @@ async fn buffered_work_drained_on_shutdown() {
1260
1262
  });
1261
1263
  }
1262
1264
 
1263
- #[tokio::test]
1264
- async fn buffering_tasks_doesnt_count_toward_outstanding_max() {
1265
- let wfid = "fake_wf_id";
1266
- let t = canned_histories::single_timer("1");
1267
- let mock = mock_workflow_client();
1268
- let mut tasks = VecDeque::new();
1269
- // A way bigger task list than allowed outstanding tasks
1270
- tasks.extend(
1271
- std::iter::repeat(hist_to_poll_resp(
1272
- &t,
1273
- wfid.to_owned(),
1274
- 2.into(),
1275
- TEST_Q.to_string(),
1276
- ))
1277
- .take(20),
1278
- );
1279
- let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1280
- mock.worker_cfg(|wc| {
1281
- wc.max_cached_workflows = 10;
1282
- wc.max_outstanding_workflow_tasks = 5;
1283
- });
1284
- let core = mock_worker(mock);
1285
- // Poll for first WFT
1286
- core.poll_workflow_activation().await.unwrap();
1287
- // This will error out when the mock runs out of responses. Otherwise it would hang when we
1288
- // hit the max
1289
- assert_matches!(
1290
- core.poll_workflow_activation().await.unwrap_err(),
1291
- PollWfError::TonicError(_)
1292
- );
1293
- }
1294
-
1295
1265
  #[tokio::test]
1296
1266
  async fn fail_wft_then_recover() {
1297
1267
  let t = canned_histories::long_sequential_timers(1);
@@ -1737,3 +1707,189 @@ async fn evict_missing_wf_during_poll_doesnt_eat_permit() {
1737
1707
 
1738
1708
  core.shutdown().await;
1739
1709
  }
1710
+
1711
+ #[tokio::test]
1712
+ async fn poll_faster_than_complete_wont_overflow_cache() {
1713
+ // Make workflow tasks for 5 different runs
1714
+ let tasks: Vec<_> = (1..=5)
1715
+ .map(|i| {
1716
+ hist_to_poll_resp(
1717
+ // New hist each time for new run ids
1718
+ &canned_histories::single_timer("1"),
1719
+ format!("wf-{}", i),
1720
+ ResponseType::ToTaskNum(1),
1721
+ TEST_Q.to_string(),
1722
+ )
1723
+ })
1724
+ .collect();
1725
+ let mut mock = mock_workflow_client();
1726
+ mock.expect_complete_workflow_task()
1727
+ .times(3)
1728
+ .returning(|_| Ok(Default::default()));
1729
+ let mut mock = MocksHolder::from_client_with_responses(mock, tasks, []);
1730
+ mock.worker_cfg(|wc| {
1731
+ wc.max_cached_workflows = 3;
1732
+ wc.max_outstanding_workflow_tasks = 3;
1733
+ });
1734
+ let core = mock_worker(mock);
1735
+ // Poll 4 times, completing once, such that max tasks are never exceeded
1736
+ let p1 = core.poll_workflow_activation().await.unwrap();
1737
+ let p2 = core.poll_workflow_activation().await.unwrap();
1738
+ let p3 = core.poll_workflow_activation().await.unwrap();
1739
+ for (i, p_res) in [&p1, &p2, &p3].into_iter().enumerate() {
1740
+ assert_matches!(
1741
+ &p_res.jobs[0].variant,
1742
+ Some(workflow_activation_job::Variant::StartWorkflow(sw))
1743
+ if sw.workflow_id == format!("wf-{}", i + 1)
1744
+ );
1745
+ }
1746
+ // Complete first task to free a wft slot. Cache size is at 3
1747
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1748
+ p1.run_id,
1749
+ start_timer_cmd(1, Duration::from_secs(1)),
1750
+ ))
1751
+ .await
1752
+ .unwrap();
1753
+ // Now we're at cache limit. We will poll for a task, discover it is for a new run, issue
1754
+ // an eviction, and buffer the new run task. However, the run we're trying to evict has pending
1755
+ // activations! Thus, we must complete them first before this poll will unblock, and then it
1756
+ // will unblock with the eviction.
1757
+ let p4 = async {
1758
+ let p4 = core.poll_workflow_activation().await.unwrap();
1759
+ assert_matches!(
1760
+ &p4.jobs.as_slice(),
1761
+ [WorkflowActivationJob {
1762
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1763
+ }]
1764
+ );
1765
+ p4
1766
+ };
1767
+ let p2_pending_completer = async {
1768
+ // Sleep needed because otherwise the complete unblocks waiting for the cache to free a slot
1769
+ // before we have a chance to actually... wait for it.
1770
+ tokio::time::sleep(Duration::from_millis(100)).await;
1771
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1772
+ p2.run_id,
1773
+ start_timer_cmd(1, Duration::from_secs(1)),
1774
+ ))
1775
+ .await
1776
+ .unwrap();
1777
+ };
1778
+ let (p4, _) = tokio::join!(p4, p2_pending_completer);
1779
+ assert_eq!(core.cached_workflows(), 3);
1780
+
1781
+ // This poll should also block until the eviction is actually completed
1782
+ let blocking_poll = async {
1783
+ let res = core.poll_workflow_activation().await.unwrap();
1784
+ assert_matches!(
1785
+ &res.jobs[0].variant,
1786
+ Some(workflow_activation_job::Variant::StartWorkflow(sw))
1787
+ if sw.workflow_id == format!("wf-{}", 4)
1788
+ );
1789
+ res
1790
+ };
1791
+ let complete_evict = async {
1792
+ core.complete_workflow_activation(WorkflowActivationCompletion::empty(p4.run_id))
1793
+ .await
1794
+ .unwrap();
1795
+ };
1796
+
1797
+ let (_p5, _) = tokio::join!(blocking_poll, complete_evict);
1798
+ assert_eq!(core.cached_workflows(), 3);
1799
+ // The next poll will get and buffer a task for a new run, and generate an eviction for p3 but
1800
+ // that eviction cannot be obtained until we complete the existing outstanding task.
1801
+ let p6 = async {
1802
+ let p6 = core.poll_workflow_activation().await.unwrap();
1803
+ assert_matches!(
1804
+ p6.jobs.as_slice(),
1805
+ [WorkflowActivationJob {
1806
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1807
+ }]
1808
+ );
1809
+ p6
1810
+ };
1811
+ let completer = async {
1812
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1813
+ p3.run_id,
1814
+ start_timer_cmd(1, Duration::from_secs(1)),
1815
+ ))
1816
+ .await
1817
+ .unwrap();
1818
+ };
1819
+ let (p6, _) = tokio::join!(p6, completer);
1820
+ let complete_evict = async {
1821
+ core.complete_workflow_activation(WorkflowActivationCompletion::empty(p6.run_id))
1822
+ .await
1823
+ .unwrap();
1824
+ };
1825
+ let blocking_poll = async {
1826
+ // This poll will also block until the last eviction goes through, and when it does it'll
1827
+ // produce the final start workflow task
1828
+ let res = core.poll_workflow_activation().await.unwrap();
1829
+ assert_matches!(
1830
+ &res.jobs[0].variant,
1831
+ Some(workflow_activation_job::Variant::StartWorkflow(sw))
1832
+ if sw.workflow_id == "wf-5"
1833
+ );
1834
+ };
1835
+
1836
+ tokio::join!(blocking_poll, complete_evict);
1837
+ // p5 outstanding and final poll outstanding -- hence one permit available
1838
+ assert_eq!(core.available_wft_permits(), 1);
1839
+ assert_eq!(core.cached_workflows(), 3);
1840
+ }
1841
+
1842
+ #[tokio::test]
1843
+ async fn eviction_waits_until_replay_finished() {
1844
+ let wfid = "fake_wf_id";
1845
+ let t = canned_histories::long_sequential_timers(3);
1846
+ let mock = mock_workflow_client();
1847
+ let mock = single_hist_mock_sg(wfid, t, &[3], mock, true);
1848
+ let core = mock_worker(mock);
1849
+
1850
+ let activation = core.poll_workflow_activation().await.unwrap();
1851
+ // Immediately request eviction after getting start workflow
1852
+ core.request_workflow_eviction(&activation.run_id);
1853
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1854
+ activation.run_id,
1855
+ start_timer_cmd(1, Duration::from_secs(1)),
1856
+ ))
1857
+ .await
1858
+ .unwrap();
1859
+ let t1_fired = core.poll_workflow_activation().await.unwrap();
1860
+ assert_matches!(
1861
+ t1_fired.jobs.as_slice(),
1862
+ [WorkflowActivationJob {
1863
+ variant: Some(workflow_activation_job::Variant::FireTimer(_)),
1864
+ }]
1865
+ );
1866
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmd(
1867
+ t1_fired.run_id,
1868
+ start_timer_cmd(2, Duration::from_secs(1)),
1869
+ ))
1870
+ .await
1871
+ .unwrap();
1872
+ let t2_fired = core.poll_workflow_activation().await.unwrap();
1873
+ assert_matches!(
1874
+ t2_fired.jobs.as_slice(),
1875
+ [WorkflowActivationJob {
1876
+ variant: Some(workflow_activation_job::Variant::FireTimer(_)),
1877
+ }]
1878
+ );
1879
+ core.complete_workflow_activation(WorkflowActivationCompletion::from_cmds(
1880
+ t2_fired.run_id,
1881
+ vec![CompleteWorkflowExecution { result: None }.into()],
1882
+ ))
1883
+ .await
1884
+ .unwrap();
1885
+ // The first two WFTs were replay, and now that we've caught up, the eviction will be sent
1886
+ let eviction = core.poll_workflow_activation().await.unwrap();
1887
+ assert_matches!(
1888
+ eviction.jobs.as_slice(),
1889
+ [WorkflowActivationJob {
1890
+ variant: Some(workflow_activation_job::Variant::RemoveFromCache(_)),
1891
+ }]
1892
+ );
1893
+
1894
+ core.shutdown().await;
1895
+ }
@@ -22,6 +22,7 @@ struct PaInner {
22
22
  queue: VecDeque<ActivationKey>,
23
23
  }
24
24
 
25
+ #[derive(Debug)]
25
26
  pub struct PendingActInfo {
26
27
  pub needs_eviction: Option<RemoveFromCache>,
27
28
  pub run_id: String,
@@ -41,6 +42,7 @@ impl PendingActivations {
41
42
  inner.queue.push_back(key);
42
43
  };
43
44
  }
45
+
44
46
  pub fn notify_needs_eviction(&self, run_id: &str, message: String, reason: EvictionReason) {
45
47
  let mut inner = self.inner.write();
46
48
 
@@ -106,6 +108,15 @@ impl PendingActivations {
106
108
  inner.activations.remove(k);
107
109
  }
108
110
  }
111
+
112
+ /// Returns true if any pending activation contains an eviction
113
+ pub fn is_some_eviction(&self) -> bool {
114
+ self.inner
115
+ .read()
116
+ .activations
117
+ .values()
118
+ .any(|act| act.needs_eviction.is_some())
119
+ }
109
120
  }
110
121
 
111
122
  #[cfg(test)]
@@ -271,7 +271,7 @@ pub(crate) fn test_telem_console() {
271
271
  pub(crate) fn test_telem_collector() {
272
272
  telemetry_init(&TelemetryOptions {
273
273
  otel_collector_url: Some("grpc://localhost:4317".parse().unwrap()),
274
- tracing_filter: "temporal_sdk_core=DEBUG".to_string(),
274
+ tracing_filter: "temporal_sdk_core=DEBUG,temporal_sdk=DEBUG".to_string(),
275
275
  log_forwarding_level: LevelFilter::Off,
276
276
  prometheus_export_bind_address: None,
277
277
  totally_disable: false,
@@ -16,6 +16,7 @@ use std::{
16
16
  collections::{BTreeMap, HashMap, HashSet, VecDeque},
17
17
  ops::RangeFull,
18
18
  sync::Arc,
19
+ time::Duration,
19
20
  };
20
21
  use temporal_sdk_core_api::Worker as WorkerTrait;
21
22
  use temporal_sdk_core_protos::{
@@ -34,6 +35,8 @@ use temporal_sdk_core_protos::{
34
35
  },
35
36
  },
36
37
  };
38
+ use temporal_sdk_core_test_utils::TestWorker;
39
+ use tokio::sync::Notify;
37
40
 
38
41
  pub const TEST_Q: &str = "q";
39
42
  pub static NO_MORE_WORK_ERROR_MSG: &str = "No more work to do";
@@ -103,13 +106,26 @@ pub(crate) fn mock_worker(mocks: MocksHolder) -> Worker {
103
106
  )
104
107
  }
105
108
 
109
+ pub(crate) fn mock_sdk(poll_cfg: MockPollCfg) -> TestWorker {
110
+ mock_sdk_cfg(poll_cfg, |_| {})
111
+ }
112
+ pub(crate) fn mock_sdk_cfg(
113
+ mut poll_cfg: MockPollCfg,
114
+ mutator: impl FnOnce(&mut WorkerConfig),
115
+ ) -> TestWorker {
116
+ poll_cfg.using_rust_sdk = true;
117
+ let mut mock = build_mock_pollers(poll_cfg);
118
+ mock.worker_cfg(mutator);
119
+ let core = mock_worker(mock);
120
+ TestWorker::new(Arc::new(core), TEST_Q.to_string())
121
+ }
122
+
106
123
  pub struct FakeWfResponses {
107
124
  pub wf_id: String,
108
125
  pub hist: TestHistoryBuilder,
109
126
  pub response_batches: Vec<ResponseType>,
110
127
  }
111
128
 
112
- // TODO: Rename to mock TQ or something?
113
129
  pub struct MocksHolder {
114
130
  client_bag: WorkerClientBag,
115
131
  mock_worker: MockWorker,
@@ -264,6 +280,10 @@ pub(crate) struct MockPollCfg {
264
280
  /// All calls to fail WFTs must match this predicate
265
281
  pub expect_fail_wft_matcher:
266
282
  Box<dyn Fn(&TaskToken, &WorkflowTaskFailedCause, &Option<Failure>) -> bool + Send>,
283
+ /// If being used with the Rust SDK, this is set true. It ensures pollers will not error out
284
+ /// early with no work, since we cannot know the exact number of times polling will happen.
285
+ /// Instead, they will just block forever.
286
+ pub using_rust_sdk: bool,
267
287
  }
268
288
 
269
289
  impl MockPollCfg {
@@ -278,6 +298,7 @@ impl MockPollCfg {
278
298
  num_expected_fails,
279
299
  mock_client: mock_workflow_client(),
280
300
  expect_fail_wft_matcher: Box::new(|_, _, _| true),
301
+ using_rust_sdk: false,
281
302
  }
282
303
  }
283
304
  pub fn from_resp_batches(
@@ -296,6 +317,7 @@ impl MockPollCfg {
296
317
  num_expected_fails: None,
297
318
  mock_client,
298
319
  expect_fail_wft_matcher: Box::new(|_, _, _| true),
320
+ using_rust_sdk: false,
299
321
  }
300
322
  }
301
323
  }
@@ -320,7 +342,7 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
320
342
  }
321
343
  }
322
344
 
323
- if cfg.enforce_correct_number_of_polls {
345
+ if cfg.enforce_correct_number_of_polls && !cfg.using_rust_sdk {
324
346
  *correct_num_polls.get_or_insert(0) += hist.response_batches.len();
325
347
  }
326
348
 
@@ -346,14 +368,17 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
346
368
  task_q_resps.insert(hist.wf_id, tasks);
347
369
  }
348
370
 
349
- let mut mock_poller = mock_poller();
371
+ let mut mock_poller = mock_manual_poller();
350
372
  // The poller will return history from any workflow runs that do not have currently
351
373
  // outstanding tasks.
352
374
  let outstanding = outstanding_wf_task_tokens.clone();
375
+ let outstanding_wakeup_orig = Arc::new(Notify::new());
376
+ let outstanding_wakeup = outstanding_wakeup_orig.clone();
353
377
  mock_poller
354
378
  .expect_poll()
355
379
  .times(correct_num_polls.map_or_else(|| RangeFull.into(), Into::<TimesRange>::into))
356
380
  .returning(move || {
381
+ let mut resp = None;
357
382
  for (_, tasks) in task_q_resps.iter_mut() {
358
383
  // Must extract run id from a workflow task associated with this workflow
359
384
  // TODO: Case where run id changes for same workflow id is not handled here
@@ -364,19 +389,40 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
364
389
  outstanding
365
390
  .write()
366
391
  .insert(rid, TaskToken(t.task_token.clone()));
367
- return Some(Ok(t));
392
+ resp = Some(Ok(t));
393
+ break;
368
394
  }
369
395
  }
370
396
  }
371
- Some(Err(tonic::Status::cancelled(NO_MORE_WORK_ERROR_MSG)))
397
+ let outstanding_wakeup = outstanding_wakeup.clone();
398
+ async move {
399
+ if resp.is_some() {
400
+ return resp;
401
+ }
402
+
403
+ if cfg.using_rust_sdk {
404
+ // Simulate poll timeout, or just send an empty response and then try again
405
+ // if we're told a new one might be ready.
406
+ tokio::select! {
407
+ _ = outstanding_wakeup.notified() => {}
408
+ _ = tokio::time::sleep(Duration::from_secs(60)) => {}
409
+ };
410
+ Some(Ok(Default::default()))
411
+ } else {
412
+ Some(Err(tonic::Status::cancelled(NO_MORE_WORK_ERROR_MSG)))
413
+ }
414
+ }
415
+ .boxed()
372
416
  });
373
417
  let mock_worker = MockWorker::new(Box::from(mock_poller));
374
418
 
375
419
  let outstanding = outstanding_wf_task_tokens.clone();
420
+ let outstanding_wakeup = outstanding_wakeup_orig.clone();
376
421
  cfg.mock_client
377
422
  .expect_complete_workflow_task()
378
423
  .returning(move |comp| {
379
424
  outstanding.write().remove_by_right(&comp.task_token);
425
+ outstanding_wakeup.notify_one();
380
426
  Ok(RespondWorkflowTaskCompletedResponse::default())
381
427
  });
382
428
  let outstanding = outstanding_wf_task_tokens.clone();
@@ -389,6 +435,7 @@ pub(crate) fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder {
389
435
  )
390
436
  .returning(move |tt, _, _| {
391
437
  outstanding.write().remove_by_right(&tt);
438
+ outstanding_wakeup_orig.notify_one();
392
439
  Ok(Default::default())
393
440
  });
394
441
 
@@ -498,10 +545,13 @@ pub(crate) async fn poll_and_reply_clears_outstanding_evicts<'a>(
498
545
  }
499
546
  };
500
547
 
548
+ let ends_execution = reply.has_execution_ending();
549
+
501
550
  worker.complete_workflow_activation(reply).await.unwrap();
502
551
 
503
- // Restart assertions from the beginning if it was an eviction
504
- if contains_eviction.is_some() {
552
+ // Restart assertions from the beginning if it was an eviction (and workflow execution
553
+ // isn't over)
554
+ if contains_eviction.is_some() && !ends_execution {
505
555
  continue 'outer;
506
556
  }
507
557
 
@@ -40,7 +40,7 @@ use crate::{
40
40
  };
41
41
  use activities::{LocalInFlightActInfo, WorkerActivityTasks};
42
42
  use futures::{Future, TryFutureExt};
43
- use std::{convert::TryInto, sync::Arc};
43
+ use std::{convert::TryInto, future, sync::Arc};
44
44
  use temporal_client::WorkflowTaskCompletion;
45
45
  use temporal_sdk_core_protos::{
46
46
  coresdk::{
@@ -102,7 +102,6 @@ pub struct Worker {
102
102
 
103
103
  #[async_trait::async_trait]
104
104
  impl WorkerTrait for Worker {
105
- #[instrument(level = "debug", skip(self), fields(run_id))]
106
105
  async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
107
106
  self.next_workflow_activation().await
108
107
  }
@@ -120,8 +119,6 @@ impl WorkerTrait for Worker {
120
119
  }
121
120
  }
122
121
 
123
- #[instrument(level = "debug", skip(self, completion),
124
- fields(completion=%&completion, run_id=%completion.run_id))]
125
122
  async fn complete_workflow_activation(
126
123
  &self,
127
124
  completion: WorkflowActivationCompletion,
@@ -165,7 +162,6 @@ impl WorkerTrait for Worker {
165
162
  }
166
163
 
167
164
  /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
168
- // TODO: will be in trait after Roey's shutdown refactor
169
165
  fn initiate_shutdown(&self) {
170
166
  self.shutdown_token.cancel();
171
167
  // First, we want to stop polling of both activity and workflow tasks
@@ -173,6 +169,7 @@ impl WorkerTrait for Worker {
173
169
  atm.notify_shutdown();
174
170
  }
175
171
  self.wf_task_source.stop_pollers();
172
+ info!("Initiated shutdown");
176
173
  }
177
174
 
178
175
  async fn shutdown(&self) {
@@ -258,6 +255,11 @@ impl Worker {
258
255
  Self::new(config, None, Arc::new(client.into()), Default::default())
259
256
  }
260
257
 
258
+ /// Returns number of currently cached workflows
259
+ pub fn cached_workflows(&self) -> usize {
260
+ self.wft_manager.cached_workflows()
261
+ }
262
+
261
263
  pub(crate) fn new_with_pollers(
262
264
  config: WorkerConfig,
263
265
  sticky_queue_name: Option<String>,
@@ -313,7 +315,6 @@ impl Worker {
313
315
  /// completed
314
316
  pub(crate) async fn shutdown(&self) {
315
317
  self.initiate_shutdown();
316
- info!("Initiated shutdown");
317
318
  // Next we need to wait for all local activities to finish so no more workflow task
318
319
  // heartbeats will be generated
319
320
  self.local_act_mgr.shutdown_and_wait_all_finished().await;
@@ -436,6 +437,8 @@ impl Worker {
436
437
  Ok(())
437
438
  }
438
439
  }
440
+
441
+ #[instrument(level = "debug", skip(self), fields(run_id))]
439
442
  pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
440
443
  // The poll needs to be in a loop because we can't guarantee tail call optimization in Rust
441
444
  // (simply) and we really, really need that for long-poll retries.
@@ -448,6 +451,21 @@ impl Worker {
448
451
  return Ok(pa);
449
452
  }
450
453
 
454
+ if self.config.max_cached_workflows > 0 {
455
+ if let Some(cache_cap_fut) = self.wft_manager.wait_for_cache_capacity() {
456
+ tokio::select! {
457
+ biased;
458
+ // We must loop up if there's a new pending activation, since those are for
459
+ // already-cached workflows and may include evictions which will change whether
460
+ // we are still waiting or not.
461
+ _ = self.pending_activations_notify.notified() => {
462
+ continue
463
+ },
464
+ _ = cache_cap_fut => {}
465
+ };
466
+ }
467
+ }
468
+
451
469
  // Apply any buffered poll responses from the server. Must come after pending
452
470
  // activations, since there may be an eviction etc for whatever run is popped here.
453
471
  if let Some(buff_wft) = self.wft_manager.next_buffered_poll() {
@@ -481,6 +499,8 @@ impl Worker {
481
499
  }
482
500
  }
483
501
 
502
+ #[instrument(level = "debug", skip(self, completion),
503
+ fields(completion=%&completion, run_id=%completion.run_id))]
484
504
  pub(crate) async fn complete_workflow_activation(
485
505
  &self,
486
506
  completion: WorkflowActivationCompletion,
@@ -539,9 +559,9 @@ impl Worker {
539
559
  reason: EvictionReason,
540
560
  ) -> bool {
541
561
  match self.wft_manager.request_eviction(run_id, message, reason) {
542
- EvictionRequestResult::EvictionIssued(_) => true,
562
+ EvictionRequestResult::EvictionRequested(_) => true,
543
563
  EvictionRequestResult::NotFound => false,
544
- EvictionRequestResult::EvictionAlreadyOutstanding => false,
564
+ EvictionRequestResult::EvictionAlreadyRequested(_) => false,
545
565
  }
546
566
  }
547
567
 
@@ -572,7 +592,17 @@ impl Worker {
572
592
  async fn workflow_poll_or_wfts_drained(
573
593
  &self,
574
594
  ) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
595
+ let mut shutdown_seen = false;
575
596
  loop {
597
+ // If we've already seen shutdown once it's important we don't freak out and
598
+ // restart the loop constantly while waiting for poll to finish shutting down.
599
+ let shutdown_restarter = async {
600
+ if shutdown_seen {
601
+ future::pending::<()>().await;
602
+ } else {
603
+ self.shutdown_token.cancelled().await;
604
+ };
605
+ };
576
606
  tokio::select! {
577
607
  biased;
578
608
 
@@ -585,7 +615,9 @@ impl Worker {
585
615
  }
586
616
  return r
587
617
  },
588
- _ = self.shutdown_token.cancelled() => {},
618
+ _ = shutdown_restarter => {
619
+ shutdown_seen = true;
620
+ },
589
621
  }
590
622
  }
591
623
  }
@@ -593,8 +625,8 @@ impl Worker {
593
625
  /// Wait until not at the outstanding workflow task limit, and then poll this worker's task
594
626
  /// queue for new workflow tasks.
595
627
  ///
596
- /// Returns `Ok(None)` in the event of a poll timeout, or if there was some gRPC error that
597
- /// callers can't do anything about.
628
+ /// Returns `Ok(None)` in the event of a poll timeout, if there was some gRPC error that
629
+ /// callers can't do anything about, or any other reason to restart the poll loop.
598
630
  async fn workflow_poll(&self) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
599
631
  // We can't say we're shut down if there are outstanding LAs, as they could end up WFT
600
632
  // heartbeating which is a "new" workflow task that we need to accept and process as long as
@@ -642,6 +674,20 @@ impl Worker {
642
674
 
643
675
  // Only permanently take a permit in the event the poll finished completely
644
676
  sem.forget();
677
+
678
+ let work = if self.config.max_cached_workflows > 0 {
679
+ // Add the workflow to cache management. We do not even attempt insert if cache
680
+ // size is zero because we do not want to generate eviction requests for
681
+ // workflows which may immediately generate pending activations.
682
+ if let Some(ready_to_work) = self.wft_manager.add_new_run_to_cache(work).await {
683
+ ready_to_work
684
+ } else {
685
+ return Ok(None);
686
+ }
687
+ } else {
688
+ work
689
+ };
690
+
645
691
  Ok(Some(work))
646
692
  }
647
693
 
@@ -664,9 +710,9 @@ impl Worker {
664
710
  Some(a)
665
711
  }
666
712
  NewWfTaskOutcome::TaskBuffered => {
667
- // If the task was buffered, it's not actually outstanding, so we can
668
- // immediately return a permit.
669
- self.return_workflow_task_permit();
713
+ // Though the task is not outstanding in the lang sense, it is outstanding from the
714
+ // server perspective. We used to return a permit here, but that doesn't actually
715
+ // make much sense.
670
716
  None
671
717
  }
672
718
  NewWfTaskOutcome::Autocomplete | NewWfTaskOutcome::LocalActsOutstanding => {
@@ -743,7 +789,7 @@ impl Worker {
743
789
  commands,
744
790
  query_responses,
745
791
  sticky_attributes: None,
746
- return_new_workflow_task: force_new_wft,
792
+ return_new_workflow_task: true,
747
793
  force_create_new_workflow_task: force_new_wft,
748
794
  };
749
795
  let sticky_attrs = self.get_sticky_attrs();
@@ -950,6 +996,7 @@ impl Worker {
950
996
  }
951
997
  }
952
998
 
999
+ #[derive(Debug, Copy, Clone)]
953
1000
  struct WFTReportOutcome {
954
1001
  reported_to_server: bool,
955
1002
  failed: bool,
@@ -986,6 +1033,7 @@ mod tests {
986
1033
 
987
1034
  let cfg = test_worker_cfg()
988
1035
  .max_outstanding_workflow_tasks(5_usize)
1036
+ .max_cached_workflows(5_usize)
989
1037
  .build()
990
1038
  .unwrap();
991
1039
  let worker = Worker::new_test(cfg, mock_client);
@@ -1018,6 +1066,7 @@ mod tests {
1018
1066
 
1019
1067
  let cfg = test_worker_cfg()
1020
1068
  .max_outstanding_workflow_tasks(5_usize)
1069
+ .max_cached_workflows(5_usize)
1021
1070
  .build()
1022
1071
  .unwrap();
1023
1072
  let worker = Worker::new_test(cfg, mock_client);