@temporalio/core-bridge 0.22.0 → 1.0.0-rc.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/Cargo.lock +120 -15
  2. package/Cargo.toml +3 -1
  3. package/README.md +1 -1
  4. package/index.d.ts +137 -33
  5. package/package.json +6 -6
  6. package/releases/aarch64-apple-darwin/index.node +0 -0
  7. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  8. package/releases/x86_64-apple-darwin/index.node +0 -0
  9. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  10. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
  12. package/sdk-core/ARCHITECTURE.md +9 -7
  13. package/sdk-core/README.md +5 -1
  14. package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  15. package/sdk-core/bridge-ffi/src/lib.rs +1 -1
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +60 -37
  17. package/sdk-core/client/Cargo.toml +1 -0
  18. package/sdk-core/client/src/lib.rs +50 -15
  19. package/sdk-core/client/src/raw.rs +167 -55
  20. package/sdk-core/client/src/retry.rs +9 -4
  21. package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
  22. package/sdk-core/core/Cargo.toml +2 -0
  23. package/sdk-core/core/benches/workflow_replay.rs +1 -7
  24. package/sdk-core/core/src/abstractions.rs +137 -16
  25. package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
  26. package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
  27. package/sdk-core/core/src/core_tests/determinism.rs +2 -2
  28. package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
  29. package/sdk-core/core/src/core_tests/queries.rs +146 -60
  30. package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
  31. package/sdk-core/core/src/core_tests/workers.rs +39 -23
  32. package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  33. package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
  34. package/sdk-core/core/src/lib.rs +8 -5
  35. package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
  36. package/sdk-core/core/src/protosext/mod.rs +7 -9
  37. package/sdk-core/core/src/retry_logic.rs +73 -16
  38. package/sdk-core/core/src/telemetry/metrics.rs +21 -7
  39. package/sdk-core/core/src/telemetry/mod.rs +182 -110
  40. package/sdk-core/core/src/test_help/mod.rs +341 -109
  41. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
  42. package/sdk-core/core/src/worker/activities/local_activities.rs +22 -25
  43. package/sdk-core/core/src/worker/activities.rs +156 -29
  44. package/sdk-core/core/src/worker/client.rs +1 -0
  45. package/sdk-core/core/src/worker/mod.rs +132 -659
  46. package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
  47. package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
  48. package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
  49. package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
  50. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
  51. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
  52. package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
  53. package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
  54. package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
  55. package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
  56. package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
  57. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
  58. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  59. package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
  60. package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
  61. package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
  62. package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
  63. package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
  64. package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
  65. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
  66. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
  67. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
  68. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  69. package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
  70. package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
  71. package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
  72. package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  73. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
  74. package/sdk-core/core-api/src/errors.rs +3 -10
  75. package/sdk-core/core-api/src/lib.rs +2 -1
  76. package/sdk-core/core-api/src/worker.rs +26 -2
  77. package/sdk-core/etc/dynamic-config.yaml +2 -0
  78. package/sdk-core/integ-with-otel.sh +1 -1
  79. package/sdk-core/protos/api_upstream/Makefile +4 -4
  80. package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
  81. package/sdk-core/protos/api_upstream/buf.yaml +8 -9
  82. package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  83. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  85. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
  87. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  88. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
  89. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
  90. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
  91. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
  92. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
  93. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
  94. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  95. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
  96. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
  97. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
  98. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
  99. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
  100. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +27 -6
  101. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
  102. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
  103. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
  104. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
  105. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
  106. package/sdk-core/sdk/src/activity_context.rs +12 -5
  107. package/sdk-core/sdk/src/app_data.rs +37 -0
  108. package/sdk-core/sdk/src/lib.rs +76 -43
  109. package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
  110. package/sdk-core/sdk/src/workflow_context.rs +14 -19
  111. package/sdk-core/sdk/src/workflow_future.rs +11 -6
  112. package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
  113. package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
  114. package/sdk-core/sdk-core-protos/src/lib.rs +87 -176
  115. package/sdk-core/test-utils/src/histfetch.rs +1 -1
  116. package/sdk-core/test-utils/src/lib.rs +93 -77
  117. package/sdk-core/tests/integ_tests/client_tests.rs +2 -2
  118. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
  119. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
  120. package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
  121. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
  122. package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  123. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
  124. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
  125. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
  126. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  127. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
  128. package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
  129. package/sdk-core/tests/load_tests.rs +8 -3
  130. package/sdk-core/tests/main.rs +7 -3
  131. package/src/conversions.rs +149 -70
  132. package/src/errors.rs +10 -21
  133. package/src/lib.rs +400 -319
  134. package/sdk-core/core/src/pending_activations.rs +0 -173
  135. package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
  136. package/sdk-core/core/src/workflow/mod.rs +0 -478
  137. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
  138. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
  139. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
@@ -9,8 +9,10 @@ use std::{
9
9
  time::{self, Duration, Instant},
10
10
  };
11
11
  use temporal_sdk_core_protos::{
12
- coresdk::{activity_task::ActivityCancelReason, common, ActivityHeartbeat, IntoPayloadsExt},
13
- temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
12
+ coresdk::{activity_task::ActivityCancelReason, ActivityHeartbeat, IntoPayloadsExt},
13
+ temporal::api::{
14
+ common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
15
+ },
14
16
  };
15
17
  use tokio::{
16
18
  sync::{
@@ -47,7 +49,7 @@ enum HeartbeatAction {
47
49
  #[derive(Debug)]
48
50
  pub struct ValidActivityHeartbeat {
49
51
  pub task_token: TaskToken,
50
- pub details: Vec<common::Payload>,
52
+ pub details: Vec<Payload>,
51
53
  pub throttle_interval: time::Duration,
52
54
  }
53
55
 
@@ -58,7 +60,7 @@ enum HeartbeatExecutorAction {
58
60
  /// Report heartbeat to the server
59
61
  Report {
60
62
  task_token: TaskToken,
61
- details: Vec<common::Payload>,
63
+ details: Vec<Payload>,
62
64
  },
63
65
  }
64
66
 
@@ -132,7 +134,15 @@ impl ActivityHeartbeatManager {
132
134
  let _ = self.shutdown_token.cancel();
133
135
  let mut handle = self.join_handle.lock().await;
134
136
  if let Some(h) = handle.take() {
135
- h.await.expect("shutdown should exit cleanly");
137
+ let handle_r = h.await;
138
+ if let Err(e) = handle_r {
139
+ if !e.is_cancelled() {
140
+ error!(
141
+ "Unexpected error joining heartbeating tasks during shutdown: {:?}",
142
+ e
143
+ )
144
+ }
145
+ }
136
146
  }
137
147
  }
138
148
  }
@@ -140,7 +150,7 @@ impl ActivityHeartbeatManager {
140
150
  #[derive(Debug)]
141
151
  struct ActivityHeartbeatState {
142
152
  /// If None and throttle interval is over, untrack this task token
143
- last_recorded_details: Option<Vec<common::Payload>>,
153
+ last_recorded_details: Option<Vec<Payload>>,
144
154
  /// True if we've queued up a request to record against server, but it hasn't yet completed
145
155
  is_record_in_flight: bool,
146
156
  last_send_requested: Instant,
@@ -401,9 +411,8 @@ mod test {
401
411
 
402
412
  use crate::worker::client::mocks::mock_workflow_client;
403
413
  use std::time::Duration;
404
- use temporal_sdk_core_protos::{
405
- coresdk::common::Payload,
406
- temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
414
+ use temporal_sdk_core_protos::temporal::api::{
415
+ common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
407
416
  };
408
417
  use tokio::time::sleep;
409
418
 
@@ -1,5 +1,7 @@
1
1
  use crate::{
2
- abstractions::MeteredSemaphore, protosext::ValidScheduleLA, retry_logic::RetryPolicyExt,
2
+ abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
3
+ protosext::ValidScheduleLA,
4
+ retry_logic::RetryPolicyExt,
3
5
  MetricsContext, TaskToken,
4
6
  };
5
7
  use parking_lot::Mutex;
@@ -12,12 +14,8 @@ use temporal_sdk_core_protos::{
12
14
  coresdk::{
13
15
  activity_result::{Cancellation, Failure as ActFail, Success},
14
16
  activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
15
- common::WorkflowExecution,
16
- },
17
- temporal::api::{
18
- enums::v1::TimeoutType,
19
- failure::v1::{failure::FailureInfo, ApplicationFailureInfo},
20
17
  },
18
+ temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
21
19
  };
22
20
  use tokio::{
23
21
  sync::{
@@ -47,6 +45,7 @@ pub(crate) struct LocalInFlightActInfo {
47
45
  pub la_info: NewLocalAct,
48
46
  pub dispatch_time: Instant,
49
47
  pub attempt: u32,
48
+ _permit: OwnedMeteredSemPermit,
50
49
  }
51
50
 
52
51
  #[derive(Debug, Clone)]
@@ -184,6 +183,7 @@ impl LocalActivityManager {
184
183
  )
185
184
  }
186
185
 
186
+ #[cfg(test)]
187
187
  pub(crate) fn num_outstanding(&self) -> usize {
188
188
  self.dat.lock().outstanding_activity_tasks.len()
189
189
  }
@@ -195,11 +195,11 @@ impl LocalActivityManager {
195
195
 
196
196
  pub(crate) fn enqueue(
197
197
  &self,
198
- reqs: impl IntoIterator<Item = LocalActRequest> + Debug,
198
+ reqs: impl IntoIterator<Item = LocalActRequest>,
199
199
  ) -> Vec<LocalActivityResolution> {
200
- debug!("Queuing local activities: {:?}", &reqs);
201
200
  let mut immediate_resolutions = vec![];
202
201
  for req in reqs {
202
+ debug!(local_activity = ?req, "Queuing local activity");
203
203
  match req {
204
204
  LocalActRequest::New(act) => {
205
205
  let id = ExecutingLAId {
@@ -270,7 +270,7 @@ impl LocalActivityManager {
270
270
  /// Returns the next pending local-activity related action, or None if shutdown has initiated
271
271
  /// and there are no more remaining actions to take.
272
272
  pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
273
- let new_or_retry = match self.rcvs.lock().await.next(&self.semaphore).await? {
273
+ let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
274
274
  NewOrCancel::Cancel(c) => {
275
275
  return match c {
276
276
  CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
@@ -311,7 +311,7 @@ impl LocalActivityManager {
311
311
  }
312
312
  };
313
313
  }
314
- NewOrCancel::New(n) => n,
314
+ NewOrCancel::New(n, perm) => (n, perm),
315
315
  };
316
316
 
317
317
  // It is important that there are no await points after receiving from the channel, as
@@ -366,6 +366,7 @@ impl LocalActivityManager {
366
366
  la_info: orig,
367
367
  dispatch_time: Instant::now(),
368
368
  attempt,
369
+ _permit: permit,
369
370
  },
370
371
  );
371
372
  if let Some(to) = dat.timeout_tasks.get_mut(&id) {
@@ -410,7 +411,6 @@ impl LocalActivityManager {
410
411
  seq_num: info.la_info.schedule_cmd.seq,
411
412
  };
412
413
  dlock.id_to_tt.remove(&exec_id);
413
- self.semaphore.add_permit();
414
414
 
415
415
  match status {
416
416
  LocalActivityExecutionResult::Completed(_)
@@ -423,13 +423,9 @@ impl LocalActivityManager {
423
423
  LocalActivityExecutionResult::Failed(f) => {
424
424
  if let Some(backoff_dur) = info.la_info.schedule_cmd.retry_policy.should_retry(
425
425
  info.attempt as usize,
426
- f.failure.as_ref().map_or("", |f| match &f.failure_info {
427
- Some(FailureInfo::ApplicationFailureInfo(ApplicationFailureInfo {
428
- r#type,
429
- ..
430
- })) => r#type.as_str(),
431
- _ => "",
432
- }),
426
+ f.failure
427
+ .as_ref()
428
+ .and_then(|f| f.maybe_application_failure()),
433
429
  ) {
434
430
  let will_use_timer =
435
431
  backoff_dur > info.la_info.schedule_cmd.local_retry_threshold;
@@ -520,7 +516,7 @@ enum CancelOrTimeout {
520
516
  }
521
517
 
522
518
  enum NewOrCancel {
523
- New(NewOrRetry),
519
+ New(NewOrRetry, OwnedMeteredSemPermit),
524
520
  Cancel(CancelOrTimeout),
525
521
  }
526
522
 
@@ -538,13 +534,13 @@ impl RcvChans {
538
534
  cancel = async { self.cancels_req_rx.recv().await } => {
539
535
  Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
540
536
  }
541
- maybe_new_or_retry = async {
537
+ (maybe_new_or_retry, perm) = async {
542
538
  // Wait for a permit to take a task and forget it. Permits are removed until a
543
539
  // completion.
544
- new_sem.acquire().await.expect("is never closed").forget();
545
- self.act_req_rx.recv().await
540
+ let perm = new_sem.acquire_owned().await.expect("is never closed");
541
+ (self.act_req_rx.recv().await, perm)
546
542
  } => Some(NewOrCancel::New(
547
- maybe_new_or_retry.expect("Send halves of LA manager are not dropped")
543
+ maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
548
544
  )),
549
545
  _ = self.shutdown.cancelled() => None
550
546
  }
@@ -643,8 +639,9 @@ impl Drop for TimeoutBag {
643
639
  mod tests {
644
640
  use super::*;
645
641
  use crate::protosext::LACloseTimeouts;
646
- use temporal_sdk_core_protos::{
647
- coresdk::common::RetryPolicy, temporal::api::failure::v1::Failure,
642
+ use temporal_sdk_core_protos::temporal::api::{
643
+ common::v1::RetryPolicy,
644
+ failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
648
645
  };
649
646
  use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
650
647
 
@@ -8,7 +8,7 @@ pub(crate) use local_activities::{
8
8
  };
9
9
 
10
10
  use crate::{
11
- abstractions::MeteredSemaphore,
11
+ abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
12
12
  pollers::BoxedActPoller,
13
13
  telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
14
14
  worker::{
@@ -19,6 +19,12 @@ use crate::{
19
19
  };
20
20
  use activity_heartbeat_manager::ActivityHeartbeatManager;
21
21
  use dashmap::DashMap;
22
+ use governor::{
23
+ clock::DefaultClock,
24
+ middleware::NoOpMiddleware,
25
+ state::{InMemoryState, NotKeyed},
26
+ Quota, RateLimiter,
27
+ };
22
28
  use std::{
23
29
  convert::TryInto,
24
30
  sync::Arc,
@@ -52,7 +58,6 @@ struct InFlightActInfo {
52
58
  }
53
59
 
54
60
  /// Augments [InFlightActInfo] with details specific to remote activities
55
- #[derive(Debug)]
56
61
  struct RemoteInFlightActInfo {
57
62
  pub base: InFlightActInfo,
58
63
  /// Used to calculate aggregation delay between activity heartbeats.
@@ -63,12 +68,15 @@ struct RemoteInFlightActInfo {
63
68
  /// we have learned from heartbeating and issued a cancel task, in which case we may simply
64
69
  /// discard the reply.
65
70
  pub known_not_found: bool,
71
+ /// The permit from the max concurrent semaphore
72
+ _permit: OwnedMeteredSemPermit,
66
73
  }
67
74
  impl RemoteInFlightActInfo {
68
75
  fn new(
69
76
  activity_type: String,
70
77
  workflow_type: String,
71
78
  heartbeat_timeout: Option<prost_types::Duration>,
79
+ permit: OwnedMeteredSemPermit,
72
80
  ) -> Self {
73
81
  Self {
74
82
  base: InFlightActInfo {
@@ -79,10 +87,26 @@ impl RemoteInFlightActInfo {
79
87
  heartbeat_timeout,
80
88
  issued_cancel_to_lang: false,
81
89
  known_not_found: false,
90
+ _permit: permit,
82
91
  }
83
92
  }
84
93
  }
85
94
 
95
+ struct NonPollActBuffer {
96
+ tx: async_channel::Sender<PermittedTqResp>,
97
+ rx: async_channel::Receiver<PermittedTqResp>,
98
+ }
99
+ impl NonPollActBuffer {
100
+ pub fn new() -> Self {
101
+ let (tx, rx) = async_channel::unbounded();
102
+ Self { tx, rx }
103
+ }
104
+
105
+ pub async fn next(&self) -> PermittedTqResp {
106
+ self.rx.recv().await.expect("Send half cannot be dropped")
107
+ }
108
+ }
109
+
86
110
  pub(crate) struct WorkerActivityTasks {
87
111
  /// Centralizes management of heartbeat issuing / throttling
88
112
  heartbeat_manager: ActivityHeartbeatManager,
@@ -91,8 +115,13 @@ pub(crate) struct WorkerActivityTasks {
91
115
  /// Buffers activity task polling in the event we need to return a cancellation while a poll is
92
116
  /// ongoing.
93
117
  poller: BoxedActPoller,
118
+ /// Holds activity tasks we have received by non-polling means. EX: In direct response to
119
+ /// workflow task completion.
120
+ non_poll_tasks: NonPollActBuffer,
94
121
  /// Ensures we stay at or below this worker's maximum concurrent activity limit
95
- activities_semaphore: MeteredSemaphore,
122
+ activities_semaphore: Arc<MeteredSemaphore>,
123
+ /// Enables per-worker rate-limiting of activity tasks
124
+ ratelimiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
96
125
  /// Wakes every time an activity is removed from the outstanding map
97
126
  complete_notify: Notify,
98
127
 
@@ -105,6 +134,7 @@ pub(crate) struct WorkerActivityTasks {
105
134
  impl WorkerActivityTasks {
106
135
  pub(crate) fn new(
107
136
  max_activity_tasks: usize,
137
+ max_worker_act_per_sec: Option<f64>,
108
138
  poller: BoxedActPoller,
109
139
  client: Arc<WorkerClientBag>,
110
140
  metrics: MetricsContext,
@@ -115,11 +145,15 @@ impl WorkerActivityTasks {
115
145
  heartbeat_manager: ActivityHeartbeatManager::new(client),
116
146
  outstanding_activity_tasks: Default::default(),
117
147
  poller,
118
- activities_semaphore: MeteredSemaphore::new(
148
+ non_poll_tasks: NonPollActBuffer::new(),
149
+ activities_semaphore: Arc::new(MeteredSemaphore::new(
119
150
  max_activity_tasks,
120
151
  metrics.with_new_attrs([activity_worker_type()]),
121
152
  MetricsContext::available_task_slots,
122
- ),
153
+ )),
154
+ ratelimiter: max_worker_act_per_sec.and_then(|ps| {
155
+ Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
156
+ }),
123
157
  complete_notify: Notify::new(),
124
158
  metrics,
125
159
  max_heartbeat_throttle_interval,
@@ -151,12 +185,15 @@ impl WorkerActivityTasks {
151
185
  // Acquire and subsequently forget a permit for an outstanding activity. When they are
152
186
  // completed, we must add a new permit to the semaphore, since holding the permit the
153
187
  // entire time lang does work would be a challenge.
154
- let sem = self
188
+ let perm = self
155
189
  .activities_semaphore
156
- .acquire()
190
+ .acquire_owned()
157
191
  .await
158
192
  .expect("outstanding activity semaphore not closed");
159
- (self.poller.poll().await, sem)
193
+ if let Some(ref rl) = self.ratelimiter {
194
+ rl.until_ready().await;
195
+ }
196
+ (self.poller.poll().await, perm)
160
197
  };
161
198
 
162
199
  tokio::select! {
@@ -165,7 +202,10 @@ impl WorkerActivityTasks {
165
202
  cancel_task = self.next_pending_cancel_task() => {
166
203
  cancel_task
167
204
  }
168
- (work, sem) = poll_with_semaphore => {
205
+ task = self.non_poll_tasks.next() => {
206
+ Ok(Some(self.about_to_issue_task(task)))
207
+ }
208
+ (work, permit) = poll_with_semaphore => {
169
209
  match work {
170
210
  Some(Ok(work)) => {
171
211
  if work == PollActivityTaskQueueResponse::default() {
@@ -173,23 +213,10 @@ impl WorkerActivityTasks {
173
213
  self.metrics.act_poll_timeout();
174
214
  return Ok(None)
175
215
  }
176
-
177
- if let Some(dur) = work.sched_to_start() {
178
- self.metrics
179
- .act_sched_to_start_latency(dur);
180
- }
181
-
182
- self.outstanding_activity_tasks.insert(
183
- work.task_token.clone().into(),
184
- RemoteInFlightActInfo::new(
185
- work.activity_type.clone().unwrap_or_default().name,
186
- work.workflow_type.clone().unwrap_or_default().name,
187
- work.heartbeat_timeout.clone()
188
- ),
189
- );
190
- // Only permanently take a permit in the event the poll finished properly
191
- sem.forget();
192
- Ok(Some(ActivityTask::start_from_poll_resp(work)))
216
+ let work = self.about_to_issue_task(PermittedTqResp {
217
+ resp: work, permit
218
+ });
219
+ Ok(Some(work))
193
220
  }
194
221
  None => {
195
222
  Err(PollActivityError::ShutDown)
@@ -212,10 +239,9 @@ impl WorkerActivityTasks {
212
239
  workflow_type(act_info.base.workflow_type.clone()),
213
240
  ]);
214
241
  act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
215
- self.activities_semaphore.add_permit();
216
- self.heartbeat_manager.evict(task_token.clone()).await;
217
242
  let known_not_found = act_info.known_not_found;
218
243
  drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
244
+ self.heartbeat_manager.evict(task_token.clone()).await;
219
245
  self.complete_notify.notify_waiters();
220
246
 
221
247
  // No need to report activities which we already know the server doesn't care about
@@ -304,6 +330,14 @@ impl WorkerActivityTasks {
304
330
  self.heartbeat_manager.record(details, throttle_interval)
305
331
  }
306
332
 
333
+ /// Returns a handle that the workflows management side can use to interact with this manager
334
+ pub(crate) fn get_handle_for_workflows(&self) -> ActivitiesFromWFTsHandle {
335
+ ActivitiesFromWFTsHandle {
336
+ sem: self.activities_semaphore.clone(),
337
+ tx: self.non_poll_tasks.tx.clone(),
338
+ }
339
+ }
340
+
307
341
  async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
308
342
  let next_pc = self.heartbeat_manager.next_pending_cancel().await;
309
343
  // Issue cancellations for anything we noticed was cancelled during heartbeating
@@ -336,8 +370,101 @@ impl WorkerActivityTasks {
336
370
  }
337
371
  }
338
372
 
373
+ /// Called when there is a new act task about to be bubbled up out of the manager
374
+ fn about_to_issue_task(&self, task: PermittedTqResp) -> ActivityTask {
375
+ if let Some(dur) = task.resp.sched_to_start() {
376
+ self.metrics.act_sched_to_start_latency(dur);
377
+ };
378
+
379
+ self.outstanding_activity_tasks.insert(
380
+ task.resp.task_token.clone().into(),
381
+ RemoteInFlightActInfo::new(
382
+ task.resp.activity_type.clone().unwrap_or_default().name,
383
+ task.resp.workflow_type.clone().unwrap_or_default().name,
384
+ task.resp.heartbeat_timeout.clone(),
385
+ task.permit,
386
+ ),
387
+ );
388
+
389
+ ActivityTask::start_from_poll_resp(task.resp)
390
+ }
391
+
339
392
  #[cfg(test)]
340
393
  pub(crate) fn remaining_activity_capacity(&self) -> usize {
341
- self.activities_semaphore.sem.available_permits()
394
+ self.activities_semaphore.available_permits()
395
+ }
396
+ }
397
+
398
+ /// Provides facilities for the workflow side of things to interact with the activity manager.
399
+ /// Allows for the handling of activities returned by WFT completions.
400
+ pub(crate) struct ActivitiesFromWFTsHandle {
401
+ sem: Arc<MeteredSemaphore>,
402
+ tx: async_channel::Sender<PermittedTqResp>,
403
+ }
404
+
405
+ impl ActivitiesFromWFTsHandle {
406
+ /// Returns a handle that can be used to reserve an activity slot. EX: When requesting eager
407
+ /// dispatch of an activity to this worker upon workflow task completion
408
+ pub(crate) fn reserve_slot(&self) -> Option<OwnedMeteredSemPermit> {
409
+ self.sem.try_acquire_owned().ok()
410
+ }
411
+
412
+ /// Queue new activity tasks for dispatch received from non-polling sources (ex: eager returns
413
+ /// from WFT completion)
414
+ pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = PermittedTqResp>) {
415
+ for t in tasks.into_iter() {
416
+ self.tx.try_send(t).expect("Receive half cannot be dropped");
417
+ }
418
+ }
419
+ }
420
+
421
+ pub(crate) struct PermittedTqResp {
422
+ pub permit: OwnedMeteredSemPermit,
423
+ pub resp: PollActivityTaskQueueResponse,
424
+ }
425
+
426
+ #[cfg(test)]
427
+ mod tests {
428
+ use super::*;
429
+ use crate::{
430
+ test_help::mock_poller_from_resps, worker::client::mocks::mock_manual_workflow_client,
431
+ };
432
+
433
+ #[tokio::test]
434
+ async fn per_worker_ratelimit() {
435
+ let poller = mock_poller_from_resps([
436
+ PollActivityTaskQueueResponse {
437
+ task_token: vec![1],
438
+ activity_id: "act1".to_string(),
439
+ ..Default::default()
440
+ }
441
+ .into(),
442
+ PollActivityTaskQueueResponse {
443
+ task_token: vec![2],
444
+ activity_id: "act2".to_string(),
445
+ ..Default::default()
446
+ }
447
+ .into(),
448
+ ]);
449
+ let client = WorkerClientBag::new(
450
+ Box::new(mock_manual_workflow_client()),
451
+ "fake_namespace".to_string(),
452
+ );
453
+ let atm = WorkerActivityTasks::new(
454
+ 10,
455
+ Some(2.0),
456
+ poller,
457
+ Arc::new(client),
458
+ MetricsContext::default(),
459
+ Duration::from_secs(1),
460
+ Duration::from_secs(1),
461
+ );
462
+ let start = Instant::now();
463
+ atm.poll().await.unwrap().unwrap();
464
+ atm.poll().await.unwrap().unwrap();
465
+ // At least half a second will have elapsed since we only allow 2 tasks per second.
466
+ // With no ratelimit, even on a slow CI server with lots of load, this would typically take
467
+ // low single digit ms or less.
468
+ assert!(start.elapsed() > Duration::from_secs_f64(0.5));
342
469
  }
343
470
  }
@@ -118,6 +118,7 @@ pub(crate) trait WorkerClient: Sync + Send {
118
118
  #[async_trait::async_trait]
119
119
  impl<'a, T> WorkerClient for T
120
120
  where
121
+ // TODO: This should be workflow service... no reason to marry worker trait to sdk client trait
121
122
  T: Borrow<dyn WorkflowClientTrait + 'a + Send + Sync> + Send + Sync,
122
123
  {
123
124
  async fn poll_workflow_task(