@temporalio/core-bridge 0.23.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/Cargo.lock +118 -15
  2. package/Cargo.toml +2 -1
  3. package/LICENSE.md +1 -1
  4. package/README.md +1 -1
  5. package/index.d.ts +47 -18
  6. package/package.json +7 -7
  7. package/releases/aarch64-apple-darwin/index.node +0 -0
  8. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  9. package/releases/x86_64-apple-darwin/index.node +0 -0
  10. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  11. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  12. package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
  13. package/sdk-core/ARCHITECTURE.md +9 -7
  14. package/sdk-core/README.md +5 -1
  15. package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -3
  17. package/sdk-core/client/src/lib.rs +26 -8
  18. package/sdk-core/client/src/raw.rs +166 -54
  19. package/sdk-core/client/src/retry.rs +9 -4
  20. package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
  21. package/sdk-core/core/Cargo.toml +2 -0
  22. package/sdk-core/core/src/abstractions.rs +137 -16
  23. package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
  24. package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
  25. package/sdk-core/core/src/core_tests/determinism.rs +2 -2
  26. package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
  27. package/sdk-core/core/src/core_tests/queries.rs +146 -60
  28. package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
  29. package/sdk-core/core/src/core_tests/workers.rs +39 -23
  30. package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  31. package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
  32. package/sdk-core/core/src/lib.rs +6 -4
  33. package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
  34. package/sdk-core/core/src/protosext/mod.rs +6 -6
  35. package/sdk-core/core/src/retry_logic.rs +1 -1
  36. package/sdk-core/core/src/telemetry/metrics.rs +21 -7
  37. package/sdk-core/core/src/telemetry/mod.rs +18 -4
  38. package/sdk-core/core/src/test_help/mod.rs +341 -109
  39. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
  40. package/sdk-core/core/src/worker/activities/local_activities.rs +19 -16
  41. package/sdk-core/core/src/worker/activities.rs +156 -29
  42. package/sdk-core/core/src/worker/client.rs +1 -0
  43. package/sdk-core/core/src/worker/mod.rs +132 -659
  44. package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
  45. package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
  46. package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
  47. package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
  48. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
  49. package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
  50. package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
  51. package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
  52. package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
  53. package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
  54. package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
  55. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
  56. package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
  57. package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
  58. package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
  59. package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
  60. package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
  61. package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
  62. package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
  63. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
  64. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
  65. package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
  66. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  67. package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
  68. package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
  69. package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
  70. package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  71. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
  72. package/sdk-core/core-api/src/errors.rs +3 -10
  73. package/sdk-core/core-api/src/lib.rs +2 -1
  74. package/sdk-core/core-api/src/worker.rs +26 -2
  75. package/sdk-core/etc/dynamic-config.yaml +2 -0
  76. package/sdk-core/integ-with-otel.sh +1 -1
  77. package/sdk-core/protos/api_upstream/Makefile +4 -4
  78. package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
  79. package/sdk-core/protos/api_upstream/buf.yaml +8 -9
  80. package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  81. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  83. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
  84. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
  85. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
  87. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
  88. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
  89. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
  90. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
  91. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
  92. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  93. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
  94. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
  95. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
  96. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
  97. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
  98. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -1
  99. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
  100. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
  101. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
  102. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
  103. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
  104. package/sdk-core/sdk/src/activity_context.rs +12 -5
  105. package/sdk-core/sdk/src/app_data.rs +37 -0
  106. package/sdk-core/sdk/src/lib.rs +76 -43
  107. package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
  108. package/sdk-core/sdk/src/workflow_context.rs +14 -19
  109. package/sdk-core/sdk/src/workflow_future.rs +11 -6
  110. package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
  111. package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
  112. package/sdk-core/sdk-core-protos/src/lib.rs +74 -176
  113. package/sdk-core/test-utils/src/lib.rs +85 -72
  114. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
  115. package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
  116. package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
  117. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
  118. package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  119. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
  120. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
  121. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
  122. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
  123. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
  124. package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
  125. package/sdk-core/tests/load_tests.rs +8 -3
  126. package/sdk-core/tests/main.rs +2 -1
  127. package/src/conversions.rs +47 -39
  128. package/src/errors.rs +10 -21
  129. package/src/lib.rs +342 -325
  130. package/sdk-core/core/src/pending_activations.rs +0 -173
  131. package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
  132. package/sdk-core/core/src/workflow/mod.rs +0 -478
  133. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
  134. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
  135. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
@@ -9,8 +9,10 @@ use std::{
9
9
  time::{self, Duration, Instant},
10
10
  };
11
11
  use temporal_sdk_core_protos::{
12
- coresdk::{activity_task::ActivityCancelReason, common, ActivityHeartbeat, IntoPayloadsExt},
13
- temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
12
+ coresdk::{activity_task::ActivityCancelReason, ActivityHeartbeat, IntoPayloadsExt},
13
+ temporal::api::{
14
+ common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
15
+ },
14
16
  };
15
17
  use tokio::{
16
18
  sync::{
@@ -47,7 +49,7 @@ enum HeartbeatAction {
47
49
  #[derive(Debug)]
48
50
  pub struct ValidActivityHeartbeat {
49
51
  pub task_token: TaskToken,
50
- pub details: Vec<common::Payload>,
52
+ pub details: Vec<Payload>,
51
53
  pub throttle_interval: time::Duration,
52
54
  }
53
55
 
@@ -58,7 +60,7 @@ enum HeartbeatExecutorAction {
58
60
  /// Report heartbeat to the server
59
61
  Report {
60
62
  task_token: TaskToken,
61
- details: Vec<common::Payload>,
63
+ details: Vec<Payload>,
62
64
  },
63
65
  }
64
66
 
@@ -132,7 +134,15 @@ impl ActivityHeartbeatManager {
132
134
  let _ = self.shutdown_token.cancel();
133
135
  let mut handle = self.join_handle.lock().await;
134
136
  if let Some(h) = handle.take() {
135
- h.await.expect("shutdown should exit cleanly");
137
+ let handle_r = h.await;
138
+ if let Err(e) = handle_r {
139
+ if !e.is_cancelled() {
140
+ error!(
141
+ "Unexpected error joining heartbeating tasks during shutdown: {:?}",
142
+ e
143
+ )
144
+ }
145
+ }
136
146
  }
137
147
  }
138
148
  }
@@ -140,7 +150,7 @@ impl ActivityHeartbeatManager {
140
150
  #[derive(Debug)]
141
151
  struct ActivityHeartbeatState {
142
152
  /// If None and throttle interval is over, untrack this task token
143
- last_recorded_details: Option<Vec<common::Payload>>,
153
+ last_recorded_details: Option<Vec<Payload>>,
144
154
  /// True if we've queued up a request to record against server, but it hasn't yet completed
145
155
  is_record_in_flight: bool,
146
156
  last_send_requested: Instant,
@@ -401,9 +411,8 @@ mod test {
401
411
 
402
412
  use crate::worker::client::mocks::mock_workflow_client;
403
413
  use std::time::Duration;
404
- use temporal_sdk_core_protos::{
405
- coresdk::common::Payload,
406
- temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
414
+ use temporal_sdk_core_protos::temporal::api::{
415
+ common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
407
416
  };
408
417
  use tokio::time::sleep;
409
418
 
@@ -1,5 +1,7 @@
1
1
  use crate::{
2
- abstractions::MeteredSemaphore, protosext::ValidScheduleLA, retry_logic::RetryPolicyExt,
2
+ abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
3
+ protosext::ValidScheduleLA,
4
+ retry_logic::RetryPolicyExt,
3
5
  MetricsContext, TaskToken,
4
6
  };
5
7
  use parking_lot::Mutex;
@@ -12,9 +14,8 @@ use temporal_sdk_core_protos::{
12
14
  coresdk::{
13
15
  activity_result::{Cancellation, Failure as ActFail, Success},
14
16
  activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
15
- common::WorkflowExecution,
16
17
  },
17
- temporal::api::enums::v1::TimeoutType,
18
+ temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
18
19
  };
19
20
  use tokio::{
20
21
  sync::{
@@ -44,6 +45,7 @@ pub(crate) struct LocalInFlightActInfo {
44
45
  pub la_info: NewLocalAct,
45
46
  pub dispatch_time: Instant,
46
47
  pub attempt: u32,
48
+ _permit: OwnedMeteredSemPermit,
47
49
  }
48
50
 
49
51
  #[derive(Debug, Clone)]
@@ -181,6 +183,7 @@ impl LocalActivityManager {
181
183
  )
182
184
  }
183
185
 
186
+ #[cfg(test)]
184
187
  pub(crate) fn num_outstanding(&self) -> usize {
185
188
  self.dat.lock().outstanding_activity_tasks.len()
186
189
  }
@@ -192,11 +195,11 @@ impl LocalActivityManager {
192
195
 
193
196
  pub(crate) fn enqueue(
194
197
  &self,
195
- reqs: impl IntoIterator<Item = LocalActRequest> + Debug,
198
+ reqs: impl IntoIterator<Item = LocalActRequest>,
196
199
  ) -> Vec<LocalActivityResolution> {
197
- debug!("Queuing local activities: {:?}", &reqs);
198
200
  let mut immediate_resolutions = vec![];
199
201
  for req in reqs {
202
+ debug!(local_activity = ?req, "Queuing local activity");
200
203
  match req {
201
204
  LocalActRequest::New(act) => {
202
205
  let id = ExecutingLAId {
@@ -267,7 +270,7 @@ impl LocalActivityManager {
267
270
  /// Returns the next pending local-activity related action, or None if shutdown has initiated
268
271
  /// and there are no more remaining actions to take.
269
272
  pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
270
- let new_or_retry = match self.rcvs.lock().await.next(&self.semaphore).await? {
273
+ let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
271
274
  NewOrCancel::Cancel(c) => {
272
275
  return match c {
273
276
  CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
@@ -308,7 +311,7 @@ impl LocalActivityManager {
308
311
  }
309
312
  };
310
313
  }
311
- NewOrCancel::New(n) => n,
314
+ NewOrCancel::New(n, perm) => (n, perm),
312
315
  };
313
316
 
314
317
  // It is important that there are no await points after receiving from the channel, as
@@ -363,6 +366,7 @@ impl LocalActivityManager {
363
366
  la_info: orig,
364
367
  dispatch_time: Instant::now(),
365
368
  attempt,
369
+ _permit: permit,
366
370
  },
367
371
  );
368
372
  if let Some(to) = dat.timeout_tasks.get_mut(&id) {
@@ -407,7 +411,6 @@ impl LocalActivityManager {
407
411
  seq_num: info.la_info.schedule_cmd.seq,
408
412
  };
409
413
  dlock.id_to_tt.remove(&exec_id);
410
- self.semaphore.add_permit();
411
414
 
412
415
  match status {
413
416
  LocalActivityExecutionResult::Completed(_)
@@ -513,7 +516,7 @@ enum CancelOrTimeout {
513
516
  }
514
517
 
515
518
  enum NewOrCancel {
516
- New(NewOrRetry),
519
+ New(NewOrRetry, OwnedMeteredSemPermit),
517
520
  Cancel(CancelOrTimeout),
518
521
  }
519
522
 
@@ -531,13 +534,13 @@ impl RcvChans {
531
534
  cancel = async { self.cancels_req_rx.recv().await } => {
532
535
  Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
533
536
  }
534
- maybe_new_or_retry = async {
537
+ (maybe_new_or_retry, perm) = async {
535
538
  // Wait for a permit to take a task and forget it. Permits are removed until a
536
539
  // completion.
537
- new_sem.acquire().await.expect("is never closed").forget();
538
- self.act_req_rx.recv().await
540
+ let perm = new_sem.acquire_owned().await.expect("is never closed");
541
+ (self.act_req_rx.recv().await, perm)
539
542
  } => Some(NewOrCancel::New(
540
- maybe_new_or_retry.expect("Send halves of LA manager are not dropped")
543
+ maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
541
544
  )),
542
545
  _ = self.shutdown.cancelled() => None
543
546
  }
@@ -636,9 +639,9 @@ impl Drop for TimeoutBag {
636
639
  mod tests {
637
640
  use super::*;
638
641
  use crate::protosext::LACloseTimeouts;
639
- use temporal_sdk_core_protos::{
640
- coresdk::common::RetryPolicy,
641
- temporal::api::failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
642
+ use temporal_sdk_core_protos::temporal::api::{
643
+ common::v1::RetryPolicy,
644
+ failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
642
645
  };
643
646
  use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
644
647
 
@@ -8,7 +8,7 @@ pub(crate) use local_activities::{
8
8
  };
9
9
 
10
10
  use crate::{
11
- abstractions::MeteredSemaphore,
11
+ abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
12
12
  pollers::BoxedActPoller,
13
13
  telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
14
14
  worker::{
@@ -19,6 +19,12 @@ use crate::{
19
19
  };
20
20
  use activity_heartbeat_manager::ActivityHeartbeatManager;
21
21
  use dashmap::DashMap;
22
+ use governor::{
23
+ clock::DefaultClock,
24
+ middleware::NoOpMiddleware,
25
+ state::{InMemoryState, NotKeyed},
26
+ Quota, RateLimiter,
27
+ };
22
28
  use std::{
23
29
  convert::TryInto,
24
30
  sync::Arc,
@@ -52,7 +58,6 @@ struct InFlightActInfo {
52
58
  }
53
59
 
54
60
  /// Augments [InFlightActInfo] with details specific to remote activities
55
- #[derive(Debug)]
56
61
  struct RemoteInFlightActInfo {
57
62
  pub base: InFlightActInfo,
58
63
  /// Used to calculate aggregation delay between activity heartbeats.
@@ -63,12 +68,15 @@ struct RemoteInFlightActInfo {
63
68
  /// we have learned from heartbeating and issued a cancel task, in which case we may simply
64
69
  /// discard the reply.
65
70
  pub known_not_found: bool,
71
+ /// The permit from the max concurrent semaphore
72
+ _permit: OwnedMeteredSemPermit,
66
73
  }
67
74
  impl RemoteInFlightActInfo {
68
75
  fn new(
69
76
  activity_type: String,
70
77
  workflow_type: String,
71
78
  heartbeat_timeout: Option<prost_types::Duration>,
79
+ permit: OwnedMeteredSemPermit,
72
80
  ) -> Self {
73
81
  Self {
74
82
  base: InFlightActInfo {
@@ -79,10 +87,26 @@ impl RemoteInFlightActInfo {
79
87
  heartbeat_timeout,
80
88
  issued_cancel_to_lang: false,
81
89
  known_not_found: false,
90
+ _permit: permit,
82
91
  }
83
92
  }
84
93
  }
85
94
 
95
+ struct NonPollActBuffer {
96
+ tx: async_channel::Sender<PermittedTqResp>,
97
+ rx: async_channel::Receiver<PermittedTqResp>,
98
+ }
99
+ impl NonPollActBuffer {
100
+ pub fn new() -> Self {
101
+ let (tx, rx) = async_channel::unbounded();
102
+ Self { tx, rx }
103
+ }
104
+
105
+ pub async fn next(&self) -> PermittedTqResp {
106
+ self.rx.recv().await.expect("Send half cannot be dropped")
107
+ }
108
+ }
109
+
86
110
  pub(crate) struct WorkerActivityTasks {
87
111
  /// Centralizes management of heartbeat issuing / throttling
88
112
  heartbeat_manager: ActivityHeartbeatManager,
@@ -91,8 +115,13 @@ pub(crate) struct WorkerActivityTasks {
91
115
  /// Buffers activity task polling in the event we need to return a cancellation while a poll is
92
116
  /// ongoing.
93
117
  poller: BoxedActPoller,
118
+ /// Holds activity tasks we have received by non-polling means. EX: In direct response to
119
+ /// workflow task completion.
120
+ non_poll_tasks: NonPollActBuffer,
94
121
  /// Ensures we stay at or below this worker's maximum concurrent activity limit
95
- activities_semaphore: MeteredSemaphore,
122
+ activities_semaphore: Arc<MeteredSemaphore>,
123
+ /// Enables per-worker rate-limiting of activity tasks
124
+ ratelimiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
96
125
  /// Wakes every time an activity is removed from the outstanding map
97
126
  complete_notify: Notify,
98
127
 
@@ -105,6 +134,7 @@ pub(crate) struct WorkerActivityTasks {
105
134
  impl WorkerActivityTasks {
106
135
  pub(crate) fn new(
107
136
  max_activity_tasks: usize,
137
+ max_worker_act_per_sec: Option<f64>,
108
138
  poller: BoxedActPoller,
109
139
  client: Arc<WorkerClientBag>,
110
140
  metrics: MetricsContext,
@@ -115,11 +145,15 @@ impl WorkerActivityTasks {
115
145
  heartbeat_manager: ActivityHeartbeatManager::new(client),
116
146
  outstanding_activity_tasks: Default::default(),
117
147
  poller,
118
- activities_semaphore: MeteredSemaphore::new(
148
+ non_poll_tasks: NonPollActBuffer::new(),
149
+ activities_semaphore: Arc::new(MeteredSemaphore::new(
119
150
  max_activity_tasks,
120
151
  metrics.with_new_attrs([activity_worker_type()]),
121
152
  MetricsContext::available_task_slots,
122
- ),
153
+ )),
154
+ ratelimiter: max_worker_act_per_sec.and_then(|ps| {
155
+ Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
156
+ }),
123
157
  complete_notify: Notify::new(),
124
158
  metrics,
125
159
  max_heartbeat_throttle_interval,
@@ -151,12 +185,15 @@ impl WorkerActivityTasks {
151
185
  // Acquire and subsequently forget a permit for an outstanding activity. When they are
152
186
  // completed, we must add a new permit to the semaphore, since holding the permit the
153
187
  // entire time lang does work would be a challenge.
154
- let sem = self
188
+ let perm = self
155
189
  .activities_semaphore
156
- .acquire()
190
+ .acquire_owned()
157
191
  .await
158
192
  .expect("outstanding activity semaphore not closed");
159
- (self.poller.poll().await, sem)
193
+ if let Some(ref rl) = self.ratelimiter {
194
+ rl.until_ready().await;
195
+ }
196
+ (self.poller.poll().await, perm)
160
197
  };
161
198
 
162
199
  tokio::select! {
@@ -165,7 +202,10 @@ impl WorkerActivityTasks {
165
202
  cancel_task = self.next_pending_cancel_task() => {
166
203
  cancel_task
167
204
  }
168
- (work, sem) = poll_with_semaphore => {
205
+ task = self.non_poll_tasks.next() => {
206
+ Ok(Some(self.about_to_issue_task(task)))
207
+ }
208
+ (work, permit) = poll_with_semaphore => {
169
209
  match work {
170
210
  Some(Ok(work)) => {
171
211
  if work == PollActivityTaskQueueResponse::default() {
@@ -173,23 +213,10 @@ impl WorkerActivityTasks {
173
213
  self.metrics.act_poll_timeout();
174
214
  return Ok(None)
175
215
  }
176
-
177
- if let Some(dur) = work.sched_to_start() {
178
- self.metrics
179
- .act_sched_to_start_latency(dur);
180
- }
181
-
182
- self.outstanding_activity_tasks.insert(
183
- work.task_token.clone().into(),
184
- RemoteInFlightActInfo::new(
185
- work.activity_type.clone().unwrap_or_default().name,
186
- work.workflow_type.clone().unwrap_or_default().name,
187
- work.heartbeat_timeout.clone()
188
- ),
189
- );
190
- // Only permanently take a permit in the event the poll finished properly
191
- sem.forget();
192
- Ok(Some(ActivityTask::start_from_poll_resp(work)))
216
+ let work = self.about_to_issue_task(PermittedTqResp {
217
+ resp: work, permit
218
+ });
219
+ Ok(Some(work))
193
220
  }
194
221
  None => {
195
222
  Err(PollActivityError::ShutDown)
@@ -212,10 +239,9 @@ impl WorkerActivityTasks {
212
239
  workflow_type(act_info.base.workflow_type.clone()),
213
240
  ]);
214
241
  act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
215
- self.activities_semaphore.add_permit();
216
- self.heartbeat_manager.evict(task_token.clone()).await;
217
242
  let known_not_found = act_info.known_not_found;
218
243
  drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
244
+ self.heartbeat_manager.evict(task_token.clone()).await;
219
245
  self.complete_notify.notify_waiters();
220
246
 
221
247
  // No need to report activities which we already know the server doesn't care about
@@ -304,6 +330,14 @@ impl WorkerActivityTasks {
304
330
  self.heartbeat_manager.record(details, throttle_interval)
305
331
  }
306
332
 
333
+ /// Returns a handle that the workflows management side can use to interact with this manager
334
+ pub(crate) fn get_handle_for_workflows(&self) -> ActivitiesFromWFTsHandle {
335
+ ActivitiesFromWFTsHandle {
336
+ sem: self.activities_semaphore.clone(),
337
+ tx: self.non_poll_tasks.tx.clone(),
338
+ }
339
+ }
340
+
307
341
  async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
308
342
  let next_pc = self.heartbeat_manager.next_pending_cancel().await;
309
343
  // Issue cancellations for anything we noticed was cancelled during heartbeating
@@ -336,8 +370,101 @@ impl WorkerActivityTasks {
336
370
  }
337
371
  }
338
372
 
373
+ /// Called when there is a new act task about to be bubbled up out of the manager
374
+ fn about_to_issue_task(&self, task: PermittedTqResp) -> ActivityTask {
375
+ if let Some(dur) = task.resp.sched_to_start() {
376
+ self.metrics.act_sched_to_start_latency(dur);
377
+ };
378
+
379
+ self.outstanding_activity_tasks.insert(
380
+ task.resp.task_token.clone().into(),
381
+ RemoteInFlightActInfo::new(
382
+ task.resp.activity_type.clone().unwrap_or_default().name,
383
+ task.resp.workflow_type.clone().unwrap_or_default().name,
384
+ task.resp.heartbeat_timeout.clone(),
385
+ task.permit,
386
+ ),
387
+ );
388
+
389
+ ActivityTask::start_from_poll_resp(task.resp)
390
+ }
391
+
339
392
  #[cfg(test)]
340
393
  pub(crate) fn remaining_activity_capacity(&self) -> usize {
341
- self.activities_semaphore.sem.available_permits()
394
+ self.activities_semaphore.available_permits()
395
+ }
396
+ }
397
+
398
+ /// Provides facilities for the workflow side of things to interact with the activity manager.
399
+ /// Allows for the handling of activities returned by WFT completions.
400
+ pub(crate) struct ActivitiesFromWFTsHandle {
401
+ sem: Arc<MeteredSemaphore>,
402
+ tx: async_channel::Sender<PermittedTqResp>,
403
+ }
404
+
405
+ impl ActivitiesFromWFTsHandle {
406
+ /// Returns a handle that can be used to reserve an activity slot. EX: When requesting eager
407
+ /// dispatch of an activity to this worker upon workflow task completion
408
+ pub(crate) fn reserve_slot(&self) -> Option<OwnedMeteredSemPermit> {
409
+ self.sem.try_acquire_owned().ok()
410
+ }
411
+
412
+ /// Queue new activity tasks for dispatch received from non-polling sources (ex: eager returns
413
+ /// from WFT completion)
414
+ pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = PermittedTqResp>) {
415
+ for t in tasks.into_iter() {
416
+ self.tx.try_send(t).expect("Receive half cannot be dropped");
417
+ }
418
+ }
419
+ }
420
+
421
+ pub(crate) struct PermittedTqResp {
422
+ pub permit: OwnedMeteredSemPermit,
423
+ pub resp: PollActivityTaskQueueResponse,
424
+ }
425
+
426
+ #[cfg(test)]
427
+ mod tests {
428
+ use super::*;
429
+ use crate::{
430
+ test_help::mock_poller_from_resps, worker::client::mocks::mock_manual_workflow_client,
431
+ };
432
+
433
+ #[tokio::test]
434
+ async fn per_worker_ratelimit() {
435
+ let poller = mock_poller_from_resps([
436
+ PollActivityTaskQueueResponse {
437
+ task_token: vec![1],
438
+ activity_id: "act1".to_string(),
439
+ ..Default::default()
440
+ }
441
+ .into(),
442
+ PollActivityTaskQueueResponse {
443
+ task_token: vec![2],
444
+ activity_id: "act2".to_string(),
445
+ ..Default::default()
446
+ }
447
+ .into(),
448
+ ]);
449
+ let client = WorkerClientBag::new(
450
+ Box::new(mock_manual_workflow_client()),
451
+ "fake_namespace".to_string(),
452
+ );
453
+ let atm = WorkerActivityTasks::new(
454
+ 10,
455
+ Some(2.0),
456
+ poller,
457
+ Arc::new(client),
458
+ MetricsContext::default(),
459
+ Duration::from_secs(1),
460
+ Duration::from_secs(1),
461
+ );
462
+ let start = Instant::now();
463
+ atm.poll().await.unwrap().unwrap();
464
+ atm.poll().await.unwrap().unwrap();
465
+ // At least half a second will have elapsed since we only allow 2 tasks per second.
466
+ // With no ratelimit, even on a slow CI server with lots of load, this would typically take
467
+ // low single digit ms or less.
468
+ assert!(start.elapsed() > Duration::from_secs_f64(0.5));
342
469
  }
343
470
  }
@@ -118,6 +118,7 @@ pub(crate) trait WorkerClient: Sync + Send {
118
118
  #[async_trait::async_trait]
119
119
  impl<'a, T> WorkerClient for T
120
120
  where
121
+ // TODO: This should be workflow service... no reason to marry worker trait to sdk client trait
121
122
  T: Borrow<dyn WorkflowClientTrait + 'a + Send + Sync> + Send + Sync,
122
123
  {
123
124
  async fn poll_workflow_task(