@temporalio/core-bridge 0.19.2 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/Cargo.lock +90 -157
  2. package/Cargo.toml +1 -0
  3. package/index.d.ts +11 -27
  4. package/package.json +3 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.cargo/config.toml +1 -0
  13. package/sdk-core/CODEOWNERS +1 -1
  14. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +119 -86
  15. package/sdk-core/bridge-ffi/src/lib.rs +311 -315
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +108 -113
  17. package/sdk-core/client/Cargo.toml +13 -9
  18. package/sdk-core/client/LICENSE.txt +23 -0
  19. package/sdk-core/client/src/lib.rs +286 -174
  20. package/sdk-core/client/src/metrics.rs +86 -12
  21. package/sdk-core/client/src/raw.rs +566 -0
  22. package/sdk-core/client/src/retry.rs +137 -99
  23. package/sdk-core/core/Cargo.toml +15 -10
  24. package/sdk-core/core/LICENSE.txt +23 -0
  25. package/sdk-core/core/benches/workflow_replay.rs +79 -0
  26. package/sdk-core/core/src/abstractions.rs +38 -0
  27. package/sdk-core/core/src/core_tests/activity_tasks.rs +108 -182
  28. package/sdk-core/core/src/core_tests/child_workflows.rs +16 -11
  29. package/sdk-core/core/src/core_tests/determinism.rs +24 -12
  30. package/sdk-core/core/src/core_tests/local_activities.rs +53 -27
  31. package/sdk-core/core/src/core_tests/mod.rs +30 -43
  32. package/sdk-core/core/src/core_tests/queries.rs +82 -81
  33. package/sdk-core/core/src/core_tests/workers.rs +111 -296
  34. package/sdk-core/core/src/core_tests/workflow_cancels.rs +4 -4
  35. package/sdk-core/core/src/core_tests/workflow_tasks.rs +257 -242
  36. package/sdk-core/core/src/lib.rs +73 -318
  37. package/sdk-core/core/src/pollers/mod.rs +4 -6
  38. package/sdk-core/core/src/pollers/poll_buffer.rs +20 -14
  39. package/sdk-core/core/src/protosext/mod.rs +7 -10
  40. package/sdk-core/core/src/replay/mod.rs +11 -150
  41. package/sdk-core/core/src/telemetry/metrics.rs +35 -2
  42. package/sdk-core/core/src/telemetry/mod.rs +49 -16
  43. package/sdk-core/core/src/telemetry/prometheus_server.rs +14 -35
  44. package/sdk-core/core/src/test_help/mod.rs +104 -170
  45. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +57 -34
  46. package/sdk-core/core/src/worker/activities/local_activities.rs +95 -23
  47. package/sdk-core/core/src/worker/activities.rs +23 -16
  48. package/sdk-core/core/src/worker/client/mocks.rs +86 -0
  49. package/sdk-core/core/src/worker/client.rs +209 -0
  50. package/sdk-core/core/src/worker/mod.rs +207 -108
  51. package/sdk-core/core/src/workflow/driven_workflow.rs +21 -6
  52. package/sdk-core/core/src/workflow/history_update.rs +107 -24
  53. package/sdk-core/core/src/workflow/machines/activity_state_machine.rs +2 -3
  54. package/sdk-core/core/src/workflow/machines/child_workflow_state_machine.rs +2 -3
  55. package/sdk-core/core/src/workflow/machines/mod.rs +20 -17
  56. package/sdk-core/core/src/workflow/machines/signal_external_state_machine.rs +56 -19
  57. package/sdk-core/core/src/workflow/machines/transition_coverage.rs +5 -0
  58. package/sdk-core/core/src/workflow/machines/upsert_search_attributes_state_machine.rs +230 -22
  59. package/sdk-core/core/src/workflow/machines/workflow_machines.rs +81 -115
  60. package/sdk-core/core/src/workflow/machines/workflow_task_state_machine.rs +4 -4
  61. package/sdk-core/core/src/workflow/mod.rs +13 -1
  62. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +70 -11
  63. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +65 -41
  64. package/sdk-core/core-api/Cargo.toml +9 -1
  65. package/sdk-core/core-api/LICENSE.txt +23 -0
  66. package/sdk-core/core-api/src/errors.rs +7 -38
  67. package/sdk-core/core-api/src/lib.rs +44 -52
  68. package/sdk-core/core-api/src/worker.rs +10 -2
  69. package/sdk-core/etc/deps.svg +127 -96
  70. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +11 -7
  71. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +10 -0
  72. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +6 -1
  73. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +6 -0
  74. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +6 -0
  75. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +2 -1
  76. package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +3 -0
  77. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +12 -0
  78. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +25 -0
  79. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -0
  80. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +19 -35
  81. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -6
  82. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +53 -11
  83. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +14 -7
  84. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +3 -5
  85. package/sdk-core/sdk/Cargo.toml +16 -2
  86. package/sdk-core/sdk/LICENSE.txt +23 -0
  87. package/sdk-core/sdk/src/interceptors.rs +11 -0
  88. package/sdk-core/sdk/src/lib.rs +139 -151
  89. package/sdk-core/sdk/src/workflow_context/options.rs +86 -1
  90. package/sdk-core/sdk/src/workflow_context.rs +36 -17
  91. package/sdk-core/sdk/src/workflow_future.rs +19 -25
  92. package/sdk-core/sdk-core-protos/Cargo.toml +1 -1
  93. package/sdk-core/sdk-core-protos/build.rs +1 -0
  94. package/sdk-core/sdk-core-protos/src/history_info.rs +17 -4
  95. package/sdk-core/sdk-core-protos/src/lib.rs +251 -47
  96. package/sdk-core/test-utils/Cargo.toml +3 -1
  97. package/sdk-core/test-utils/src/canned_histories.rs +27 -0
  98. package/sdk-core/test-utils/src/histfetch.rs +3 -3
  99. package/sdk-core/test-utils/src/lib.rs +223 -68
  100. package/sdk-core/tests/integ_tests/client_tests.rs +27 -4
  101. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +93 -14
  102. package/sdk-core/tests/integ_tests/polling_tests.rs +18 -12
  103. package/sdk-core/tests/integ_tests/queries_tests.rs +50 -53
  104. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +117 -103
  105. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +8 -1
  106. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +10 -5
  107. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +7 -1
  108. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +32 -9
  109. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +7 -1
  110. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +76 -15
  111. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +19 -3
  112. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +39 -42
  113. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +84 -0
  114. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +30 -8
  115. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +21 -6
  116. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +26 -16
  117. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +66 -0
  118. package/sdk-core/tests/integ_tests/workflow_tests.rs +78 -74
  119. package/sdk-core/tests/load_tests.rs +9 -6
  120. package/sdk-core/tests/main.rs +43 -10
  121. package/src/conversions.rs +7 -12
  122. package/src/lib.rs +322 -357
  123. package/sdk-core/client/src/mocks.rs +0 -167
  124. package/sdk-core/core/src/worker/dispatcher.rs +0 -171
  125. package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +0 -61
@@ -1,5 +1,5 @@
1
1
  mod activities;
2
- mod dispatcher;
2
+ pub(crate) mod client;
3
3
  mod wft_delivery;
4
4
 
5
5
  pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
@@ -8,20 +8,25 @@ pub(crate) use activities::{
8
8
  ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
9
9
  NewLocalAct,
10
10
  };
11
- pub(crate) use dispatcher::WorkerDispatcher;
12
11
 
13
12
  use crate::{
13
+ abstractions::MeteredSemaphore,
14
14
  errors::CompleteWfError,
15
15
  pollers::{
16
16
  new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller, Poller,
17
17
  WorkflowTaskPoller,
18
18
  },
19
19
  protosext::{legacy_query_failure, ValidPollWFTQResponse},
20
- telemetry::metrics::{
21
- activity_poller, workflow_poller, workflow_sticky_poller, MetricsContext,
20
+ telemetry::{
21
+ metrics::{
22
+ activity_poller, local_activity_worker_type, workflow_poller, workflow_sticky_poller,
23
+ workflow_worker_type, MetricsContext,
24
+ },
25
+ VecDisplayer,
22
26
  },
23
27
  worker::{
24
28
  activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
29
+ client::WorkerClientBag,
25
30
  wft_delivery::WFTSource,
26
31
  },
27
32
  workflow::{
@@ -31,18 +36,19 @@ use crate::{
31
36
  },
32
37
  EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
33
38
  },
34
- ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError,
39
+ ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError, WorkerTrait,
35
40
  };
36
41
  use activities::{LocalInFlightActInfo, WorkerActivityTasks};
37
42
  use futures::{Future, TryFutureExt};
38
43
  use std::{convert::TryInto, sync::Arc};
39
- use temporal_client::{ServerGatewayApis, WorkflowTaskCompletion};
44
+ use temporal_client::WorkflowTaskCompletion;
40
45
  use temporal_sdk_core_protos::{
41
46
  coresdk::{
42
47
  activity_result::activity_execution_result,
43
48
  activity_task::ActivityTask,
44
49
  workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
45
50
  workflow_completion::{self, workflow_activation_completion, WorkflowActivationCompletion},
51
+ ActivityTaskCompletion,
46
52
  },
47
53
  temporal::api::{
48
54
  enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
@@ -52,14 +58,19 @@ use temporal_sdk_core_protos::{
52
58
  },
53
59
  TaskToken,
54
60
  };
55
- use tokio::sync::{watch, Notify, Semaphore};
61
+ use tokio::sync::Notify;
62
+ use tokio_util::sync::CancellationToken;
56
63
  use tonic::Code;
57
64
  use tracing_futures::Instrument;
58
65
 
66
+ #[cfg(test)]
67
+ use crate::worker::client::WorkerClient;
68
+ use crate::workflow::workflow_tasks::EvictionRequestResult;
69
+
59
70
  /// A worker polls on a certain task queue
60
71
  pub struct Worker {
61
72
  config: WorkerConfig,
62
- server_gateway: Arc<dyn ServerGatewayApis + Send + Sync>,
73
+ wf_client: Arc<WorkerClientBag>,
63
74
 
64
75
  /// Will be populated when this worker should poll on a sticky WFT queue
65
76
  sticky_name: Option<String>,
@@ -67,14 +78,14 @@ pub struct Worker {
67
78
  /// Buffers workflow task polling in the event we need to return a pending activation while
68
79
  /// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
69
80
  wf_task_source: WFTSource,
70
- /// Workflow task management TODO: No pub
71
- pub(crate) wft_manager: WorkflowTaskManager,
81
+ /// Workflow task management
82
+ wft_manager: WorkflowTaskManager,
72
83
  /// Manages activity tasks for this worker/task queue
73
84
  at_task_mgr: Option<WorkerActivityTasks>,
74
85
  /// Manages local activities
75
86
  local_act_mgr: LocalActivityManager,
76
87
  /// Ensures we stay at or below this worker's maximum concurrent workflow limit
77
- workflows_semaphore: Semaphore,
88
+ workflows_semaphore: MeteredSemaphore,
78
89
  /// Used to wake blocked workflow task polling when there is some change to workflow activations
79
90
  /// that should cause us to restart the loop
80
91
  pending_activations_notify: Arc<Notify>,
@@ -82,21 +93,106 @@ pub struct Worker {
82
93
  /// a WFT is completed.
83
94
  wfts_drained_notify: Arc<Notify>,
84
95
  /// Has shutdown been called?
85
- shutdown_requested: watch::Receiver<bool>,
86
- shutdown_sender: watch::Sender<bool>,
96
+ shutdown_token: CancellationToken,
87
97
  /// Will be called at the end of each activation completion
88
98
  post_activate_hook: Option<Box<dyn Fn(&Self) + Send + Sync>>,
89
99
 
90
100
  metrics: MetricsContext,
91
101
  }
92
102
 
103
+ #[async_trait::async_trait]
104
+ impl WorkerTrait for Worker {
105
+ #[instrument(level = "debug", skip(self), fields(run_id))]
106
+ async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
107
+ self.next_workflow_activation().await
108
+ }
109
+
110
+ #[instrument(level = "debug", skip(self))]
111
+ async fn poll_activity_task(&self) -> Result<ActivityTask, PollActivityError> {
112
+ loop {
113
+ match self.activity_poll().await.transpose() {
114
+ Some(r) => break r,
115
+ None => {
116
+ tokio::task::yield_now().await;
117
+ continue;
118
+ }
119
+ }
120
+ }
121
+ }
122
+
123
+ #[instrument(level = "debug", skip(self, completion),
124
+ fields(completion=%&completion, run_id=%completion.run_id))]
125
+ async fn complete_workflow_activation(
126
+ &self,
127
+ completion: WorkflowActivationCompletion,
128
+ ) -> Result<(), CompleteWfError> {
129
+ self.complete_workflow_activation(completion).await
130
+ }
131
+
132
+ #[instrument(level = "debug", skip(self, completion),
133
+ fields(completion=%&completion))]
134
+ async fn complete_activity_task(
135
+ &self,
136
+ completion: ActivityTaskCompletion,
137
+ ) -> Result<(), CompleteActivityError> {
138
+ let task_token = TaskToken(completion.task_token);
139
+ let status = if let Some(s) = completion.result.and_then(|r| r.status) {
140
+ s
141
+ } else {
142
+ return Err(CompleteActivityError::MalformedActivityCompletion {
143
+ reason: "Activity completion had empty result/status field".to_owned(),
144
+ completion: None,
145
+ });
146
+ };
147
+
148
+ self.complete_activity(task_token, status).await
149
+ }
150
+
151
+ fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
152
+ self.record_heartbeat(details);
153
+ }
154
+
155
+ fn request_workflow_eviction(&self, run_id: &str) {
156
+ self.request_wf_eviction(
157
+ run_id,
158
+ "Eviction explicitly requested by lang",
159
+ EvictionReason::LangRequested,
160
+ );
161
+ }
162
+
163
+ fn get_config(&self) -> &WorkerConfig {
164
+ &self.config
165
+ }
166
+
167
+ /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
168
+ // TODO: will be in trait after Roey's shutdown refactor
169
+ fn initiate_shutdown(&self) {
170
+ self.shutdown_token.cancel();
171
+ // First, we want to stop polling of both activity and workflow tasks
172
+ if let Some(atm) = self.at_task_mgr.as_ref() {
173
+ atm.notify_shutdown();
174
+ }
175
+ self.wf_task_source.stop_pollers();
176
+ }
177
+
178
+ async fn shutdown(&self) {
179
+ self.shutdown().await
180
+ }
181
+
182
+ async fn finalize_shutdown(self) {
183
+ self.shutdown().await;
184
+ self.finalize_shutdown().await
185
+ }
186
+ }
187
+
93
188
  impl Worker {
94
189
  pub(crate) fn new(
95
190
  config: WorkerConfig,
96
191
  sticky_queue_name: Option<String>,
97
- sg: Arc<dyn ServerGatewayApis + Send + Sync>,
192
+ client: Arc<WorkerClientBag>,
98
193
  metrics: MetricsContext,
99
194
  ) -> Self {
195
+ info!(task_queue = %config.task_queue, "Initializing worker");
100
196
  metrics.worker_registered();
101
197
 
102
198
  let max_nonsticky_polls = if sticky_queue_name.is_some() {
@@ -107,7 +203,7 @@ impl Worker {
107
203
  let max_sticky_polls = config.max_sticky_polls();
108
204
  let wft_metrics = metrics.with_new_attrs([workflow_poller()]);
109
205
  let mut wf_task_poll_buffer = new_workflow_task_buffer(
110
- sg.clone(),
206
+ client.clone(),
111
207
  config.task_queue.clone(),
112
208
  false,
113
209
  max_nonsticky_polls,
@@ -117,7 +213,7 @@ impl Worker {
117
213
  let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
118
214
  let sticky_metrics = metrics.with_new_attrs([workflow_sticky_poller()]);
119
215
  let mut sp = new_workflow_task_buffer(
120
- sg.clone(),
216
+ client.clone(),
121
217
  sqn.clone(),
122
218
  true,
123
219
  max_sticky_polls,
@@ -130,10 +226,11 @@ impl Worker {
130
226
  None
131
227
  } else {
132
228
  let mut ap = new_activity_task_buffer(
133
- sg.clone(),
229
+ client.clone(),
134
230
  config.task_queue.clone(),
135
231
  config.max_concurrent_at_polls,
136
232
  config.max_concurrent_at_polls * 2,
233
+ config.max_task_queue_activities_per_second,
137
234
  );
138
235
  let act_metrics = metrics.with_new_attrs([activity_poller()]);
139
236
  ap.set_num_pollers_handler(move |np| act_metrics.record_num_pollers(np));
@@ -149,17 +246,22 @@ impl Worker {
149
246
  Self::new_with_pollers(
150
247
  config,
151
248
  sticky_queue_name,
152
- sg,
249
+ client,
153
250
  wf_task_poll_buffer,
154
251
  act_poll_buffer,
155
252
  metrics,
156
253
  )
157
254
  }
158
255
 
256
+ #[cfg(test)]
257
+ pub(crate) fn new_test(config: WorkerConfig, client: impl WorkerClient + 'static) -> Self {
258
+ Self::new(config, None, Arc::new(client.into()), Default::default())
259
+ }
260
+
159
261
  pub(crate) fn new_with_pollers(
160
262
  config: WorkerConfig,
161
263
  sticky_queue_name: Option<String>,
162
- sg: Arc<dyn ServerGatewayApis + Send + Sync>,
264
+ client: Arc<WorkerClientBag>,
163
265
  wft_poller: BoxedWFPoller,
164
266
  act_poller: Option<BoxedActPoller>,
165
267
  metrics: MetricsContext,
@@ -173,9 +275,8 @@ impl Worker {
173
275
  };
174
276
  let pa_notif = Arc::new(Notify::new());
175
277
  let wfts_drained_notify = Arc::new(Notify::new());
176
- let (shut_tx, shut_rx) = watch::channel(false);
177
278
  Self {
178
- server_gateway: sg.clone(),
279
+ wf_client: client.clone(),
179
280
  sticky_name: sticky_queue_name,
180
281
  wf_task_source: WFTSource::new(wft_poller),
181
282
  wft_manager: WorkflowTaskManager::new(pa_notif.clone(), cache_policy, metrics.clone()),
@@ -183,7 +284,7 @@ impl Worker {
183
284
  WorkerActivityTasks::new(
184
285
  config.max_outstanding_activities,
185
286
  ap,
186
- sg.clone(),
287
+ client.clone(),
187
288
  metrics.clone(),
188
289
  config.max_heartbeat_throttle_interval,
189
290
  config.default_heartbeat_throttle_interval,
@@ -191,12 +292,16 @@ impl Worker {
191
292
  }),
192
293
  local_act_mgr: LocalActivityManager::new(
193
294
  config.max_outstanding_local_activities,
194
- sg.get_options().namespace.clone(),
295
+ config.namespace.clone(),
296
+ metrics.with_new_attrs([local_activity_worker_type()]),
297
+ ),
298
+ workflows_semaphore: MeteredSemaphore::new(
299
+ config.max_outstanding_workflow_tasks,
300
+ metrics.with_new_attrs([workflow_worker_type()]),
301
+ MetricsContext::available_task_slots,
195
302
  ),
196
- workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
197
303
  config,
198
- shutdown_requested: shut_rx,
199
- shutdown_sender: shut_tx,
304
+ shutdown_token: CancellationToken::new(),
200
305
  post_activate_hook: None,
201
306
  pending_activations_notify: pa_notif,
202
307
  wfts_drained_notify,
@@ -204,20 +309,11 @@ impl Worker {
204
309
  }
205
310
  }
206
311
 
207
- /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
208
- pub(crate) fn initiate_shutdown(&self) {
209
- let _ = self.shutdown_sender.send(true);
210
- // First, we want to stop polling of both activity and workflow tasks
211
- if let Some(atm) = self.at_task_mgr.as_ref() {
212
- atm.notify_shutdown();
213
- }
214
- self.wf_task_source.stop_pollers();
215
- }
216
-
217
312
  /// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
218
313
  /// completed
219
314
  pub(crate) async fn shutdown(&self) {
220
315
  self.initiate_shutdown();
316
+ info!("Initiated shutdown");
221
317
  // Next we need to wait for all local activities to finish so no more workflow task
222
318
  // heartbeats will be generated
223
319
  self.local_act_mgr.shutdown_and_wait_all_finished().await;
@@ -249,7 +345,7 @@ impl Worker {
249
345
 
250
346
  #[cfg(test)]
251
347
  pub(crate) fn available_wft_permits(&self) -> usize {
252
- self.workflows_semaphore.available_permits()
348
+ self.workflows_semaphore.sem.available_permits()
253
349
  }
254
350
 
255
351
  /// Get new activity tasks (may be local or nonlocal). Local activities are returned first
@@ -257,12 +353,12 @@ impl Worker {
257
353
  ///
258
354
  /// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
259
355
  /// be restarted
260
- pub(crate) async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
356
+ async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
261
357
  let act_mgr_poll = async {
262
358
  if let Some(ref act_mgr) = self.at_task_mgr {
263
359
  act_mgr.poll().await
264
360
  } else {
265
- let _ = self.shutdown_requested.clone().changed().await;
361
+ self.shutdown_token.cancelled().await;
266
362
  Err(PollActivityError::ShutDown)
267
363
  }
268
364
  };
@@ -278,7 +374,12 @@ impl Worker {
278
374
  &run_id, LocalResolution::LocalActivity(resolution)).await;
279
375
  Ok(task)
280
376
  },
281
- None => Ok(None)
377
+ None => {
378
+ if self.shutdown_token.is_cancelled() {
379
+ return Err(PollActivityError::ShutDown);
380
+ }
381
+ Ok(None)
382
+ }
282
383
  }
283
384
  },
284
385
  r = act_mgr_poll => r,
@@ -326,8 +427,7 @@ impl Worker {
326
427
  }
327
428
 
328
429
  if let Some(atm) = &self.at_task_mgr {
329
- atm.complete(task_token, status, self.server_gateway.as_ref())
330
- .await
430
+ atm.complete(task_token, status, &**self.wf_client).await
331
431
  } else {
332
432
  error!(
333
433
  "Tried to complete activity {} on a worker that does not have an activity manager",
@@ -428,16 +528,21 @@ impl Worker {
428
528
 
429
529
  /// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
430
530
  pub(crate) fn return_workflow_task_permit(&self) {
431
- self.workflows_semaphore.add_permits(1);
531
+ self.workflows_semaphore.add_permit();
432
532
  }
433
533
 
534
+ /// Request a workflow eviction. Returns true if we actually queued up a new eviction request.
434
535
  pub(crate) fn request_wf_eviction(
435
536
  &self,
436
537
  run_id: &str,
437
538
  message: impl Into<String>,
438
539
  reason: EvictionReason,
439
- ) {
440
- self.wft_manager.request_eviction(run_id, message, reason);
540
+ ) -> bool {
541
+ match self.wft_manager.request_eviction(run_id, message, reason) {
542
+ EvictionRequestResult::EvictionIssued(_) => true,
543
+ EvictionRequestResult::NotFound => false,
544
+ EvictionRequestResult::EvictionAlreadyOutstanding => false,
545
+ }
441
546
  }
442
547
 
443
548
  /// Sets a function to be called at the end of each activation completion
@@ -448,11 +553,25 @@ impl Worker {
448
553
  self.post_activate_hook = Some(Box::new(callback))
449
554
  }
450
555
 
556
+ /// Used for replay workers - causes the worker to shutdown when the given run reaches the
557
+ /// given event number
558
+ pub(crate) fn set_shutdown_on_run_reaches_event(&mut self, run_id: String, last_event: i64) {
559
+ self.set_post_activate_hook(move |worker| {
560
+ if worker
561
+ .wft_manager
562
+ .most_recently_processed_event(&run_id)
563
+ .unwrap_or_default()
564
+ >= last_event
565
+ {
566
+ worker.initiate_shutdown();
567
+ }
568
+ });
569
+ }
570
+
451
571
  /// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
452
572
  async fn workflow_poll_or_wfts_drained(
453
573
  &self,
454
574
  ) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
455
- let mut shutdown_requested = self.shutdown_requested.clone();
456
575
  loop {
457
576
  tokio::select! {
458
577
  biased;
@@ -466,7 +585,7 @@ impl Worker {
466
585
  }
467
586
  return r
468
587
  },
469
- _ = shutdown_requested.changed() => {},
588
+ _ = self.shutdown_token.cancelled() => {},
470
589
  }
471
590
  }
472
591
  }
@@ -481,7 +600,7 @@ impl Worker {
481
600
  // heartbeating which is a "new" workflow task that we need to accept and process as long as
482
601
  // the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
483
602
  // then we must fetch them from the source before we can say workflow polling is shutdown.
484
- if *self.shutdown_requested.borrow()
603
+ if self.shutdown_token.is_cancelled()
485
604
  && !self.wf_task_source.has_tasks_from_complete()
486
605
  && self.local_act_mgr.num_outstanding() == 0
487
606
  {
@@ -535,10 +654,9 @@ impl Worker {
535
654
  work: ValidPollWFTQResponse,
536
655
  ) -> Result<Option<WorkflowActivation>, PollWfError> {
537
656
  let we = work.workflow_execution.clone();
538
- let tt = work.task_token.clone();
539
657
  let res = self
540
658
  .wft_manager
541
- .apply_new_poll_resp(work, self.server_gateway.clone())
659
+ .apply_new_poll_resp(work, self.wf_client.clone())
542
660
  .await;
543
661
  Ok(match res {
544
662
  NewWfTaskOutcome::IssueActivation(a) => {
@@ -555,38 +673,24 @@ impl Worker {
555
673
  debug!(workflow_execution=?we,
556
674
  "No new work for lang to perform after polling server");
557
675
  self.complete_workflow_activation(WorkflowActivationCompletion {
558
- task_queue: self.config.task_queue.clone(),
559
676
  run_id: we.run_id,
560
677
  status: Some(workflow_completion::Success::from_variants(vec![]).into()),
561
678
  })
562
679
  .await?;
563
680
  None
564
681
  }
565
- NewWfTaskOutcome::CacheMiss => {
566
- debug!(workflow_execution=?we, "Unable to process workflow task with partial \
567
- history because workflow cache does not contain workflow anymore.");
568
- self.server_gateway
569
- .fail_workflow_task(
570
- tt,
571
- WorkflowTaskFailedCause::ResetStickyTaskQueue,
572
- Some(Failure {
573
- message: "Unable to process workflow task with partial history \
574
- because workflow cache does not contain workflow anymore."
575
- .to_string(),
576
- ..Default::default()
577
- }),
578
- )
579
- .await?;
580
- self.return_workflow_task_permit();
581
- None
582
- }
583
682
  NewWfTaskOutcome::Evict(e) => {
584
683
  warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
585
- self.request_wf_eviction(
684
+ let did_issue_eviction = self.request_wf_eviction(
586
685
  &we.run_id,
587
686
  format!("Error while applying poll response to workflow: {:?}", e),
588
687
  e.evict_reason(),
589
688
  );
689
+ // If we didn't actually need to issue an eviction, then return the WFT permit.
690
+ // EX: The workflow we tried to evict wasn't in the cache.
691
+ if !did_issue_eviction {
692
+ self.return_workflow_task_permit();
693
+ }
590
694
  None
591
695
  }
592
696
  })
@@ -627,9 +731,12 @@ impl Worker {
627
731
  force_new_wft,
628
732
  },
629
733
  })) => {
630
- debug!("Sending commands to server: {:?}", &commands);
734
+ debug!("Sending commands to server: {}", commands.display());
631
735
  if !query_responses.is_empty() {
632
- debug!("Sending query responses to server: {:?}", &query_responses);
736
+ debug!(
737
+ "Sending query responses to server: {}",
738
+ query_responses.display()
739
+ );
633
740
  }
634
741
  let mut completion = WorkflowTaskCompletion {
635
742
  task_token,
@@ -649,7 +756,7 @@ impl Worker {
649
756
 
650
757
  self.handle_wft_reporting_errs(run_id, || async {
651
758
  let maybe_wft = self
652
- .server_gateway
759
+ .wf_client
653
760
  .complete_workflow_task(completion)
654
761
  .instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
655
762
  .await?;
@@ -669,7 +776,7 @@ impl Worker {
669
776
  action: ActivationAction::RespondLegacyQuery { result },
670
777
  ..
671
778
  })) => {
672
- self.server_gateway
779
+ self.wf_client
673
780
  .respond_legacy_query(task_token, result)
674
781
  .await?;
675
782
  Ok(WFTReportOutcome {
@@ -720,7 +827,7 @@ impl Worker {
720
827
  FailedActivationOutcome::Report(tt) => {
721
828
  warn!(run_id, failure=?failure, "Failing workflow activation");
722
829
  self.handle_wft_reporting_errs(run_id, || async {
723
- self.server_gateway
830
+ self.wf_client
724
831
  .fail_workflow_task(tt, cause, failure.failure.map(Into::into))
725
832
  .await
726
833
  })
@@ -732,7 +839,7 @@ impl Worker {
732
839
  }
733
840
  FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
734
841
  warn!(run_id, failure=?failure, "Failing legacy query request");
735
- self.server_gateway
842
+ self.wf_client
736
843
  .respond_legacy_query(task_token, legacy_query_failure(failure))
737
844
  .await?;
738
845
  WFTReportOutcome {
@@ -765,7 +872,7 @@ impl Worker {
765
872
  // Silence unhandled command errors since the lang SDK cannot do anything about
766
873
  // them besides poll again, which it will do anyway.
767
874
  tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
768
- warn!(error = %err, run_id, "Unhandled command response when completing");
875
+ debug!(error = %err, run_id, "Unhandled command response when completing");
769
876
  should_evict = Some(EvictionReason::UnhandledCommand);
770
877
  Ok(())
771
878
  }
@@ -851,91 +958,83 @@ struct WFTReportOutcome {
851
958
  #[cfg(test)]
852
959
  mod tests {
853
960
  use super::*;
854
- use temporal_client::mocks::mock_gateway;
961
+ use crate::{test_help::test_worker_cfg, worker::client::mocks::mock_workflow_client};
855
962
  use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
856
963
 
857
964
  #[tokio::test]
858
965
  async fn activity_timeouts_dont_eat_permits() {
859
- let mut mock_gateway = mock_gateway();
860
- mock_gateway
966
+ let mut mock_client = mock_workflow_client();
967
+ mock_client
861
968
  .expect_poll_activity_task()
862
- .returning(|_| Ok(PollActivityTaskQueueResponse::default()));
969
+ .returning(|_, _| Ok(PollActivityTaskQueueResponse::default()));
863
970
 
864
- let cfg = WorkerConfigBuilder::default()
865
- .task_queue("whatever")
971
+ let cfg = test_worker_cfg()
866
972
  .max_outstanding_activities(5_usize)
867
973
  .build()
868
974
  .unwrap();
869
- let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
975
+ let worker = Worker::new_test(cfg, mock_client);
870
976
  assert_eq!(worker.activity_poll().await.unwrap(), None);
871
977
  assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
872
978
  }
873
979
 
874
980
  #[tokio::test]
875
981
  async fn workflow_timeouts_dont_eat_permits() {
876
- let mut mock_gateway = mock_gateway();
877
- mock_gateway
982
+ let mut mock_client = mock_workflow_client();
983
+ mock_client
878
984
  .expect_poll_workflow_task()
879
985
  .returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
880
986
 
881
- let cfg = WorkerConfigBuilder::default()
882
- .task_queue("whatever")
987
+ let cfg = test_worker_cfg()
883
988
  .max_outstanding_workflow_tasks(5_usize)
884
989
  .build()
885
990
  .unwrap();
886
- let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
991
+ let worker = Worker::new_test(cfg, mock_client);
887
992
  assert_eq!(worker.workflow_poll().await.unwrap(), None);
888
- assert_eq!(worker.workflows_semaphore.available_permits(), 5);
993
+ assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
889
994
  }
890
995
 
891
996
  #[tokio::test]
892
997
  async fn activity_errs_dont_eat_permits() {
893
- let mut mock_gateway = mock_gateway();
894
- mock_gateway
998
+ let mut mock_client = mock_workflow_client();
999
+ mock_client
895
1000
  .expect_poll_activity_task()
896
- .returning(|_| Err(tonic::Status::internal("ahhh")));
1001
+ .returning(|_, _| Err(tonic::Status::internal("ahhh")));
897
1002
 
898
- let cfg = WorkerConfigBuilder::default()
899
- .task_queue("whatever")
1003
+ let cfg = test_worker_cfg()
900
1004
  .max_outstanding_activities(5_usize)
901
1005
  .build()
902
1006
  .unwrap();
903
- let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
1007
+ let worker = Worker::new_test(cfg, mock_client);
904
1008
  assert!(worker.activity_poll().await.is_err());
905
1009
  assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
906
1010
  }
907
1011
 
908
1012
  #[tokio::test]
909
1013
  async fn workflow_errs_dont_eat_permits() {
910
- let mut mock_gateway = mock_gateway();
911
- mock_gateway
1014
+ let mut mock_client = mock_workflow_client();
1015
+ mock_client
912
1016
  .expect_poll_workflow_task()
913
1017
  .returning(|_, _| Err(tonic::Status::internal("ahhh")));
914
1018
 
915
- let cfg = WorkerConfigBuilder::default()
916
- .task_queue("whatever")
1019
+ let cfg = test_worker_cfg()
917
1020
  .max_outstanding_workflow_tasks(5_usize)
918
1021
  .build()
919
1022
  .unwrap();
920
- let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
1023
+ let worker = Worker::new_test(cfg, mock_client);
921
1024
  assert!(worker.workflow_poll().await.is_err());
922
- assert_eq!(worker.workflows_semaphore.available_permits(), 5);
1025
+ assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
923
1026
  }
924
1027
 
925
1028
  #[test]
926
1029
  fn max_polls_calculated_properly() {
927
- let cfg = WorkerConfigBuilder::default()
928
- .task_queue("whatever")
929
- .build()
930
- .unwrap();
1030
+ let cfg = test_worker_cfg().build().unwrap();
931
1031
  assert_eq!(cfg.max_nonsticky_polls(), 1);
932
1032
  assert_eq!(cfg.max_sticky_polls(), 4);
933
1033
  }
934
1034
 
935
1035
  #[test]
936
1036
  fn max_polls_zero_is_err() {
937
- assert!(WorkerConfigBuilder::default()
938
- .task_queue("whatever")
1037
+ assert!(test_worker_cfg()
939
1038
  .max_concurrent_wft_polls(0_usize)
940
1039
  .build()
941
1040
  .is_err());