@temporalio/core-bridge 0.14.0 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/Cargo.lock +162 -38
  2. package/Cargo.toml +3 -3
  3. package/index.d.ts +14 -1
  4. package/index.node +0 -0
  5. package/package.json +8 -5
  6. package/releases/aarch64-apple-darwin/index.node +0 -0
  7. package/releases/{x86_64-pc-windows-gnu → aarch64-unknown-linux-gnu}/index.node +0 -0
  8. package/releases/x86_64-apple-darwin/index.node +0 -0
  9. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  10. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  11. package/scripts/build.js +77 -34
  12. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  13. package/sdk-core/Cargo.toml +6 -5
  14. package/sdk-core/fsm/Cargo.toml +1 -1
  15. package/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +2 -2
  16. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +8 -9
  17. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +13 -7
  18. package/sdk-core/fsm/rustfsm_trait/Cargo.toml +2 -2
  19. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +1 -1
  20. package/sdk-core/protos/local/workflow_activation.proto +6 -3
  21. package/sdk-core/sdk-core-protos/Cargo.toml +4 -4
  22. package/sdk-core/sdk-core-protos/src/lib.rs +38 -50
  23. package/sdk-core/src/core_tests/activity_tasks.rs +5 -5
  24. package/sdk-core/src/core_tests/child_workflows.rs +55 -29
  25. package/sdk-core/src/core_tests/determinism.rs +19 -9
  26. package/sdk-core/src/core_tests/mod.rs +3 -3
  27. package/sdk-core/src/core_tests/retry.rs +14 -8
  28. package/sdk-core/src/core_tests/workers.rs +1 -1
  29. package/sdk-core/src/core_tests/workflow_tasks.rs +347 -4
  30. package/sdk-core/src/errors.rs +27 -44
  31. package/sdk-core/src/lib.rs +13 -3
  32. package/sdk-core/src/machines/activity_state_machine.rs +44 -5
  33. package/sdk-core/src/machines/child_workflow_state_machine.rs +31 -11
  34. package/sdk-core/src/machines/complete_workflow_state_machine.rs +1 -1
  35. package/sdk-core/src/machines/continue_as_new_workflow_state_machine.rs +1 -1
  36. package/sdk-core/src/machines/mod.rs +18 -23
  37. package/sdk-core/src/machines/patch_state_machine.rs +8 -8
  38. package/sdk-core/src/machines/signal_external_state_machine.rs +22 -1
  39. package/sdk-core/src/machines/timer_state_machine.rs +21 -3
  40. package/sdk-core/src/machines/transition_coverage.rs +3 -3
  41. package/sdk-core/src/machines/workflow_machines.rs +11 -11
  42. package/sdk-core/src/pending_activations.rs +27 -22
  43. package/sdk-core/src/pollers/gateway.rs +15 -7
  44. package/sdk-core/src/pollers/poll_buffer.rs +6 -5
  45. package/sdk-core/src/pollers/retry.rs +153 -120
  46. package/sdk-core/src/prototype_rust_sdk/workflow_context.rs +61 -46
  47. package/sdk-core/src/prototype_rust_sdk/workflow_future.rs +13 -12
  48. package/sdk-core/src/prototype_rust_sdk.rs +17 -23
  49. package/sdk-core/src/telemetry/metrics.rs +2 -4
  50. package/sdk-core/src/telemetry/mod.rs +6 -7
  51. package/sdk-core/src/test_help/canned_histories.rs +17 -93
  52. package/sdk-core/src/test_help/history_builder.rs +61 -2
  53. package/sdk-core/src/test_help/history_info.rs +21 -2
  54. package/sdk-core/src/test_help/mod.rs +26 -34
  55. package/sdk-core/src/worker/activities/activity_heartbeat_manager.rs +246 -138
  56. package/sdk-core/src/worker/activities.rs +46 -45
  57. package/sdk-core/src/worker/config.rs +11 -0
  58. package/sdk-core/src/worker/dispatcher.rs +5 -5
  59. package/sdk-core/src/worker/mod.rs +86 -56
  60. package/sdk-core/src/workflow/driven_workflow.rs +3 -3
  61. package/sdk-core/src/workflow/history_update.rs +1 -1
  62. package/sdk-core/src/workflow/mod.rs +2 -1
  63. package/sdk-core/src/workflow/workflow_tasks/cache_manager.rs +13 -17
  64. package/sdk-core/src/workflow/workflow_tasks/concurrency_manager.rs +10 -18
  65. package/sdk-core/src/workflow/workflow_tasks/mod.rs +72 -57
  66. package/sdk-core/test_utils/Cargo.toml +1 -1
  67. package/sdk-core/test_utils/src/lib.rs +2 -2
  68. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +61 -1
  69. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +2 -2
  70. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +49 -0
  71. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +2 -2
  72. package/sdk-core/tests/integ_tests/workflow_tests.rs +1 -0
  73. package/src/conversions.rs +17 -0
  74. package/src/errors.rs +0 -7
  75. package/src/lib.rs +0 -20
@@ -10,7 +10,6 @@ use activity_heartbeat_manager::ActivityHeartbeatManager;
10
10
  use dashmap::DashMap;
11
11
  use std::{
12
12
  convert::TryInto,
13
- ops::Div,
14
13
  sync::Arc,
15
14
  time::{Duration, Instant},
16
15
  };
@@ -80,6 +79,9 @@ pub(crate) struct WorkerActivityTasks {
80
79
  activities_semaphore: Semaphore,
81
80
 
82
81
  metrics: MetricsContext,
82
+
83
+ max_heartbeat_throttle_interval: Duration,
84
+ default_heartbeat_throttle_interval: Duration,
83
85
  }
84
86
 
85
87
  impl WorkerActivityTasks {
@@ -88,6 +90,8 @@ impl WorkerActivityTasks {
88
90
  poller: BoxedActPoller,
89
91
  sg: Arc<impl ServerGatewayApis + Send + Sync + 'static + ?Sized>,
90
92
  metrics: MetricsContext,
93
+ max_heartbeat_throttle_interval: Duration,
94
+ default_heartbeat_throttle_interval: Duration,
91
95
  ) -> Self {
92
96
  Self {
93
97
  heartbeat_manager: ActivityHeartbeatManager::new(sg),
@@ -95,12 +99,13 @@ impl WorkerActivityTasks {
95
99
  poller,
96
100
  activities_semaphore: Semaphore::new(max_activity_tasks),
97
101
  metrics,
102
+ max_heartbeat_throttle_interval,
103
+ default_heartbeat_throttle_interval,
98
104
  }
99
105
  }
100
106
 
101
107
  pub(crate) fn notify_shutdown(&self) {
102
108
  self.poller.notify_shutdown();
103
- self.heartbeat_manager.notify_shutdown();
104
109
  }
105
110
 
106
111
  pub(crate) async fn shutdown(self) {
@@ -171,16 +176,19 @@ impl WorkerActivityTasks {
171
176
  status: activity_result::Status,
172
177
  gateway: &(dyn ServerGatewayApis + Send + Sync),
173
178
  ) -> Result<(), CompleteActivityError> {
174
- if let Some(act_info) = self.outstanding_activity_tasks.get(&task_token) {
179
+ if let Some((_, act_info)) = self.outstanding_activity_tasks.remove(&task_token) {
175
180
  let act_metrics = self.metrics.with_new_attrs([
176
181
  activity_type(act_info.activity_type.clone()),
177
182
  workflow_type(act_info.workflow_type.clone()),
178
183
  ]);
179
184
  act_metrics.act_execution_latency(act_info.start_time.elapsed());
185
+ self.activities_semaphore.add_permits(1);
186
+ self.heartbeat_manager.evict(task_token.clone());
187
+ let known_not_found = act_info.known_not_found;
188
+ drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
180
189
 
181
190
  // No need to report activities which we already know the server doesn't care about
182
- let should_remove = if !act_info.known_not_found {
183
- drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
191
+ if !known_not_found {
184
192
  let maybe_net_err = match status {
185
193
  activity_result::Status::WillCompleteAsync(_) => None,
186
194
  activity_result::Status::Completed(ar::Success { result }) => gateway
@@ -195,19 +203,17 @@ impl WorkerActivityTasks {
195
203
  .err()
196
204
  }
197
205
  activity_result::Status::Cancelled(ar::Cancellation { failure }) => {
198
- let details = match failure {
199
- Some(Failure {
200
- failure_info:
201
- Some(FailureInfo::CanceledFailureInfo(CanceledFailureInfo {
202
- details,
203
- })),
204
- ..
205
- }) => details,
206
- _ => {
207
- warn!(task_token = ? task_token,
208
- "Expected activity cancelled status with CanceledFailureInfo");
209
- None
210
- }
206
+ let details = if let Some(Failure {
207
+ failure_info:
208
+ Some(FailureInfo::CanceledFailureInfo(CanceledFailureInfo { details })),
209
+ ..
210
+ }) = failure
211
+ {
212
+ details
213
+ } else {
214
+ warn!(task_token = ? task_token,
215
+ "Expected activity cancelled status with CanceledFailureInfo");
216
+ None
211
217
  };
212
218
  gateway
213
219
  .cancel_activity_task(task_token.clone(), details.map(Into::into))
@@ -215,37 +221,24 @@ impl WorkerActivityTasks {
215
221
  .err()
216
222
  }
217
223
  };
218
- match maybe_net_err {
219
- Some(e) if e.code() == tonic::Code::NotFound => {
224
+
225
+ if let Some(e) = maybe_net_err {
226
+ if e.code() == tonic::Code::NotFound {
220
227
  warn!(task_token = ?task_token, details = ?e, "Activity not found on \
221
228
  completion. This may happen if the activity has already been cancelled but \
222
229
  completed anyway.");
223
- true
224
- }
225
- Some(err) => return Err(err.into()),
226
- None => true,
227
- }
228
- } else {
229
- true
230
+ } else {
231
+ return Err(e.into());
232
+ };
233
+ };
230
234
  };
231
-
232
- if should_remove
233
- && self
234
- .outstanding_activity_tasks
235
- .remove(&task_token)
236
- .is_some()
237
- {
238
- self.activities_semaphore.add_permits(1);
239
- self.heartbeat_manager.evict(task_token);
240
- }
241
- Ok(())
242
235
  } else {
243
236
  warn!(
244
237
  "Attempted to complete activity task {} but we were not tracking it",
245
238
  &task_token
246
239
  );
247
- Ok(())
248
240
  }
241
+ Ok(())
249
242
  }
250
243
 
251
244
  /// Attempt to record an activity heartbeat
@@ -254,22 +247,30 @@ impl WorkerActivityTasks {
254
247
  details: ActivityHeartbeat,
255
248
  ) -> Result<(), ActivityHeartbeatError> {
256
249
  // TODO: Propagate these back as cancels. Silent fails is too nonobvious
257
- let t: Duration = self
250
+ let heartbeat_timeout: Duration = self
258
251
  .outstanding_activity_tasks
259
252
  .get(&TaskToken(details.task_token.clone()))
260
253
  .ok_or(ActivityHeartbeatError::UnknownActivity)?
261
254
  .heartbeat_timeout
262
255
  .clone()
263
- .ok_or(ActivityHeartbeatError::HeartbeatTimeoutNotSet)?
256
+ // We treat None as 0 (even though heartbeat_timeout is never set to None by the server)
257
+ .unwrap_or_default()
264
258
  .try_into()
259
+ // This technically should never happen since prost duration should be directly mappable
260
+ // to std::time::Duration.
265
261
  .or(Err(ActivityHeartbeatError::InvalidHeartbeatTimeout))?;
262
+
266
263
  // There is a bug in the server that translates non-set heartbeat timeouts into 0 duration.
267
264
  // That's why we treat 0 the same way as None, otherwise we wouldn't know which aggregation
268
265
  // delay to use, and using 0 is not a good idea as SDK would hammer the server too hard.
269
- if t.as_millis() == 0 {
270
- return Err(ActivityHeartbeatError::HeartbeatTimeoutNotSet);
271
- }
272
- self.heartbeat_manager.record(details, t.div(2))
266
+ let throttle_interval = if heartbeat_timeout.as_millis() == 0 {
267
+ self.default_heartbeat_throttle_interval
268
+ } else {
269
+ heartbeat_timeout.mul_f64(0.8)
270
+ };
271
+ let throttle_interval =
272
+ std::cmp::min(throttle_interval, self.max_heartbeat_throttle_interval);
273
+ self.heartbeat_manager.record(details, throttle_interval)
273
274
  }
274
275
 
275
276
  async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
@@ -51,6 +51,17 @@ pub struct WorkerConfig {
51
51
  /// and moved to the non-sticky queue where it may be picked up by any worker.
52
52
  #[builder(default = "Duration::from_secs(10)")]
53
53
  pub sticky_queue_schedule_to_start_timeout: Duration,
54
+
55
+ /// Longest interval for throttling activity heartbeats
56
+ #[builder(default = "Duration::from_secs(60)")]
57
+ pub max_heartbeat_throttle_interval: Duration,
58
+
59
+ /// Default interval for throttling activity heartbeats in case
60
+ /// `ActivityOptions.heartbeat_timeout` is unset.
61
+ /// When the timeout *is* set in the `ActivityOptions`, throttling is set to
62
+ /// `heartbeat_timeout * 0.8`.
63
+ #[builder(default = "Duration::from_secs(30)")]
64
+ pub default_heartbeat_throttle_interval: Duration,
54
65
  }
55
66
 
56
67
  impl WorkerConfigBuilder {
@@ -6,7 +6,7 @@ use crate::{
6
6
  };
7
7
  use arc_swap::ArcSwap;
8
8
  use futures::future::join_all;
9
- use std::{collections::HashMap, ops::Deref, sync::Arc};
9
+ use std::{collections::HashMap, ops::Deref, option::Option, sync::Arc};
10
10
  use tokio::sync::Notify;
11
11
 
12
12
  /// Allows access to workers by task queue name
@@ -40,7 +40,7 @@ impl WorkerDispatcher {
40
40
  .workers
41
41
  .load()
42
42
  .get(&tq)
43
- .map(|wo| wo.is_some())
43
+ .map(Option::is_some)
44
44
  .unwrap_or_default()
45
45
  {
46
46
  return Err(WorkerRegistrationError::WorkerAlreadyRegisteredForQueue(tq));
@@ -77,7 +77,7 @@ impl WorkerDispatcher {
77
77
  self.workers.rcu(|map| {
78
78
  let mut map = HashMap::clone(map);
79
79
  if maybe_worker.is_none() {
80
- maybe_worker = map.get_mut(task_queue).and_then(|o| o.take());
80
+ maybe_worker = map.get_mut(task_queue).and_then(Option::take);
81
81
  }
82
82
  map
83
83
  });
@@ -149,7 +149,7 @@ impl Deref for WorkerRefCt {
149
149
  type Target = Worker;
150
150
 
151
151
  fn deref(&self) -> &Self::Target {
152
- self.inner.as_ref().expect("Must exist").deref()
152
+ self.inner.as_deref().expect("Must exist")
153
153
  }
154
154
  }
155
155
 
@@ -161,7 +161,7 @@ impl Drop for WorkerRefCt {
161
161
  Some(arc) => {
162
162
  // We wait until 2 rather than 1 because we ourselves still have an Arc
163
163
  if Arc::strong_count(arc) == 2 {
164
- self.notify.notify_one()
164
+ self.notify.notify_one();
165
165
  }
166
166
  }
167
167
  };
@@ -6,7 +6,7 @@ pub use crate::worker::config::{WorkerConfig, WorkerConfigBuilder};
6
6
  pub(crate) use dispatcher::WorkerDispatcher;
7
7
 
8
8
  use crate::{
9
- errors::{CompleteWfError, WorkflowUpdateError},
9
+ errors::CompleteWfError,
10
10
  machines::{EmptyWorkflowCommandErr, WFMachinesError},
11
11
  pollers::{
12
12
  new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller,
@@ -98,6 +98,7 @@ impl Worker {
98
98
  let mut wf_task_poll_buffer = new_workflow_task_buffer(
99
99
  sg.gw.clone(),
100
100
  config.task_queue.clone(),
101
+ false,
101
102
  max_nonsticky_polls,
102
103
  max_nonsticky_polls * 2,
103
104
  );
@@ -107,6 +108,7 @@ impl Worker {
107
108
  let mut sp = new_workflow_task_buffer(
108
109
  sg.gw.clone(),
109
110
  sqn.clone(),
111
+ true,
110
112
  max_sticky_polls,
111
113
  max_sticky_polls * 2,
112
114
  );
@@ -172,6 +174,8 @@ impl Worker {
172
174
  ap,
173
175
  sg.gw.clone(),
174
176
  metrics.clone(),
177
+ config.max_heartbeat_throttle_interval,
178
+ config.default_heartbeat_throttle_interval,
175
179
  )
176
180
  }),
177
181
  workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
@@ -219,6 +223,11 @@ impl Worker {
219
223
  self.wft_manager.outstanding_wft()
220
224
  }
221
225
 
226
+ #[cfg(test)]
227
+ pub(crate) fn available_wft_permits(&self) -> usize {
228
+ self.workflows_semaphore.available_permits()
229
+ }
230
+
222
231
  /// Wait until not at the outstanding activity limit, and then poll this worker's task queue for
223
232
  /// new activities.
224
233
  ///
@@ -246,7 +255,7 @@ impl Worker {
246
255
  if let Some(at_mgr) = self.at_task_mgr.as_ref() {
247
256
  let tt = details.task_token.clone();
248
257
  if let Err(e) = at_mgr.record_heartbeat(details) {
249
- warn!(task_token = ?tt, details = ?e, "Activity heartbeat failed.")
258
+ warn!(task_token = ?tt, details = ?e, "Activity heartbeat failed.");
250
259
  }
251
260
  }
252
261
  }
@@ -275,7 +284,7 @@ impl Worker {
275
284
  // We must first check if there are pending workflow activations for workflows that are
276
285
  // currently replaying or otherwise need immediate jobs, and issue those before
277
286
  // bothering the server.
278
- if let Some(pa) = self.wft_manager.next_pending_activation()? {
287
+ if let Some(pa) = self.wft_manager.next_pending_activation() {
279
288
  debug!(activation=%pa, "Sending pending activation to lang");
280
289
  return Ok(pa);
281
290
  }
@@ -284,7 +293,7 @@ impl Worker {
284
293
  // activations, since there may be an eviction etc for whatever run is popped here.
285
294
  if let Some(buff_wft) = self.wft_manager.next_buffered_poll() {
286
295
  match self.apply_server_work(buff_wft).await? {
287
- NewWfTaskOutcome::IssueActivation(a) => return Ok(a),
296
+ Some(a) => return Ok(a),
288
297
  _ => continue,
289
298
  }
290
299
  }
@@ -304,14 +313,8 @@ impl Worker {
304
313
 
305
314
  if let Some(work) = selected_f {
306
315
  self.metrics.wf_tq_poll_ok();
307
- match self.apply_server_work(work).await? {
308
- NewWfTaskOutcome::IssueActivation(a) => return Ok(a),
309
- NewWfTaskOutcome::TaskBuffered => {
310
- // If the task was buffered, it's not actually outstanding, so we can
311
- // immediately return a permit.
312
- self.return_workflow_task_permit();
313
- }
314
- _ => {}
316
+ if let Some(a) = self.apply_server_work(work).await? {
317
+ return Ok(a);
315
318
  }
316
319
  }
317
320
 
@@ -326,7 +329,7 @@ impl Worker {
326
329
  completion: WfActivationCompletion,
327
330
  ) -> Result<(), CompleteWfError> {
328
331
  let wfstatus = completion.status;
329
- let r = match wfstatus {
332
+ let did_complete_wft = match wfstatus {
330
333
  Some(wf_activation_completion::Status::Successful(success)) => {
331
334
  self.wf_activation_success(&completion.run_id, success)
332
335
  .await
@@ -338,11 +341,9 @@ impl Worker {
338
341
  reason: "Workflow completion had empty status field".to_owned(),
339
342
  completion: None,
340
343
  }),
341
- };
342
- self.after_wft_report(&completion.run_id)?;
343
- self.wft_manager.on_activation_done(&completion.run_id);
344
- self.maybe_notify_wtfs_drained();
345
- r
344
+ }?;
345
+ self.after_workflow_activation(&completion.run_id, did_complete_wft);
346
+ Ok(())
346
347
  }
347
348
 
348
349
  fn maybe_notify_wtfs_drained(&self) {
@@ -355,11 +356,11 @@ impl Worker {
355
356
 
356
357
  /// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
357
358
  pub(crate) fn return_workflow_task_permit(&self) {
358
- self.workflows_semaphore.add_permits(1)
359
+ self.workflows_semaphore.add_permits(1);
359
360
  }
360
361
 
361
- pub(crate) fn request_wf_eviction(&self, run_id: &str) {
362
- self.wft_manager.request_eviction(run_id);
362
+ pub(crate) fn request_wf_eviction(&self, run_id: &str, reason: impl Into<String>) {
363
+ self.wft_manager.request_eviction(run_id, reason);
363
364
  }
364
365
 
365
366
  /// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
@@ -443,18 +444,24 @@ impl Worker {
443
444
  async fn apply_server_work(
444
445
  &self,
445
446
  work: ValidPollWFTQResponse,
446
- ) -> Result<NewWfTaskOutcome, PollWfError> {
447
+ ) -> Result<Option<WfActivation>, PollWfError> {
447
448
  let we = work.workflow_execution.clone();
448
449
  let tt = work.task_token.clone();
449
450
  let res = self
450
451
  .wft_manager
451
452
  .apply_new_poll_resp(work, &self.server_gateway)
452
- .await?;
453
- match &res {
453
+ .await;
454
+ Ok(match res {
454
455
  NewWfTaskOutcome::IssueActivation(a) => {
455
456
  debug!(activation=%a, "Sending activation to lang");
457
+ Some(a)
458
+ }
459
+ NewWfTaskOutcome::TaskBuffered => {
460
+ // If the task was buffered, it's not actually outstanding, so we can
461
+ // immediately return a permit.
462
+ self.return_workflow_task_permit();
463
+ None
456
464
  }
457
- NewWfTaskOutcome::TaskBuffered => {}
458
465
  NewWfTaskOutcome::Autocomplete => {
459
466
  debug!(workflow_execution=?we,
460
467
  "No work for lang to perform after polling server. Sending autocomplete.");
@@ -464,6 +471,7 @@ impl Worker {
464
471
  status: Some(workflow_completion::Success::from_variants(vec![]).into()),
465
472
  })
466
473
  .await?;
474
+ None
467
475
  }
468
476
  NewWfTaskOutcome::CacheMiss => {
469
477
  debug!(workflow_execution=?we, "Unable to process workflow task with partial \
@@ -480,17 +488,28 @@ impl Worker {
480
488
  }),
481
489
  )
482
490
  .await?;
491
+ self.return_workflow_task_permit();
492
+ None
483
493
  }
484
- };
485
- Ok(res)
494
+ NewWfTaskOutcome::Evict(e) => {
495
+ warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
496
+ self.request_wf_eviction(
497
+ &we.run_id,
498
+ format!("Error while applying poll response to workflow: {:?}", e),
499
+ );
500
+ None
501
+ }
502
+ })
486
503
  }
487
504
 
488
- /// Handle a successful workflow completion
505
+ /// Handle a successful workflow activation
506
+ ///
507
+ /// Returns true if we actually reported WFT completion to server (success or failure)
489
508
  async fn wf_activation_success(
490
509
  &self,
491
510
  run_id: &str,
492
511
  success: workflow_completion::Success,
493
- ) -> Result<(), CompleteWfError> {
512
+ ) -> Result<bool, CompleteWfError> {
494
513
  // Convert to wf commands
495
514
  let cmds = success
496
515
  .commands
@@ -534,6 +553,7 @@ impl Worker {
534
553
  .await
535
554
  })
536
555
  .await?;
556
+ Ok(true)
537
557
  }
538
558
  Ok(Some(ServerCommandsWithWorkflowInfo {
539
559
  task_token,
@@ -543,8 +563,9 @@ impl Worker {
543
563
  self.server_gateway
544
564
  .respond_legacy_query(task_token, result)
545
565
  .await?;
566
+ Ok(true)
546
567
  }
547
- Ok(None) => {}
568
+ Ok(None) => Ok(false),
548
569
  Err(update_err) => {
549
570
  // Automatically fail the workflow task in the event we couldn't update machines
550
571
  let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
@@ -554,35 +575,42 @@ impl Worker {
554
575
  WorkflowTaskFailedCause::Unspecified
555
576
  };
556
577
 
578
+ warn!(run_id, error=?update_err, "Failing workflow task");
579
+
557
580
  if let Some(ref tt) = update_err.task_token {
581
+ let wft_fail_str = format!("{:?}", update_err);
558
582
  self.handle_wft_reporting_errs(run_id, || async {
559
583
  self.server_gateway
560
584
  .fail_workflow_task(
561
585
  tt.clone(),
562
586
  fail_cause,
563
- Some(Failure::application_failure(
564
- format!("{:?}", update_err),
565
- false,
566
- )),
587
+ Some(Failure::application_failure(wft_fail_str.clone(), false)),
567
588
  )
568
589
  .await
569
590
  })
570
591
  .await?;
592
+ // We must evict the workflow since we've failed a WFT
593
+ self.request_wf_eviction(
594
+ run_id,
595
+ format!("Workflow task failure: {}", wft_fail_str),
596
+ );
597
+ Ok(true)
598
+ } else {
599
+ Ok(false)
571
600
  }
572
- return Err(update_err.into());
573
601
  }
574
602
  }
575
-
576
- Ok(())
577
603
  }
578
604
 
579
605
  /// Handle a failed workflow completion
606
+ ///
607
+ /// Returns true if we actually reported WFT completion to server
580
608
  async fn wf_activation_failed(
581
609
  &self,
582
610
  run_id: &str,
583
611
  failure: workflow_completion::Failure,
584
- ) -> Result<(), CompleteWfError> {
585
- match self.wft_manager.failed_activation(run_id) {
612
+ ) -> Result<bool, CompleteWfError> {
613
+ Ok(match self.wft_manager.failed_activation(run_id) {
586
614
  FailedActivationOutcome::Report(tt) => {
587
615
  self.handle_wft_reporting_errs(run_id, || async {
588
616
  self.server_gateway
@@ -594,23 +622,25 @@ impl Worker {
594
622
  .await
595
623
  })
596
624
  .await?;
625
+ true
597
626
  }
598
627
  FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
599
628
  self.server_gateway
600
629
  .respond_legacy_query(task_token, legacy_query_failure(failure))
601
630
  .await?;
631
+ true
602
632
  }
603
- _ => {}
604
- }
605
-
606
- Ok(())
633
+ FailedActivationOutcome::NoReport => false,
634
+ })
607
635
  }
608
636
 
609
- fn after_wft_report(&self, run_id: &str) -> Result<(), WorkflowUpdateError> {
610
- if self.wft_manager.after_wft_report(run_id)? {
637
+ fn after_workflow_activation(&self, run_id: &str, did_complete_wft: bool) {
638
+ self.wft_manager.after_wft_report(run_id);
639
+ if did_complete_wft {
611
640
  self.return_workflow_task_permit();
612
- };
613
- Ok(())
641
+ }
642
+ self.wft_manager.on_activation_done(run_id);
643
+ self.maybe_notify_wtfs_drained();
614
644
  }
615
645
 
616
646
  /// Handle server errors from either completing or failing a workflow task. Returns any errors
@@ -630,12 +660,12 @@ impl Worker {
630
660
  // Silence unhandled command errors since the lang SDK cannot do anything about
631
661
  // them besides poll again, which it will do anyway.
632
662
  tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
633
- warn!("Unhandled command response when completing: {}", err);
663
+ warn!(error = %err, "Unhandled command response when completing");
634
664
  should_evict = true;
635
665
  Ok(())
636
666
  }
637
667
  tonic::Code::NotFound => {
638
- warn!("Task not found when completing: {}", err);
668
+ warn!(error = %err, "Task not found when completing");
639
669
  should_evict = true;
640
670
  Ok(())
641
671
  }
@@ -645,7 +675,7 @@ impl Worker {
645
675
  _ => Ok(()),
646
676
  };
647
677
  if should_evict {
648
- self.wft_manager.request_eviction(run_id);
678
+ self.request_wf_eviction(run_id, "Error reporting WFT to server");
649
679
  }
650
680
  res.map_err(Into::into)
651
681
  }
@@ -704,7 +734,7 @@ mod tests {
704
734
 
705
735
  let cfg = WorkerConfigBuilder::default()
706
736
  .task_queue("whatever")
707
- .max_outstanding_activities(5usize)
737
+ .max_outstanding_activities(5_usize)
708
738
  .build()
709
739
  .unwrap();
710
740
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -717,12 +747,12 @@ mod tests {
717
747
  let mut mock_gateway = MockServerGatewayApis::new();
718
748
  mock_gateway
719
749
  .expect_poll_workflow_task()
720
- .returning(|_| Ok(PollWorkflowTaskQueueResponse::default()));
750
+ .returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
721
751
  let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
722
752
 
723
753
  let cfg = WorkerConfigBuilder::default()
724
754
  .task_queue("whatever")
725
- .max_outstanding_workflow_tasks(5usize)
755
+ .max_outstanding_workflow_tasks(5_usize)
726
756
  .build()
727
757
  .unwrap();
728
758
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -740,7 +770,7 @@ mod tests {
740
770
 
741
771
  let cfg = WorkerConfigBuilder::default()
742
772
  .task_queue("whatever")
743
- .max_outstanding_activities(5usize)
773
+ .max_outstanding_activities(5_usize)
744
774
  .build()
745
775
  .unwrap();
746
776
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -753,12 +783,12 @@ mod tests {
753
783
  let mut mock_gateway = MockServerGatewayApis::new();
754
784
  mock_gateway
755
785
  .expect_poll_workflow_task()
756
- .returning(|_| Err(tonic::Status::internal("ahhh")));
786
+ .returning(|_, _| Err(tonic::Status::internal("ahhh")));
757
787
  let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
758
788
 
759
789
  let cfg = WorkerConfigBuilder::default()
760
790
  .task_queue("whatever")
761
- .max_outstanding_workflow_tasks(5usize)
791
+ .max_outstanding_workflow_tasks(5_usize)
762
792
  .build()
763
793
  .unwrap();
764
794
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -33,7 +33,7 @@ impl DrivenWorkflow {
33
33
  /// Start the workflow
34
34
  pub fn start(&mut self, attribs: WorkflowExecutionStartedEventAttributes) {
35
35
  debug!(run_id = %attribs.original_execution_run_id, "Driven WF start");
36
- self.started_attrs = Some(attribs)
36
+ self.started_attrs = Some(attribs);
37
37
  }
38
38
 
39
39
  /// Enqueue a new job to be sent to the driven workflow
@@ -51,12 +51,12 @@ impl DrivenWorkflow {
51
51
 
52
52
  /// Signal the workflow
53
53
  pub fn signal(&mut self, signal: SignalWorkflow) {
54
- self.send_job(wf_activation_job::Variant::SignalWorkflow(signal))
54
+ self.send_job(wf_activation_job::Variant::SignalWorkflow(signal));
55
55
  }
56
56
 
57
57
  /// Cancel the workflow
58
58
  pub fn cancel(&mut self, attribs: CancelWorkflow) {
59
- self.send_job(wf_activation_job::Variant::CancelWorkflow(attribs))
59
+ self.send_job(wf_activation_job::Variant::CancelWorkflow(attribs));
60
60
  }
61
61
  }
62
62
 
@@ -324,7 +324,7 @@ mod tests {
324
324
  .take_next_wft_sequence(last_started_id)
325
325
  .await
326
326
  .unwrap();
327
- for e in seq.iter() {
327
+ for e in &seq {
328
328
  last_event_id += 1;
329
329
  assert_eq!(e.event_id, last_event_id);
330
330
  }
@@ -62,7 +62,7 @@ impl WorkflowManager {
62
62
  }
63
63
 
64
64
  #[cfg(test)]
65
- pub fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
65
+ pub const fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
66
66
  Self {
67
67
  machines: workflow_machines,
68
68
  command_sink: None,
@@ -277,6 +277,7 @@ pub mod managed_wf {
277
277
  // Send an eviction to ensure wf exits if it has not finished (ex: feeding partial hist)
278
278
  let _ = self.activation_tx.send(create_evict_activation(
279
279
  "not actually important".to_string(),
280
+ "force shutdown".to_string(),
280
281
  ));
281
282
  self.future_handle.take().unwrap().await.unwrap()
282
283
  }