@temporalio/core-bridge 0.13.0 → 0.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/Cargo.lock +203 -78
  2. package/Cargo.toml +3 -3
  3. package/index.d.ts +195 -0
  4. package/index.node +0 -0
  5. package/package.json +10 -6
  6. package/releases/aarch64-apple-darwin/index.node +0 -0
  7. package/releases/{x86_64-pc-windows-gnu → aarch64-unknown-linux-gnu}/index.node +0 -0
  8. package/releases/x86_64-apple-darwin/index.node +0 -0
  9. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  10. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  11. package/scripts/build.js +77 -34
  12. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  13. package/sdk-core/CODEOWNERS +1 -1
  14. package/sdk-core/Cargo.toml +6 -5
  15. package/sdk-core/fsm/Cargo.toml +1 -1
  16. package/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +2 -2
  17. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +8 -9
  18. package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +13 -7
  19. package/sdk-core/fsm/rustfsm_trait/Cargo.toml +2 -2
  20. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +1 -1
  21. package/sdk-core/protos/local/activity_result.proto +10 -1
  22. package/sdk-core/protos/local/workflow_activation.proto +6 -3
  23. package/sdk-core/sdk-core-protos/Cargo.toml +4 -4
  24. package/sdk-core/sdk-core-protos/src/lib.rs +44 -49
  25. package/sdk-core/src/core_tests/activity_tasks.rs +5 -5
  26. package/sdk-core/src/core_tests/child_workflows.rs +55 -29
  27. package/sdk-core/src/core_tests/determinism.rs +19 -9
  28. package/sdk-core/src/core_tests/mod.rs +3 -3
  29. package/sdk-core/src/core_tests/retry.rs +96 -2
  30. package/sdk-core/src/core_tests/workers.rs +1 -1
  31. package/sdk-core/src/core_tests/workflow_tasks.rs +278 -4
  32. package/sdk-core/src/errors.rs +27 -44
  33. package/sdk-core/src/lib.rs +13 -3
  34. package/sdk-core/src/machines/activity_state_machine.rs +44 -5
  35. package/sdk-core/src/machines/child_workflow_state_machine.rs +31 -11
  36. package/sdk-core/src/machines/complete_workflow_state_machine.rs +1 -1
  37. package/sdk-core/src/machines/continue_as_new_workflow_state_machine.rs +1 -1
  38. package/sdk-core/src/machines/mod.rs +18 -23
  39. package/sdk-core/src/machines/patch_state_machine.rs +8 -8
  40. package/sdk-core/src/machines/signal_external_state_machine.rs +22 -1
  41. package/sdk-core/src/machines/timer_state_machine.rs +21 -3
  42. package/sdk-core/src/machines/transition_coverage.rs +3 -3
  43. package/sdk-core/src/machines/workflow_machines.rs +11 -11
  44. package/sdk-core/src/pending_activations.rs +27 -22
  45. package/sdk-core/src/pollers/gateway.rs +28 -7
  46. package/sdk-core/src/pollers/poll_buffer.rs +6 -5
  47. package/sdk-core/src/pollers/retry.rs +193 -136
  48. package/sdk-core/src/prototype_rust_sdk/workflow_context.rs +61 -46
  49. package/sdk-core/src/prototype_rust_sdk/workflow_future.rs +13 -12
  50. package/sdk-core/src/prototype_rust_sdk.rs +17 -23
  51. package/sdk-core/src/telemetry/metrics.rs +2 -4
  52. package/sdk-core/src/telemetry/mod.rs +6 -7
  53. package/sdk-core/src/test_help/canned_histories.rs +17 -93
  54. package/sdk-core/src/test_help/history_builder.rs +51 -2
  55. package/sdk-core/src/test_help/history_info.rs +2 -2
  56. package/sdk-core/src/test_help/mod.rs +21 -34
  57. package/sdk-core/src/worker/activities/activity_heartbeat_manager.rs +246 -138
  58. package/sdk-core/src/worker/activities.rs +47 -45
  59. package/sdk-core/src/worker/config.rs +11 -0
  60. package/sdk-core/src/worker/dispatcher.rs +5 -5
  61. package/sdk-core/src/worker/mod.rs +86 -56
  62. package/sdk-core/src/workflow/driven_workflow.rs +3 -3
  63. package/sdk-core/src/workflow/history_update.rs +1 -1
  64. package/sdk-core/src/workflow/mod.rs +2 -1
  65. package/sdk-core/src/workflow/workflow_tasks/cache_manager.rs +13 -17
  66. package/sdk-core/src/workflow/workflow_tasks/concurrency_manager.rs +10 -18
  67. package/sdk-core/src/workflow/workflow_tasks/mod.rs +72 -57
  68. package/sdk-core/test_utils/Cargo.toml +1 -1
  69. package/sdk-core/test_utils/src/lib.rs +2 -2
  70. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +131 -2
  71. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +2 -2
  72. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +49 -0
  73. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +2 -2
  74. package/sdk-core/tests/integ_tests/workflow_tests.rs +74 -47
  75. package/src/conversions.rs +17 -0
  76. package/src/errors.rs +0 -7
  77. package/src/lib.rs +0 -20
@@ -10,7 +10,6 @@ use activity_heartbeat_manager::ActivityHeartbeatManager;
10
10
  use dashmap::DashMap;
11
11
  use std::{
12
12
  convert::TryInto,
13
- ops::Div,
14
13
  sync::Arc,
15
14
  time::{Duration, Instant},
16
15
  };
@@ -80,6 +79,9 @@ pub(crate) struct WorkerActivityTasks {
80
79
  activities_semaphore: Semaphore,
81
80
 
82
81
  metrics: MetricsContext,
82
+
83
+ max_heartbeat_throttle_interval: Duration,
84
+ default_heartbeat_throttle_interval: Duration,
83
85
  }
84
86
 
85
87
  impl WorkerActivityTasks {
@@ -88,6 +90,8 @@ impl WorkerActivityTasks {
88
90
  poller: BoxedActPoller,
89
91
  sg: Arc<impl ServerGatewayApis + Send + Sync + 'static + ?Sized>,
90
92
  metrics: MetricsContext,
93
+ max_heartbeat_throttle_interval: Duration,
94
+ default_heartbeat_throttle_interval: Duration,
91
95
  ) -> Self {
92
96
  Self {
93
97
  heartbeat_manager: ActivityHeartbeatManager::new(sg),
@@ -95,12 +99,13 @@ impl WorkerActivityTasks {
95
99
  poller,
96
100
  activities_semaphore: Semaphore::new(max_activity_tasks),
97
101
  metrics,
102
+ max_heartbeat_throttle_interval,
103
+ default_heartbeat_throttle_interval,
98
104
  }
99
105
  }
100
106
 
101
107
  pub(crate) fn notify_shutdown(&self) {
102
108
  self.poller.notify_shutdown();
103
- self.heartbeat_manager.notify_shutdown();
104
109
  }
105
110
 
106
111
  pub(crate) async fn shutdown(self) {
@@ -171,17 +176,21 @@ impl WorkerActivityTasks {
171
176
  status: activity_result::Status,
172
177
  gateway: &(dyn ServerGatewayApis + Send + Sync),
173
178
  ) -> Result<(), CompleteActivityError> {
174
- if let Some(act_info) = self.outstanding_activity_tasks.get(&task_token) {
179
+ if let Some((_, act_info)) = self.outstanding_activity_tasks.remove(&task_token) {
175
180
  let act_metrics = self.metrics.with_new_attrs([
176
181
  activity_type(act_info.activity_type.clone()),
177
182
  workflow_type(act_info.workflow_type.clone()),
178
183
  ]);
179
184
  act_metrics.act_execution_latency(act_info.start_time.elapsed());
185
+ self.activities_semaphore.add_permits(1);
186
+ self.heartbeat_manager.evict(task_token.clone());
187
+ let known_not_found = act_info.known_not_found;
188
+ drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
180
189
 
181
190
  // No need to report activities which we already know the server doesn't care about
182
- let should_remove = if !act_info.known_not_found {
183
- drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
191
+ if !known_not_found {
184
192
  let maybe_net_err = match status {
193
+ activity_result::Status::WillCompleteAsync(_) => None,
185
194
  activity_result::Status::Completed(ar::Success { result }) => gateway
186
195
  .complete_activity_task(task_token.clone(), result.map(Into::into))
187
196
  .await
@@ -194,19 +203,17 @@ impl WorkerActivityTasks {
194
203
  .err()
195
204
  }
196
205
  activity_result::Status::Cancelled(ar::Cancellation { failure }) => {
197
- let details = match failure {
198
- Some(Failure {
199
- failure_info:
200
- Some(FailureInfo::CanceledFailureInfo(CanceledFailureInfo {
201
- details,
202
- })),
203
- ..
204
- }) => details,
205
- _ => {
206
- warn!(task_token = ? task_token,
207
- "Expected activity cancelled status with CanceledFailureInfo");
208
- None
209
- }
206
+ let details = if let Some(Failure {
207
+ failure_info:
208
+ Some(FailureInfo::CanceledFailureInfo(CanceledFailureInfo { details })),
209
+ ..
210
+ }) = failure
211
+ {
212
+ details
213
+ } else {
214
+ warn!(task_token = ? task_token,
215
+ "Expected activity cancelled status with CanceledFailureInfo");
216
+ None
210
217
  };
211
218
  gateway
212
219
  .cancel_activity_task(task_token.clone(), details.map(Into::into))
@@ -214,37 +221,24 @@ impl WorkerActivityTasks {
214
221
  .err()
215
222
  }
216
223
  };
217
- match maybe_net_err {
218
- Some(e) if e.code() == tonic::Code::NotFound => {
224
+
225
+ if let Some(e) = maybe_net_err {
226
+ if e.code() == tonic::Code::NotFound {
219
227
  warn!(task_token = ?task_token, details = ?e, "Activity not found on \
220
228
  completion. This may happen if the activity has already been cancelled but \
221
229
  completed anyway.");
222
- true
223
- }
224
- Some(err) => return Err(err.into()),
225
- None => true,
226
- }
227
- } else {
228
- true
230
+ } else {
231
+ return Err(e.into());
232
+ };
233
+ };
229
234
  };
230
-
231
- if should_remove
232
- && self
233
- .outstanding_activity_tasks
234
- .remove(&task_token)
235
- .is_some()
236
- {
237
- self.activities_semaphore.add_permits(1);
238
- self.heartbeat_manager.evict(task_token);
239
- }
240
- Ok(())
241
235
  } else {
242
236
  warn!(
243
237
  "Attempted to complete activity task {} but we were not tracking it",
244
238
  &task_token
245
239
  );
246
- Ok(())
247
240
  }
241
+ Ok(())
248
242
  }
249
243
 
250
244
  /// Attempt to record an activity heartbeat
@@ -253,22 +247,30 @@ impl WorkerActivityTasks {
253
247
  details: ActivityHeartbeat,
254
248
  ) -> Result<(), ActivityHeartbeatError> {
255
249
  // TODO: Propagate these back as cancels. Silent fails is too nonobvious
256
- let t: Duration = self
250
+ let heartbeat_timeout: Duration = self
257
251
  .outstanding_activity_tasks
258
252
  .get(&TaskToken(details.task_token.clone()))
259
253
  .ok_or(ActivityHeartbeatError::UnknownActivity)?
260
254
  .heartbeat_timeout
261
255
  .clone()
262
- .ok_or(ActivityHeartbeatError::HeartbeatTimeoutNotSet)?
256
+ // We treat None as 0 (even though heartbeat_timeout is never set to None by the server)
257
+ .unwrap_or_default()
263
258
  .try_into()
259
+ // This technically should never happen since prost duration should be directly mappable
260
+ // to std::time::Duration.
264
261
  .or(Err(ActivityHeartbeatError::InvalidHeartbeatTimeout))?;
262
+
265
263
  // There is a bug in the server that translates non-set heartbeat timeouts into 0 duration.
266
264
  // That's why we treat 0 the same way as None, otherwise we wouldn't know which aggregation
267
265
  // delay to use, and using 0 is not a good idea as SDK would hammer the server too hard.
268
- if t.as_millis() == 0 {
269
- return Err(ActivityHeartbeatError::HeartbeatTimeoutNotSet);
270
- }
271
- self.heartbeat_manager.record(details, t.div(2))
266
+ let throttle_interval = if heartbeat_timeout.as_millis() == 0 {
267
+ self.default_heartbeat_throttle_interval
268
+ } else {
269
+ heartbeat_timeout.mul_f64(0.8)
270
+ };
271
+ let throttle_interval =
272
+ std::cmp::min(throttle_interval, self.max_heartbeat_throttle_interval);
273
+ self.heartbeat_manager.record(details, throttle_interval)
272
274
  }
273
275
 
274
276
  async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
@@ -51,6 +51,17 @@ pub struct WorkerConfig {
51
51
  /// and moved to the non-sticky queue where it may be picked up by any worker.
52
52
  #[builder(default = "Duration::from_secs(10)")]
53
53
  pub sticky_queue_schedule_to_start_timeout: Duration,
54
+
55
+ /// Longest interval for throttling activity heartbeats
56
+ #[builder(default = "Duration::from_secs(60)")]
57
+ pub max_heartbeat_throttle_interval: Duration,
58
+
59
+ /// Default interval for throttling activity heartbeats in case
60
+ /// `ActivityOptions.heartbeat_timeout` is unset.
61
+ /// When the timeout *is* set in the `ActivityOptions`, throttling is set to
62
+ /// `heartbeat_timeout * 0.8`.
63
+ #[builder(default = "Duration::from_secs(30)")]
64
+ pub default_heartbeat_throttle_interval: Duration,
54
65
  }
55
66
 
56
67
  impl WorkerConfigBuilder {
@@ -6,7 +6,7 @@ use crate::{
6
6
  };
7
7
  use arc_swap::ArcSwap;
8
8
  use futures::future::join_all;
9
- use std::{collections::HashMap, ops::Deref, sync::Arc};
9
+ use std::{collections::HashMap, ops::Deref, option::Option, sync::Arc};
10
10
  use tokio::sync::Notify;
11
11
 
12
12
  /// Allows access to workers by task queue name
@@ -40,7 +40,7 @@ impl WorkerDispatcher {
40
40
  .workers
41
41
  .load()
42
42
  .get(&tq)
43
- .map(|wo| wo.is_some())
43
+ .map(Option::is_some)
44
44
  .unwrap_or_default()
45
45
  {
46
46
  return Err(WorkerRegistrationError::WorkerAlreadyRegisteredForQueue(tq));
@@ -77,7 +77,7 @@ impl WorkerDispatcher {
77
77
  self.workers.rcu(|map| {
78
78
  let mut map = HashMap::clone(map);
79
79
  if maybe_worker.is_none() {
80
- maybe_worker = map.get_mut(task_queue).and_then(|o| o.take());
80
+ maybe_worker = map.get_mut(task_queue).and_then(Option::take);
81
81
  }
82
82
  map
83
83
  });
@@ -149,7 +149,7 @@ impl Deref for WorkerRefCt {
149
149
  type Target = Worker;
150
150
 
151
151
  fn deref(&self) -> &Self::Target {
152
- self.inner.as_ref().expect("Must exist").deref()
152
+ self.inner.as_deref().expect("Must exist")
153
153
  }
154
154
  }
155
155
 
@@ -161,7 +161,7 @@ impl Drop for WorkerRefCt {
161
161
  Some(arc) => {
162
162
  // We wait until 2 rather than 1 because we ourselves still have an Arc
163
163
  if Arc::strong_count(arc) == 2 {
164
- self.notify.notify_one()
164
+ self.notify.notify_one();
165
165
  }
166
166
  }
167
167
  };
@@ -6,7 +6,7 @@ pub use crate::worker::config::{WorkerConfig, WorkerConfigBuilder};
6
6
  pub(crate) use dispatcher::WorkerDispatcher;
7
7
 
8
8
  use crate::{
9
- errors::{CompleteWfError, WorkflowUpdateError},
9
+ errors::CompleteWfError,
10
10
  machines::{EmptyWorkflowCommandErr, WFMachinesError},
11
11
  pollers::{
12
12
  new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller,
@@ -98,6 +98,7 @@ impl Worker {
98
98
  let mut wf_task_poll_buffer = new_workflow_task_buffer(
99
99
  sg.gw.clone(),
100
100
  config.task_queue.clone(),
101
+ false,
101
102
  max_nonsticky_polls,
102
103
  max_nonsticky_polls * 2,
103
104
  );
@@ -107,6 +108,7 @@ impl Worker {
107
108
  let mut sp = new_workflow_task_buffer(
108
109
  sg.gw.clone(),
109
110
  sqn.clone(),
111
+ true,
110
112
  max_sticky_polls,
111
113
  max_sticky_polls * 2,
112
114
  );
@@ -172,6 +174,8 @@ impl Worker {
172
174
  ap,
173
175
  sg.gw.clone(),
174
176
  metrics.clone(),
177
+ config.max_heartbeat_throttle_interval,
178
+ config.default_heartbeat_throttle_interval,
175
179
  )
176
180
  }),
177
181
  workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
@@ -219,6 +223,11 @@ impl Worker {
219
223
  self.wft_manager.outstanding_wft()
220
224
  }
221
225
 
226
+ #[cfg(test)]
227
+ pub(crate) fn available_wft_permits(&self) -> usize {
228
+ self.workflows_semaphore.available_permits()
229
+ }
230
+
222
231
  /// Wait until not at the outstanding activity limit, and then poll this worker's task queue for
223
232
  /// new activities.
224
233
  ///
@@ -246,7 +255,7 @@ impl Worker {
246
255
  if let Some(at_mgr) = self.at_task_mgr.as_ref() {
247
256
  let tt = details.task_token.clone();
248
257
  if let Err(e) = at_mgr.record_heartbeat(details) {
249
- warn!(task_token = ?tt, details = ?e, "Activity heartbeat failed.")
258
+ warn!(task_token = ?tt, details = ?e, "Activity heartbeat failed.");
250
259
  }
251
260
  }
252
261
  }
@@ -275,7 +284,7 @@ impl Worker {
275
284
  // We must first check if there are pending workflow activations for workflows that are
276
285
  // currently replaying or otherwise need immediate jobs, and issue those before
277
286
  // bothering the server.
278
- if let Some(pa) = self.wft_manager.next_pending_activation()? {
287
+ if let Some(pa) = self.wft_manager.next_pending_activation() {
279
288
  debug!(activation=%pa, "Sending pending activation to lang");
280
289
  return Ok(pa);
281
290
  }
@@ -284,7 +293,7 @@ impl Worker {
284
293
  // activations, since there may be an eviction etc for whatever run is popped here.
285
294
  if let Some(buff_wft) = self.wft_manager.next_buffered_poll() {
286
295
  match self.apply_server_work(buff_wft).await? {
287
- NewWfTaskOutcome::IssueActivation(a) => return Ok(a),
296
+ Some(a) => return Ok(a),
288
297
  _ => continue,
289
298
  }
290
299
  }
@@ -304,14 +313,8 @@ impl Worker {
304
313
 
305
314
  if let Some(work) = selected_f {
306
315
  self.metrics.wf_tq_poll_ok();
307
- match self.apply_server_work(work).await? {
308
- NewWfTaskOutcome::IssueActivation(a) => return Ok(a),
309
- NewWfTaskOutcome::TaskBuffered => {
310
- // If the task was buffered, it's not actually outstanding, so we can
311
- // immediately return a permit.
312
- self.return_workflow_task_permit();
313
- }
314
- _ => {}
316
+ if let Some(a) = self.apply_server_work(work).await? {
317
+ return Ok(a);
315
318
  }
316
319
  }
317
320
 
@@ -326,7 +329,7 @@ impl Worker {
326
329
  completion: WfActivationCompletion,
327
330
  ) -> Result<(), CompleteWfError> {
328
331
  let wfstatus = completion.status;
329
- let r = match wfstatus {
332
+ let did_complete_wft = match wfstatus {
330
333
  Some(wf_activation_completion::Status::Successful(success)) => {
331
334
  self.wf_activation_success(&completion.run_id, success)
332
335
  .await
@@ -338,11 +341,9 @@ impl Worker {
338
341
  reason: "Workflow completion had empty status field".to_owned(),
339
342
  completion: None,
340
343
  }),
341
- };
342
- self.after_wft_report(&completion.run_id)?;
343
- self.wft_manager.on_activation_done(&completion.run_id);
344
- self.maybe_notify_wtfs_drained();
345
- r
344
+ }?;
345
+ self.after_workflow_activation(&completion.run_id, did_complete_wft);
346
+ Ok(())
346
347
  }
347
348
 
348
349
  fn maybe_notify_wtfs_drained(&self) {
@@ -355,11 +356,11 @@ impl Worker {
355
356
 
356
357
  /// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
357
358
  pub(crate) fn return_workflow_task_permit(&self) {
358
- self.workflows_semaphore.add_permits(1)
359
+ self.workflows_semaphore.add_permits(1);
359
360
  }
360
361
 
361
- pub(crate) fn request_wf_eviction(&self, run_id: &str) {
362
- self.wft_manager.request_eviction(run_id);
362
+ pub(crate) fn request_wf_eviction(&self, run_id: &str, reason: impl Into<String>) {
363
+ self.wft_manager.request_eviction(run_id, reason);
363
364
  }
364
365
 
365
366
  /// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
@@ -443,18 +444,24 @@ impl Worker {
443
444
  async fn apply_server_work(
444
445
  &self,
445
446
  work: ValidPollWFTQResponse,
446
- ) -> Result<NewWfTaskOutcome, PollWfError> {
447
+ ) -> Result<Option<WfActivation>, PollWfError> {
447
448
  let we = work.workflow_execution.clone();
448
449
  let tt = work.task_token.clone();
449
450
  let res = self
450
451
  .wft_manager
451
452
  .apply_new_poll_resp(work, &self.server_gateway)
452
- .await?;
453
- match &res {
453
+ .await;
454
+ Ok(match res {
454
455
  NewWfTaskOutcome::IssueActivation(a) => {
455
456
  debug!(activation=%a, "Sending activation to lang");
457
+ Some(a)
458
+ }
459
+ NewWfTaskOutcome::TaskBuffered => {
460
+ // If the task was buffered, it's not actually outstanding, so we can
461
+ // immediately return a permit.
462
+ self.return_workflow_task_permit();
463
+ None
456
464
  }
457
- NewWfTaskOutcome::TaskBuffered => {}
458
465
  NewWfTaskOutcome::Autocomplete => {
459
466
  debug!(workflow_execution=?we,
460
467
  "No work for lang to perform after polling server. Sending autocomplete.");
@@ -464,6 +471,7 @@ impl Worker {
464
471
  status: Some(workflow_completion::Success::from_variants(vec![]).into()),
465
472
  })
466
473
  .await?;
474
+ None
467
475
  }
468
476
  NewWfTaskOutcome::CacheMiss => {
469
477
  debug!(workflow_execution=?we, "Unable to process workflow task with partial \
@@ -480,17 +488,27 @@ impl Worker {
480
488
  }),
481
489
  )
482
490
  .await?;
491
+ None
483
492
  }
484
- };
485
- Ok(res)
493
+ NewWfTaskOutcome::Evict(e) => {
494
+ warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
495
+ self.request_wf_eviction(
496
+ &we.run_id,
497
+ format!("Error while applying poll response to workflow: {:?}", e),
498
+ );
499
+ None
500
+ }
501
+ })
486
502
  }
487
503
 
488
- /// Handle a successful workflow completion
504
+ /// Handle a successful workflow activation
505
+ ///
506
+ /// Returns true if we actually reported WFT completion to server (success or failure)
489
507
  async fn wf_activation_success(
490
508
  &self,
491
509
  run_id: &str,
492
510
  success: workflow_completion::Success,
493
- ) -> Result<(), CompleteWfError> {
511
+ ) -> Result<bool, CompleteWfError> {
494
512
  // Convert to wf commands
495
513
  let cmds = success
496
514
  .commands
@@ -534,6 +552,7 @@ impl Worker {
534
552
  .await
535
553
  })
536
554
  .await?;
555
+ Ok(true)
537
556
  }
538
557
  Ok(Some(ServerCommandsWithWorkflowInfo {
539
558
  task_token,
@@ -543,8 +562,9 @@ impl Worker {
543
562
  self.server_gateway
544
563
  .respond_legacy_query(task_token, result)
545
564
  .await?;
565
+ Ok(true)
546
566
  }
547
- Ok(None) => {}
567
+ Ok(None) => Ok(false),
548
568
  Err(update_err) => {
549
569
  // Automatically fail the workflow task in the event we couldn't update machines
550
570
  let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
@@ -554,35 +574,42 @@ impl Worker {
554
574
  WorkflowTaskFailedCause::Unspecified
555
575
  };
556
576
 
577
+ warn!(run_id, error=?update_err, "Failing workflow task");
578
+
557
579
  if let Some(ref tt) = update_err.task_token {
580
+ let wft_fail_str = format!("{:?}", update_err);
558
581
  self.handle_wft_reporting_errs(run_id, || async {
559
582
  self.server_gateway
560
583
  .fail_workflow_task(
561
584
  tt.clone(),
562
585
  fail_cause,
563
- Some(Failure::application_failure(
564
- format!("{:?}", update_err),
565
- false,
566
- )),
586
+ Some(Failure::application_failure(wft_fail_str.clone(), false)),
567
587
  )
568
588
  .await
569
589
  })
570
590
  .await?;
591
+ // We must evict the workflow since we've failed a WFT
592
+ self.request_wf_eviction(
593
+ run_id,
594
+ format!("Workflow task failure: {}", wft_fail_str),
595
+ );
596
+ Ok(true)
597
+ } else {
598
+ Ok(false)
571
599
  }
572
- return Err(update_err.into());
573
600
  }
574
601
  }
575
-
576
- Ok(())
577
602
  }
578
603
 
579
604
  /// Handle a failed workflow completion
605
+ ///
606
+ /// Returns true if we actually reported WFT completion to server
580
607
  async fn wf_activation_failed(
581
608
  &self,
582
609
  run_id: &str,
583
610
  failure: workflow_completion::Failure,
584
- ) -> Result<(), CompleteWfError> {
585
- match self.wft_manager.failed_activation(run_id) {
611
+ ) -> Result<bool, CompleteWfError> {
612
+ Ok(match self.wft_manager.failed_activation(run_id) {
586
613
  FailedActivationOutcome::Report(tt) => {
587
614
  self.handle_wft_reporting_errs(run_id, || async {
588
615
  self.server_gateway
@@ -594,23 +621,25 @@ impl Worker {
594
621
  .await
595
622
  })
596
623
  .await?;
624
+ true
597
625
  }
598
626
  FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
599
627
  self.server_gateway
600
628
  .respond_legacy_query(task_token, legacy_query_failure(failure))
601
629
  .await?;
630
+ true
602
631
  }
603
- _ => {}
604
- }
605
-
606
- Ok(())
632
+ FailedActivationOutcome::NoReport => false,
633
+ })
607
634
  }
608
635
 
609
- fn after_wft_report(&self, run_id: &str) -> Result<(), WorkflowUpdateError> {
610
- if self.wft_manager.after_wft_report(run_id)? {
636
+ fn after_workflow_activation(&self, run_id: &str, did_complete_wft: bool) {
637
+ self.wft_manager.after_wft_report(run_id);
638
+ if did_complete_wft {
611
639
  self.return_workflow_task_permit();
612
- };
613
- Ok(())
640
+ }
641
+ self.wft_manager.on_activation_done(run_id);
642
+ self.maybe_notify_wtfs_drained();
614
643
  }
615
644
 
616
645
  /// Handle server errors from either completing or failing a workflow task. Returns any errors
@@ -630,11 +659,12 @@ impl Worker {
630
659
  // Silence unhandled command errors since the lang SDK cannot do anything about
631
660
  // them besides poll again, which it will do anyway.
632
661
  tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
633
- warn!("Unhandled command response when completing: {}", err);
662
+ warn!(error = %err, "Unhandled command response when completing");
663
+ should_evict = true;
634
664
  Ok(())
635
665
  }
636
666
  tonic::Code::NotFound => {
637
- warn!("Task not found when completing: {}", err);
667
+ warn!(error = %err, "Task not found when completing");
638
668
  should_evict = true;
639
669
  Ok(())
640
670
  }
@@ -644,7 +674,7 @@ impl Worker {
644
674
  _ => Ok(()),
645
675
  };
646
676
  if should_evict {
647
- self.wft_manager.request_eviction(run_id);
677
+ self.request_wf_eviction(run_id, "Error reporting WFT to server");
648
678
  }
649
679
  res.map_err(Into::into)
650
680
  }
@@ -703,7 +733,7 @@ mod tests {
703
733
 
704
734
  let cfg = WorkerConfigBuilder::default()
705
735
  .task_queue("whatever")
706
- .max_outstanding_activities(5usize)
736
+ .max_outstanding_activities(5_usize)
707
737
  .build()
708
738
  .unwrap();
709
739
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -716,12 +746,12 @@ mod tests {
716
746
  let mut mock_gateway = MockServerGatewayApis::new();
717
747
  mock_gateway
718
748
  .expect_poll_workflow_task()
719
- .returning(|_| Ok(PollWorkflowTaskQueueResponse::default()));
749
+ .returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
720
750
  let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
721
751
 
722
752
  let cfg = WorkerConfigBuilder::default()
723
753
  .task_queue("whatever")
724
- .max_outstanding_workflow_tasks(5usize)
754
+ .max_outstanding_workflow_tasks(5_usize)
725
755
  .build()
726
756
  .unwrap();
727
757
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -739,7 +769,7 @@ mod tests {
739
769
 
740
770
  let cfg = WorkerConfigBuilder::default()
741
771
  .task_queue("whatever")
742
- .max_outstanding_activities(5usize)
772
+ .max_outstanding_activities(5_usize)
743
773
  .build()
744
774
  .unwrap();
745
775
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -752,12 +782,12 @@ mod tests {
752
782
  let mut mock_gateway = MockServerGatewayApis::new();
753
783
  mock_gateway
754
784
  .expect_poll_workflow_task()
755
- .returning(|_| Err(tonic::Status::internal("ahhh")));
785
+ .returning(|_, _| Err(tonic::Status::internal("ahhh")));
756
786
  let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
757
787
 
758
788
  let cfg = WorkerConfigBuilder::default()
759
789
  .task_queue("whatever")
760
- .max_outstanding_workflow_tasks(5usize)
790
+ .max_outstanding_workflow_tasks(5_usize)
761
791
  .build()
762
792
  .unwrap();
763
793
  let worker = Worker::new(cfg, None, Arc::new(gwref), Default::default());
@@ -33,7 +33,7 @@ impl DrivenWorkflow {
33
33
  /// Start the workflow
34
34
  pub fn start(&mut self, attribs: WorkflowExecutionStartedEventAttributes) {
35
35
  debug!(run_id = %attribs.original_execution_run_id, "Driven WF start");
36
- self.started_attrs = Some(attribs)
36
+ self.started_attrs = Some(attribs);
37
37
  }
38
38
 
39
39
  /// Enqueue a new job to be sent to the driven workflow
@@ -51,12 +51,12 @@ impl DrivenWorkflow {
51
51
 
52
52
  /// Signal the workflow
53
53
  pub fn signal(&mut self, signal: SignalWorkflow) {
54
- self.send_job(wf_activation_job::Variant::SignalWorkflow(signal))
54
+ self.send_job(wf_activation_job::Variant::SignalWorkflow(signal));
55
55
  }
56
56
 
57
57
  /// Cancel the workflow
58
58
  pub fn cancel(&mut self, attribs: CancelWorkflow) {
59
- self.send_job(wf_activation_job::Variant::CancelWorkflow(attribs))
59
+ self.send_job(wf_activation_job::Variant::CancelWorkflow(attribs));
60
60
  }
61
61
  }
62
62
 
@@ -324,7 +324,7 @@ mod tests {
324
324
  .take_next_wft_sequence(last_started_id)
325
325
  .await
326
326
  .unwrap();
327
- for e in seq.iter() {
327
+ for e in &seq {
328
328
  last_event_id += 1;
329
329
  assert_eq!(e.event_id, last_event_id);
330
330
  }
@@ -62,7 +62,7 @@ impl WorkflowManager {
62
62
  }
63
63
 
64
64
  #[cfg(test)]
65
- pub fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
65
+ pub const fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
66
66
  Self {
67
67
  machines: workflow_machines,
68
68
  command_sink: None,
@@ -277,6 +277,7 @@ pub mod managed_wf {
277
277
  // Send an eviction to ensure wf exits if it has not finished (ex: feeding partial hist)
278
278
  let _ = self.activation_tx.send(create_evict_activation(
279
279
  "not actually important".to_string(),
280
+ "force shutdown".to_string(),
280
281
  ));
281
282
  self.future_handle.take().unwrap().await.unwrap()
282
283
  }