@temporalio/core-bridge 0.20.2 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/Cargo.lock +137 -127
  2. package/index.d.ts +7 -2
  3. package/package.json +3 -3
  4. package/releases/aarch64-apple-darwin/index.node +0 -0
  5. package/releases/x86_64-apple-darwin/index.node +0 -0
  6. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  7. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  8. package/sdk-core/.buildkite/docker/docker-compose.yaml +5 -4
  9. package/sdk-core/client/Cargo.toml +1 -0
  10. package/sdk-core/client/src/lib.rs +52 -9
  11. package/sdk-core/client/src/raw.rs +9 -1
  12. package/sdk-core/client/src/retry.rs +12 -1
  13. package/sdk-core/client/src/workflow_handle/mod.rs +183 -0
  14. package/sdk-core/core/src/abstractions.rs +10 -3
  15. package/sdk-core/core/src/core_tests/child_workflows.rs +7 -9
  16. package/sdk-core/core/src/core_tests/determinism.rs +8 -19
  17. package/sdk-core/core/src/core_tests/local_activities.rs +22 -32
  18. package/sdk-core/core/src/core_tests/queries.rs +272 -5
  19. package/sdk-core/core/src/core_tests/workers.rs +4 -34
  20. package/sdk-core/core/src/core_tests/workflow_tasks.rs +197 -41
  21. package/sdk-core/core/src/pending_activations.rs +11 -0
  22. package/sdk-core/core/src/telemetry/mod.rs +1 -1
  23. package/sdk-core/core/src/test_help/mod.rs +57 -7
  24. package/sdk-core/core/src/worker/mod.rs +64 -15
  25. package/sdk-core/core/src/workflow/machines/mod.rs +1 -1
  26. package/sdk-core/core/src/workflow/machines/timer_state_machine.rs +2 -2
  27. package/sdk-core/core/src/workflow/machines/workflow_machines.rs +14 -3
  28. package/sdk-core/core/src/workflow/mod.rs +5 -2
  29. package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +47 -2
  30. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +16 -2
  31. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +252 -125
  32. package/sdk-core/core-api/src/worker.rs +9 -0
  33. package/sdk-core/sdk/Cargo.toml +1 -0
  34. package/sdk-core/sdk/src/activity_context.rs +223 -0
  35. package/sdk-core/sdk/src/interceptors.rs +8 -2
  36. package/sdk-core/sdk/src/lib.rs +167 -122
  37. package/sdk-core/sdk-core-protos/src/history_info.rs +3 -7
  38. package/sdk-core/test-utils/Cargo.toml +1 -0
  39. package/sdk-core/test-utils/src/lib.rs +78 -37
  40. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +11 -4
  41. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +0 -1
  42. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +0 -3
  43. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +33 -17
  44. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +10 -1
  45. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +0 -1
  46. package/sdk-core/tests/integ_tests/workflow_tests.rs +71 -3
  47. package/sdk-core/tests/load_tests.rs +80 -6
  48. package/src/errors.rs +9 -2
  49. package/src/lib.rs +39 -16
  50. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
@@ -23,6 +23,7 @@ use futures::FutureExt;
23
23
  use parking_lot::Mutex;
24
24
  use std::{
25
25
  fmt::Debug,
26
+ future::Future,
26
27
  ops::Add,
27
28
  sync::Arc,
28
29
  time::{Duration, Instant},
@@ -57,7 +58,7 @@ pub struct WorkflowTaskManager {
57
58
  pending_activations: PendingActivations,
58
59
  /// Holds activations which are purely query activations needed to respond to legacy queries.
59
60
  /// Activations may only be added here for runs which do not have other pending activations.
60
- pending_legacy_queries: SegQueue<WorkflowActivation>,
61
+ pending_queries: SegQueue<WorkflowActivation>,
61
62
  /// Holds poll wft responses from the server that need to be applied
62
63
  ready_buffered_wft: SegQueue<ValidPollWFTQResponse>,
63
64
  /// Used to wake blocked workflow task polling
@@ -74,9 +75,8 @@ pub struct WorkflowTaskManager {
74
75
  #[derive(Clone, Debug)]
75
76
  pub(crate) struct OutstandingTask {
76
77
  pub info: WorkflowTaskInfo,
77
- /// If set the outstanding task has query from the old `query` field which must be fulfilled
78
- /// upon finishing replay
79
- pub legacy_query: Option<QueryWorkflow>,
78
+ /// Set if the outstanding task has quer(ies) which must be fulfilled upon finishing replay
79
+ pub pending_queries: Vec<QueryWorkflow>,
80
80
  start_time: Instant,
81
81
  }
82
82
 
@@ -86,17 +86,29 @@ pub(crate) enum OutstandingActivation {
86
86
  Normal {
87
87
  /// True if there is an eviction in the joblist
88
88
  contains_eviction: bool,
89
+ /// Number of jobs in the activation
90
+ num_jobs: usize,
89
91
  },
90
92
  /// An activation for a legacy query
91
93
  LegacyQuery,
92
94
  }
93
95
 
94
96
  impl OutstandingActivation {
97
+ const fn has_only_eviction(self) -> bool {
98
+ matches!(
99
+ self,
100
+ OutstandingActivation::Normal {
101
+ contains_eviction: true,
102
+ num_jobs: nj
103
+ }
104
+ if nj == 1)
105
+ }
95
106
  const fn has_eviction(self) -> bool {
96
107
  matches!(
97
108
  self,
98
109
  OutstandingActivation::Normal {
99
- contains_eviction: true
110
+ contains_eviction: true,
111
+ ..
100
112
  }
101
113
  )
102
114
  }
@@ -152,9 +164,9 @@ pub(crate) enum ActivationAction {
152
164
 
153
165
  #[derive(Debug, Eq, PartialEq, Hash)]
154
166
  pub(crate) enum EvictionRequestResult {
155
- EvictionIssued(Option<u32>),
167
+ EvictionRequested(Option<u32>),
156
168
  NotFound,
157
- EvictionAlreadyOutstanding,
169
+ EvictionAlreadyRequested(Option<u32>),
158
170
  }
159
171
 
160
172
  macro_rules! machine_mut {
@@ -179,7 +191,7 @@ impl WorkflowTaskManager {
179
191
  Self {
180
192
  workflow_machines: WorkflowConcurrencyManager::new(),
181
193
  pending_activations: Default::default(),
182
- pending_legacy_queries: Default::default(),
194
+ pending_queries: Default::default(),
183
195
  ready_buffered_wft: Default::default(),
184
196
  pending_activations_notifier,
185
197
  cache_manager: Mutex::new(WorkflowCacheManager::new(eviction_policy, metrics.clone())),
@@ -187,9 +199,69 @@ impl WorkflowTaskManager {
187
199
  }
188
200
  }
189
201
 
202
+ /// Returns number of currently cached workflows
203
+ pub fn cached_workflows(&self) -> usize {
204
+ self.workflow_machines.cached_workflows()
205
+ }
206
+
207
+ /// Resolves once there is either capacity in the cache, or there are no pending evictions.
208
+ /// Inversely: Waits while there are pending evictions and the cache is full.
209
+ /// Waiting while there are no pending evictions must be avoided because it would block forever,
210
+ /// since there is no way for the cache size to be reduced.
211
+ pub fn wait_for_cache_capacity(&self) -> Option<impl Future<Output = ()> + '_> {
212
+ let are_no_pending_evictions = || {
213
+ !self.pending_activations.is_some_eviction()
214
+ && !self.workflow_machines.are_outstanding_evictions()
215
+ };
216
+ if !are_no_pending_evictions() {
217
+ let wait_fut = {
218
+ self.cache_manager
219
+ .lock()
220
+ .wait_for_capacity(are_no_pending_evictions)?
221
+ };
222
+ return Some(wait_fut);
223
+ }
224
+ None
225
+ }
226
+
227
+ /// Add a new run (as just received from polling) to the cache. If doing so would overflow the
228
+ /// cache, an eviction is queued to make room and the passed-in task is buffered and `None` is
229
+ /// returned.
230
+ ///
231
+ /// If the task is for a run already in the cache, the poll response is returned right away
232
+ /// and should be issued.
233
+ pub async fn add_new_run_to_cache(
234
+ &self,
235
+ poll_resp: ValidPollWFTQResponse,
236
+ ) -> Option<ValidPollWFTQResponse> {
237
+ let run_id = &poll_resp.workflow_execution.run_id;
238
+ let maybe_evicted = self.cache_manager.lock().insert(run_id);
239
+
240
+ if let Some(evicted_run_id) = maybe_evicted {
241
+ self.request_eviction(
242
+ &evicted_run_id,
243
+ "Workflow cache full",
244
+ EvictionReason::CacheFull,
245
+ );
246
+ debug!(run_id=%poll_resp.workflow_execution.run_id,
247
+ "Received a WFT for a new run while at the cache limit. Buffering the task.");
248
+ // Buffer the task
249
+ if let Some(not_buffered) = self
250
+ .workflow_machines
251
+ .buffer_resp_if_outstanding_work(poll_resp)
252
+ {
253
+ self.make_buffered_poll_ready(not_buffered);
254
+ }
255
+
256
+ return None;
257
+ }
258
+
259
+ Some(poll_resp)
260
+ }
261
+
190
262
  pub(crate) fn next_pending_activation(&self) -> Option<WorkflowActivation> {
191
- // Dispatch pending legacy queries first
192
- if let leg_q @ Some(_) = self.pending_legacy_queries.pop() {
263
+ // Dispatch pending queries first
264
+ if let leg_q @ Some(_) = self.pending_queries.pop() {
193
265
  return leg_q;
194
266
  }
195
267
  // It is important that we do not issue pending activations for any workflows which already
@@ -205,15 +277,24 @@ impl WorkflowTaskManager {
205
277
  .workflow_machines
206
278
  .access_sync(&pending_info.run_id, |wfm| wfm.machines.get_wf_activation())
207
279
  .and_then(|mut act| {
208
- if let Some(reason) = pending_info.needs_eviction {
209
- act.append_evict_job(reason);
280
+ // Only evict workflows after all other pending work is complete.
281
+ if act.jobs.is_empty() {
282
+ if let Some(reason) = pending_info.needs_eviction {
283
+ act.append_evict_job(reason);
284
+ }
285
+ }
286
+ if !act.jobs.is_empty() {
287
+ self.insert_outstanding_activation(&act)?;
288
+ self.cache_manager.lock().touch(&act.run_id);
289
+ Ok(Some(act))
290
+ } else {
291
+ // If for whatever reason we triggered a pending activation but there wasn't
292
+ // actually any work to be done, just ignore that.
293
+ Ok(None)
210
294
  }
211
- self.insert_outstanding_activation(&act)?;
212
- Ok(act)
213
295
  })
214
296
  {
215
- self.cache_manager.lock().touch(&act.run_id);
216
- Some(act)
297
+ act
217
298
  } else {
218
299
  self.request_eviction(
219
300
  &pending_info.run_id,
@@ -256,6 +337,10 @@ impl WorkflowTaskManager {
256
337
  reason: EvictionReason,
257
338
  ) -> EvictionRequestResult {
258
339
  if self.workflow_machines.exists(run_id) {
340
+ let attempts = self
341
+ .workflow_machines
342
+ .get_task(run_id)
343
+ .map(|wt| wt.info.attempt);
259
344
  if !self.activation_has_eviction(run_id) {
260
345
  let message = message.into();
261
346
  debug!(%run_id, %message, "Eviction requested");
@@ -263,13 +348,9 @@ impl WorkflowTaskManager {
263
348
  self.pending_activations
264
349
  .notify_needs_eviction(run_id, message, reason);
265
350
  self.pending_activations_notifier.notify_waiters();
266
- EvictionRequestResult::EvictionIssued(
267
- self.workflow_machines
268
- .get_task(run_id)
269
- .map(|wt| wt.info.attempt),
270
- )
351
+ EvictionRequestResult::EvictionRequested(attempts)
271
352
  } else {
272
- EvictionRequestResult::EvictionAlreadyOutstanding
353
+ EvictionRequestResult::EvictionAlreadyRequested(attempts)
273
354
  }
274
355
  } else {
275
356
  warn!(%run_id, "Eviction requested for unknown run");
@@ -277,11 +358,8 @@ impl WorkflowTaskManager {
277
358
  }
278
359
  }
279
360
 
280
- /// Evict a workflow from the cache by its run id and enqueue a pending activation to evict the
281
- /// workflow. Any existing pending activations will be destroyed, and any outstanding
282
- /// activations invalidated.
283
- ///
284
- /// Returns that workflow's task info if it was present.
361
+ /// Evict a workflow from the cache by its run id. Any existing pending activations will be
362
+ /// destroyed, and any outstanding activations invalidated.
285
363
  fn evict_run(&self, run_id: &str) {
286
364
  debug!(run_id=%run_id, "Evicting run");
287
365
 
@@ -333,33 +411,45 @@ impl WorkflowTaskManager {
333
411
  .take()
334
412
  .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
335
413
 
336
- let (info, mut next_activation) =
414
+ let (info, mut next_activation, mut pending_queries) =
337
415
  match self.instantiate_or_update_workflow(work, client).await {
338
- Ok((info, next_activation)) => (info, next_activation),
416
+ Ok(res) => res,
339
417
  Err(e) => {
340
418
  return NewWfTaskOutcome::Evict(e);
341
419
  }
342
420
  };
343
421
 
422
+ if !pending_queries.is_empty() && legacy_query.is_some() {
423
+ error!(
424
+ "Server issued both normal and legacy queries. This should not happen. Please \
425
+ file a bug report."
426
+ );
427
+ return NewWfTaskOutcome::Evict(WorkflowUpdateError {
428
+ source: WFMachinesError::Fatal(
429
+ "Server issued both normal and legacy query".to_string(),
430
+ ),
431
+ run_id: next_activation.run_id,
432
+ });
433
+ }
434
+
344
435
  // Immediately dispatch query activation if no other jobs
345
- let legacy_query = if next_activation.jobs.is_empty() {
346
- if let Some(lq) = legacy_query {
436
+ if let Some(lq) = legacy_query {
437
+ if next_activation.jobs.is_empty() {
347
438
  debug!("Dispatching legacy query {}", &lq);
348
439
  next_activation
349
440
  .jobs
350
441
  .push(workflow_activation_job::Variant::QueryWorkflow(lq).into());
442
+ } else {
443
+ pending_queries.push(lq);
351
444
  }
352
- None
353
- } else {
354
- legacy_query
355
- };
445
+ }
356
446
 
357
447
  self.workflow_machines
358
448
  .insert_wft(
359
449
  &next_activation.run_id,
360
450
  OutstandingTask {
361
451
  info,
362
- legacy_query,
452
+ pending_queries,
363
453
  start_time: task_start_time,
364
454
  },
365
455
  )
@@ -396,20 +486,23 @@ impl WorkflowTaskManager {
396
486
  mut commands: Vec<WFCommand>,
397
487
  local_activity_request_sink: impl FnOnce(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>,
398
488
  ) -> Result<Option<ServerCommandsWithWorkflowInfo>, WorkflowUpdateError> {
399
- // No-command replies to evictions can simply skip everything
400
- if commands.is_empty() && self.activation_has_eviction(run_id) {
401
- return Ok(None);
402
- }
403
-
404
- let (task_token, is_leg_query_task, start_time) =
489
+ // There used to be code here that would return right away if the run reply had no commands
490
+ // and the activation that was just completed only had an eviction in it. That was bad
491
+ // because we wouldn't have yet sent any previously buffered commands since there was a
492
+ // pending activation (the eviction) and then we would *skip* doing anything with them here,
493
+ // because there were no new commands. In general it seems best to avoid short-circuiting
494
+ // here.
495
+
496
+ let activation_was_only_eviction = self.activation_has_only_eviction(run_id);
497
+ let (task_token, has_pending_query, start_time) =
405
498
  if let Some(entry) = self.workflow_machines.get_task(run_id) {
406
499
  (
407
500
  entry.info.task_token.clone(),
408
- entry.legacy_query.is_some(),
501
+ !entry.pending_queries.is_empty(),
409
502
  entry.start_time,
410
503
  )
411
504
  } else {
412
- if !self.activation_has_eviction(run_id) {
505
+ if !activation_was_only_eviction {
413
506
  // Don't bother warning if this was an eviction, since it's normal to issue
414
507
  // eviction activations without an associated workflow task in that case.
415
508
  warn!(
@@ -458,6 +551,7 @@ impl WorkflowTaskManager {
458
551
  }
459
552
  }
460
553
 
554
+ let activation_was_eviction = self.activation_has_eviction(run_id);
461
555
  let (are_pending, server_cmds, local_activities, wft_timeout) = machine_mut!(
462
556
  self,
463
557
  run_id,
@@ -466,7 +560,13 @@ impl WorkflowTaskManager {
466
560
  // Send commands from lang into the machines then check if the workflow run
467
561
  // needs another activation and mark it if so
468
562
  wfm.push_commands(commands).await?;
469
- let are_pending = wfm.apply_next_task_if_ready().await?;
563
+ // Don't bother applying the next task if we're evicting at the end of
564
+ // this activation
565
+ let are_pending = if !activation_was_eviction {
566
+ wfm.apply_next_task_if_ready().await?
567
+ } else {
568
+ false
569
+ };
470
570
  // We want to fetch the outgoing commands only after a next WFT may have
471
571
  // been applied, as outgoing server commands may be affected.
472
572
  let outgoing_cmds = wfm.get_server_commands();
@@ -506,36 +606,34 @@ impl WorkflowTaskManager {
506
606
  let must_heartbeat = self
507
607
  .wait_for_local_acts_or_heartbeat(run_id, wft_heartbeat_deadline)
508
608
  .await;
509
- let is_query_playback = is_leg_query_task && query_responses.is_empty();
609
+ let has_query_responses = !query_responses.is_empty();
610
+ let is_query_playback = has_pending_query && !has_query_responses;
510
611
 
511
612
  // We only actually want to send commands back to the server if there are no more
512
613
  // pending activations and we are caught up on replay. We don't want to complete a wft
513
614
  // if we already saw the final event in the workflow, or if we are playing back for the
514
- // express purpose of fulfilling a query
515
- if !self.pending_activations.has_pending(run_id)
516
- && !server_cmds.replaying
517
- && !is_query_playback
518
- {
519
- Some(ServerCommandsWithWorkflowInfo {
520
- task_token,
521
- action: ActivationAction::WftComplete {
522
- // TODO: Don't force if also sending complete execution cmd
523
- force_new_wft: must_heartbeat,
524
- commands: server_cmds.commands,
525
- query_responses,
526
- },
527
- })
528
- } else if query_responses.is_empty() {
529
- None
615
+ // express purpose of fulfilling a query. If the activation we sent was *only* an
616
+ // eviction, and there were no commands produced during iteration, don't send that
617
+ // either.
618
+ let no_commands_and_evicting =
619
+ server_cmds.commands.is_empty() && activation_was_only_eviction;
620
+ let to_be_sent = ServerCommandsWithWorkflowInfo {
621
+ task_token,
622
+ action: ActivationAction::WftComplete {
623
+ // TODO: Don't force if also sending complete execution cmd
624
+ force_new_wft: must_heartbeat,
625
+ commands: server_cmds.commands,
626
+ query_responses,
627
+ },
628
+ };
629
+ let should_respond = !(self.pending_activations.has_pending(run_id)
630
+ || server_cmds.replaying
631
+ || is_query_playback
632
+ || no_commands_and_evicting);
633
+ if should_respond || has_query_responses {
634
+ Some(to_be_sent)
530
635
  } else {
531
- Some(ServerCommandsWithWorkflowInfo {
532
- task_token,
533
- action: ActivationAction::WftComplete {
534
- commands: vec![],
535
- query_responses,
536
- force_new_wft: false,
537
- },
538
- })
636
+ None
539
637
  }
540
638
  };
541
639
  Ok(ret)
@@ -573,7 +671,8 @@ impl WorkflowTaskManager {
573
671
  } else {
574
672
  // Blow up any cached data associated with the workflow
575
673
  let should_report = match self.request_eviction(run_id, failstr, reason) {
576
- EvictionRequestResult::EvictionIssued(Some(attempt)) => attempt <= 1,
674
+ EvictionRequestResult::EvictionRequested(Some(attempt))
675
+ | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
577
676
  _ => false,
578
677
  };
579
678
  if should_report {
@@ -592,7 +691,8 @@ impl WorkflowTaskManager {
592
691
  &self,
593
692
  poll_wf_resp: ValidPollWFTQResponse,
594
693
  client: Arc<WorkerClientBag>,
595
- ) -> Result<(WorkflowTaskInfo, WorkflowActivation), WorkflowUpdateError> {
694
+ ) -> Result<(WorkflowTaskInfo, WorkflowActivation, Vec<QueryWorkflow>), WorkflowUpdateError>
695
+ {
596
696
  let run_id = poll_wf_resp.workflow_execution.run_id.clone();
597
697
 
598
698
  let wft_info = WorkflowTaskInfo {
@@ -606,11 +706,16 @@ impl WorkflowTaskManager {
606
706
  .get(0)
607
707
  .map(|ev| ev.event_id > 1)
608
708
  .unwrap_or_default();
709
+ let poll_resp_is_incremental =
710
+ poll_resp_is_incremental || poll_wf_resp.history.events.is_empty();
711
+
712
+ let mut did_miss_cache = !poll_resp_is_incremental;
609
713
 
610
714
  let page_token = if !self.workflow_machines.exists(&run_id) && poll_resp_is_incremental {
611
715
  debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
612
716
  cache. Will fetch history");
613
717
  self.metrics.sticky_cache_miss();
718
+ did_miss_cache = true;
614
719
  NextPageToken::FetchFromStart
615
720
  } else {
616
721
  poll_wf_resp.next_page_token.into()
@@ -639,16 +744,26 @@ impl WorkflowTaskManager {
639
744
  .await
640
745
  {
641
746
  Ok(mut activation) => {
642
- // If there are in-poll queries, insert jobs for those queries into the activation
747
+ // If there are in-poll queries, insert jobs for those queries into the activation,
748
+ // but only if we hit the cache. If we didn't, those queries will need to be dealt
749
+ // with once replay is over
750
+ let mut pending_queries = vec![];
643
751
  if !poll_wf_resp.query_requests.is_empty() {
644
- let query_jobs = poll_wf_resp
645
- .query_requests
646
- .into_iter()
647
- .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
648
- activation.jobs.extend(query_jobs);
752
+ if !did_miss_cache {
753
+ let query_jobs = poll_wf_resp
754
+ .query_requests
755
+ .into_iter()
756
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
757
+ activation.jobs.extend(query_jobs);
758
+ } else {
759
+ poll_wf_resp
760
+ .query_requests
761
+ .into_iter()
762
+ .for_each(|q| pending_queries.push(q));
763
+ }
649
764
  }
650
765
 
651
- Ok((wft_info, activation))
766
+ Ok((wft_info, activation, pending_queries))
652
767
  }
653
768
  Err(source) => Err(WorkflowUpdateError { source, run_id }),
654
769
  }
@@ -660,12 +775,14 @@ impl WorkflowTaskManager {
660
775
  /// eviction, which could be avoided if this is called too early.
661
776
  ///
662
777
  /// Returns true if WFT was marked completed internally
663
- pub(crate) fn after_wft_report(&self, run_id: &str, did_complete_wft: bool) -> bool {
778
+ pub(crate) fn after_wft_report(&self, run_id: &str, reported_wft_to_server: bool) -> bool {
664
779
  let mut just_evicted = false;
665
780
 
666
- if let Some(OutstandingActivation::Normal {
667
- contains_eviction: true,
668
- }) = self.workflow_machines.get_activation(run_id)
781
+ if self
782
+ .workflow_machines
783
+ .get_activation(run_id)
784
+ .map(|a| a.has_eviction())
785
+ .unwrap_or_default()
669
786
  {
670
787
  self.evict_run(run_id);
671
788
  just_evicted = true;
@@ -673,50 +790,48 @@ impl WorkflowTaskManager {
673
790
 
674
791
  // Workflows with no more pending activations (IE: They have completed a WFT) must be
675
792
  // removed from the outstanding tasks map
676
- let retme = if !self.pending_activations.has_pending(run_id) {
677
- if !just_evicted {
678
- // Check if there was a legacy query which must be fulfilled, and if there is create
679
- // a new pending activation for it.
680
- if let Some(ref mut ot) = &mut *self
681
- .workflow_machines
682
- .get_task_mut(run_id)
683
- .expect("Machine must exist")
684
- {
685
- if let Some(query) = ot.legacy_query.take() {
793
+ if !self.pending_activations.has_pending(run_id) && !just_evicted {
794
+ if let Some(ref mut ot) = &mut *self
795
+ .workflow_machines
796
+ .get_task_mut(run_id)
797
+ .expect("Machine must exist")
798
+ {
799
+ // Check if there was a pending query which must be fulfilled, and if there is
800
+ // create a new pending activation for it.
801
+ if !ot.pending_queries.is_empty() {
802
+ for query in ot.pending_queries.drain(..) {
686
803
  let na = create_query_activation(run_id.to_string(), [query]);
687
- self.pending_legacy_queries.push(na);
688
- self.pending_activations_notifier.notify_waiters();
689
- return false;
804
+ self.pending_queries.push(na);
690
805
  }
806
+ self.pending_activations_notifier.notify_waiters();
807
+ return false;
691
808
  }
809
+ }
692
810
 
693
- // Evict run id if cache is full. Non-sticky will always evict.
694
- let maybe_evicted = self.cache_manager.lock().insert(run_id);
695
- if let Some(evicted_run_id) = maybe_evicted {
696
- self.request_eviction(
697
- &evicted_run_id,
698
- "Workflow cache full",
699
- EvictionReason::CacheFull,
700
- );
701
- }
811
+ // Evict run id if cache is full. Non-sticky will always evict.
812
+ let maybe_evicted = self.cache_manager.lock().insert(run_id);
813
+ if let Some(evicted_run_id) = maybe_evicted {
814
+ self.request_eviction(
815
+ &evicted_run_id,
816
+ "Workflow cache full",
817
+ EvictionReason::CacheFull,
818
+ );
819
+ }
702
820
 
703
- // If there was a buffered poll response from the server, it is now ready to
704
- // be handled.
705
- if let Some(buffd) = self.workflow_machines.take_buffered_poll(run_id) {
706
- self.make_buffered_poll_ready(buffd);
707
- }
821
+ // If there was a buffered poll response from the server, it is now ready to
822
+ // be handled.
823
+ if let Some(buffd) = self.workflow_machines.take_buffered_poll(run_id) {
824
+ self.make_buffered_poll_ready(buffd);
708
825
  }
826
+ }
709
827
 
710
- // The evict may or may not have already done this, but even when we aren't evicting
711
- // we want to clear the outstanding workflow task since it's now complete.
712
- self.workflow_machines
713
- .complete_wft(run_id, did_complete_wft)
714
- .is_some()
715
- } else {
716
- false
717
- };
828
+ // If we reported to server, we always want to mark it complete.
829
+ let wft_marked_complete = self
830
+ .workflow_machines
831
+ .complete_wft(run_id, reported_wft_to_server)
832
+ .is_some();
718
833
  self.on_activation_done(run_id);
719
- retme
834
+ wft_marked_complete
720
835
  }
721
836
 
722
837
  /// Must be called after *every* activation is replied to, regardless of whether or not we
@@ -725,10 +840,11 @@ impl WorkflowTaskManager {
725
840
  ///
726
841
  /// Any subsequent action that needs to be taken will be created as a new activation
727
842
  fn on_activation_done(&self, run_id: &str) {
728
- if self.workflow_machines.delete_activation(run_id).is_some() {
729
- self.pending_activations_notifier.notify_waiters();
730
- }
731
- // It's possible the activation is already removed due to completing an eviction
843
+ self.workflow_machines.delete_activation(run_id);
844
+ // It's important to use `notify_one` here to avoid possible races where we're waiting
845
+ // on a cache slot and fail to realize pending activations must be issued before a slot
846
+ // will free up.
847
+ self.pending_activations_notifier.notify_one();
732
848
  }
733
849
 
734
850
  /// Let a workflow know that something we've been waiting locally on has resolved, like a local
@@ -739,7 +855,8 @@ impl WorkflowTaskManager {
739
855
  run_id: &str,
740
856
  resolved: LocalResolution,
741
857
  ) -> Result<(), WorkflowUpdateError> {
742
- self.workflow_machines
858
+ let result_was_important = self
859
+ .workflow_machines
743
860
  .access_sync(run_id, |wfm: &mut WorkflowManager| {
744
861
  wfm.notify_of_local_result(resolved)
745
862
  })?
@@ -748,7 +865,9 @@ impl WorkflowTaskManager {
748
865
  run_id: run_id.to_string(),
749
866
  })?;
750
867
 
751
- self.needs_activation(run_id);
868
+ if result_was_important {
869
+ self.needs_activation(run_id);
870
+ }
752
871
  Ok(())
753
872
  }
754
873
 
@@ -765,6 +884,7 @@ impl WorkflowTaskManager {
765
884
  } else {
766
885
  OutstandingActivation::Normal {
767
886
  contains_eviction: act.eviction_index().is_some(),
887
+ num_jobs: act.jobs.len(),
768
888
  }
769
889
  };
770
890
  match self
@@ -785,6 +905,13 @@ impl WorkflowTaskManager {
785
905
  }
786
906
  }
787
907
 
908
+ fn activation_has_only_eviction(&self, run_id: &str) -> bool {
909
+ self.workflow_machines
910
+ .get_activation(run_id)
911
+ .map(OutstandingActivation::has_only_eviction)
912
+ .unwrap_or_default()
913
+ }
914
+
788
915
  fn activation_has_eviction(&self, run_id: &str) -> bool {
789
916
  self.workflow_machines
790
917
  .get_activation(run_id)
@@ -20,6 +20,8 @@ pub struct WorkerConfig {
20
20
  /// The maximum allowed number of workflow tasks that will ever be given to this worker at one
21
21
  /// time. Note that one workflow task may require multiple activations - so the WFT counts as
22
22
  /// "outstanding" until all activations it requires have been completed.
23
+ ///
24
+ /// Cannot be larger than `max_cached_workflows`.
23
25
  #[builder(default = "100")]
24
26
  pub max_outstanding_workflow_tasks: usize,
25
27
  /// The maximum number of activity tasks that will ever be given to this worker concurrently
@@ -90,6 +92,13 @@ impl WorkerConfigBuilder {
90
92
  if self.max_concurrent_wft_polls == Some(0) {
91
93
  return Err("`max_concurrent_wft_polls` must be at least 1".to_owned());
92
94
  }
95
+ if self.max_outstanding_workflow_tasks > self.max_cached_workflows {
96
+ return Err(
97
+ "Maximum concurrent workflow tasks cannot exceed the maximum number of cached \
98
+ workflows"
99
+ .to_owned(),
100
+ );
101
+ }
93
102
  Ok(())
94
103
  }
95
104
  }
@@ -13,6 +13,7 @@ categories = ["development-tools"]
13
13
  # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
14
14
 
15
15
  [dependencies]
16
+ async-trait = "0.1"
16
17
  anyhow = "1.0"
17
18
  base64 = "0.13"
18
19
  crossbeam = "0.8"