@temporalio/core-bridge 0.19.2 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/Cargo.lock +90 -157
  2. package/Cargo.toml +1 -0
  3. package/index.d.ts +11 -27
  4. package/package.json +3 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +1 -1
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.cargo/config.toml +1 -0
  13. package/sdk-core/CODEOWNERS +1 -1
  14. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +119 -86
  15. package/sdk-core/bridge-ffi/src/lib.rs +311 -315
  16. package/sdk-core/bridge-ffi/src/wrappers.rs +108 -113
  17. package/sdk-core/client/Cargo.toml +13 -9
  18. package/sdk-core/client/LICENSE.txt +23 -0
  19. package/sdk-core/client/src/lib.rs +286 -174
  20. package/sdk-core/client/src/metrics.rs +86 -12
  21. package/sdk-core/client/src/raw.rs +566 -0
  22. package/sdk-core/client/src/retry.rs +137 -99
  23. package/sdk-core/core/Cargo.toml +15 -10
  24. package/sdk-core/core/LICENSE.txt +23 -0
  25. package/sdk-core/core/benches/workflow_replay.rs +79 -0
  26. package/sdk-core/core/src/abstractions.rs +38 -0
  27. package/sdk-core/core/src/core_tests/activity_tasks.rs +108 -182
  28. package/sdk-core/core/src/core_tests/child_workflows.rs +16 -11
  29. package/sdk-core/core/src/core_tests/determinism.rs +24 -12
  30. package/sdk-core/core/src/core_tests/local_activities.rs +53 -27
  31. package/sdk-core/core/src/core_tests/mod.rs +30 -43
  32. package/sdk-core/core/src/core_tests/queries.rs +82 -81
  33. package/sdk-core/core/src/core_tests/workers.rs +111 -296
  34. package/sdk-core/core/src/core_tests/workflow_cancels.rs +4 -4
  35. package/sdk-core/core/src/core_tests/workflow_tasks.rs +257 -242
  36. package/sdk-core/core/src/lib.rs +73 -318
  37. package/sdk-core/core/src/pollers/mod.rs +4 -6
  38. package/sdk-core/core/src/pollers/poll_buffer.rs +20 -14
  39. package/sdk-core/core/src/protosext/mod.rs +7 -10
  40. package/sdk-core/core/src/replay/mod.rs +11 -150
  41. package/sdk-core/core/src/telemetry/metrics.rs +35 -2
  42. package/sdk-core/core/src/telemetry/mod.rs +49 -16
  43. package/sdk-core/core/src/telemetry/prometheus_server.rs +14 -35
  44. package/sdk-core/core/src/test_help/mod.rs +104 -170
  45. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +57 -34
  46. package/sdk-core/core/src/worker/activities/local_activities.rs +95 -23
  47. package/sdk-core/core/src/worker/activities.rs +23 -16
  48. package/sdk-core/core/src/worker/client/mocks.rs +86 -0
  49. package/sdk-core/core/src/worker/client.rs +209 -0
  50. package/sdk-core/core/src/worker/mod.rs +207 -108
  51. package/sdk-core/core/src/workflow/driven_workflow.rs +21 -6
  52. package/sdk-core/core/src/workflow/history_update.rs +107 -24
  53. package/sdk-core/core/src/workflow/machines/activity_state_machine.rs +2 -3
  54. package/sdk-core/core/src/workflow/machines/child_workflow_state_machine.rs +2 -3
  55. package/sdk-core/core/src/workflow/machines/mod.rs +20 -17
  56. package/sdk-core/core/src/workflow/machines/signal_external_state_machine.rs +56 -19
  57. package/sdk-core/core/src/workflow/machines/transition_coverage.rs +5 -0
  58. package/sdk-core/core/src/workflow/machines/upsert_search_attributes_state_machine.rs +230 -22
  59. package/sdk-core/core/src/workflow/machines/workflow_machines.rs +81 -115
  60. package/sdk-core/core/src/workflow/machines/workflow_task_state_machine.rs +4 -4
  61. package/sdk-core/core/src/workflow/mod.rs +13 -1
  62. package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +70 -11
  63. package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +65 -41
  64. package/sdk-core/core-api/Cargo.toml +9 -1
  65. package/sdk-core/core-api/LICENSE.txt +23 -0
  66. package/sdk-core/core-api/src/errors.rs +7 -38
  67. package/sdk-core/core-api/src/lib.rs +44 -52
  68. package/sdk-core/core-api/src/worker.rs +10 -2
  69. package/sdk-core/etc/deps.svg +127 -96
  70. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +11 -7
  71. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +10 -0
  72. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +6 -1
  73. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +6 -0
  74. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +6 -0
  75. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +2 -1
  76. package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +3 -0
  77. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +12 -0
  78. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +25 -0
  79. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -0
  80. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +19 -35
  81. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -6
  82. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +53 -11
  83. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +14 -7
  84. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +3 -5
  85. package/sdk-core/sdk/Cargo.toml +16 -2
  86. package/sdk-core/sdk/LICENSE.txt +23 -0
  87. package/sdk-core/sdk/src/interceptors.rs +11 -0
  88. package/sdk-core/sdk/src/lib.rs +139 -151
  89. package/sdk-core/sdk/src/workflow_context/options.rs +86 -1
  90. package/sdk-core/sdk/src/workflow_context.rs +36 -17
  91. package/sdk-core/sdk/src/workflow_future.rs +19 -25
  92. package/sdk-core/sdk-core-protos/Cargo.toml +1 -1
  93. package/sdk-core/sdk-core-protos/build.rs +1 -0
  94. package/sdk-core/sdk-core-protos/src/history_info.rs +17 -4
  95. package/sdk-core/sdk-core-protos/src/lib.rs +251 -47
  96. package/sdk-core/test-utils/Cargo.toml +3 -1
  97. package/sdk-core/test-utils/src/canned_histories.rs +27 -0
  98. package/sdk-core/test-utils/src/histfetch.rs +3 -3
  99. package/sdk-core/test-utils/src/lib.rs +223 -68
  100. package/sdk-core/tests/integ_tests/client_tests.rs +27 -4
  101. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +93 -14
  102. package/sdk-core/tests/integ_tests/polling_tests.rs +18 -12
  103. package/sdk-core/tests/integ_tests/queries_tests.rs +50 -53
  104. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +117 -103
  105. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +8 -1
  106. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +10 -5
  107. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +7 -1
  108. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +32 -9
  109. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +7 -1
  110. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +76 -15
  111. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +19 -3
  112. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +39 -42
  113. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +84 -0
  114. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +30 -8
  115. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +21 -6
  116. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +26 -16
  117. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +66 -0
  118. package/sdk-core/tests/integ_tests/workflow_tests.rs +78 -74
  119. package/sdk-core/tests/load_tests.rs +9 -6
  120. package/sdk-core/tests/main.rs +43 -10
  121. package/src/conversions.rs +7 -12
  122. package/src/lib.rs +322 -357
  123. package/sdk-core/client/src/mocks.rs +0 -167
  124. package/sdk-core/core/src/worker/dispatcher.rs +0 -171
  125. package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +0 -61
@@ -1,4 +1,7 @@
1
- use crate::{pollers::ServerGatewayApis, worker::activities::PendingActivityCancel, TaskToken};
1
+ use crate::{
2
+ worker::{activities::PendingActivityCancel, client::WorkerClientBag},
3
+ TaskToken,
4
+ };
2
5
  use futures::StreamExt;
3
6
  use std::{
4
7
  collections::{hash_map::Entry, HashMap},
@@ -56,7 +59,6 @@ enum HeartbeatExecutorAction {
56
59
  Report {
57
60
  task_token: TaskToken,
58
61
  details: Vec<common::Payload>,
59
- on_reported: Option<Arc<Notify>>,
60
62
  },
61
63
  }
62
64
 
@@ -139,6 +141,8 @@ impl ActivityHeartbeatManager {
139
141
  struct ActivityHeartbeatState {
140
142
  /// If None and throttle interval is over, untrack this task token
141
143
  last_recorded_details: Option<Vec<common::Payload>>,
144
+ /// True if we've queued up a request to record against server, but it hasn't yet completed
145
+ is_record_in_flight: bool,
142
146
  last_send_requested: Instant,
143
147
  throttle_interval: Duration,
144
148
  throttled_cancellation_token: Option<CancellationToken>,
@@ -161,6 +165,7 @@ impl ActivityHeartbeatState {
161
165
  #[derive(Debug)]
162
166
  struct HeartbeatStreamState {
163
167
  tt_to_state: HashMap<TaskToken, ActivityHeartbeatState>,
168
+ tt_needs_flush: HashMap<TaskToken, Arc<Notify>>,
164
169
  incoming_hbs: UnboundedReceiver<HeartbeatAction>,
165
170
  /// Token that can be used to cancel the entire stream.
166
171
  /// Requests to the server are not cancelled with this token.
@@ -175,6 +180,7 @@ impl HeartbeatStreamState {
175
180
  Self {
176
181
  cancellation_token: cancellation_token.clone(),
177
182
  tt_to_state: Default::default(),
183
+ tt_needs_flush: Default::default(),
178
184
  incoming_hbs,
179
185
  },
180
186
  heartbeat_tx,
@@ -193,13 +199,13 @@ impl HeartbeatStreamState {
193
199
  // None is used to mark that after throttling we can stop tracking this task
194
200
  // token.
195
201
  last_recorded_details: None,
202
+ is_record_in_flight: true,
196
203
  throttled_cancellation_token: None,
197
204
  };
198
205
  e.insert(state);
199
206
  Some(HeartbeatExecutorAction::Report {
200
207
  task_token: hb.task_token,
201
208
  details: hb.details,
202
- on_reported: None,
203
209
  })
204
210
  }
205
211
  Entry::Occupied(mut o) => {
@@ -212,7 +218,11 @@ impl HeartbeatStreamState {
212
218
 
213
219
  /// Heartbeat report to server completed
214
220
  fn handle_report_completed(&mut self, tt: TaskToken) -> Option<HeartbeatExecutorAction> {
221
+ if let Some(not) = self.tt_needs_flush.remove(&tt) {
222
+ not.notify_one();
223
+ }
215
224
  if let Some(st) = self.tt_to_state.get_mut(&tt) {
225
+ st.is_record_in_flight = false;
216
226
  let cancellation_token = self.cancellation_token.child_token();
217
227
  st.throttled_cancellation_token = Some(cancellation_token.clone());
218
228
  // Always sleep for simplicity even if the duration is 0
@@ -236,10 +246,10 @@ impl HeartbeatStreamState {
236
246
  // Reset the cancellation token and schedule another report
237
247
  state.throttled_cancellation_token = None;
238
248
  state.last_send_requested = Instant::now();
249
+ state.is_record_in_flight = true;
239
250
  Some(HeartbeatExecutorAction::Report {
240
251
  task_token: tt,
241
252
  details,
242
- on_reported: None,
243
253
  })
244
254
  } else {
245
255
  // Nothing to report, forget this task token
@@ -265,11 +275,14 @@ impl HeartbeatStreamState {
265
275
  let _ = cancel_tok.cancel();
266
276
  }
267
277
  if let Some(last_deets) = state.last_recorded_details {
278
+ self.tt_needs_flush.insert(tt.clone(), on_complete);
268
279
  return Some(HeartbeatExecutorAction::Report {
269
280
  task_token: tt,
270
281
  details: last_deets,
271
- on_reported: Some(on_complete),
272
282
  });
283
+ } else if state.is_record_in_flight {
284
+ self.tt_needs_flush.insert(tt, on_complete);
285
+ return None;
273
286
  }
274
287
  }
275
288
  // Since there's nothing to flush immediately report back that eviction is finished
@@ -281,7 +294,7 @@ impl HeartbeatStreamState {
281
294
  impl ActivityHeartbeatManager {
282
295
  /// Creates a new instance of an activity heartbeat manager and returns a handle to the user,
283
296
  /// which allows sending new heartbeats and initiating the shutdown.
284
- pub fn new(sg: Arc<impl ServerGatewayApis + Send + Sync + 'static + ?Sized>) -> Self {
297
+ pub fn new(client: Arc<WorkerClientBag>) -> Self {
285
298
  let (heartbeat_stream_state, heartbeat_tx_source, shutdown_token) =
286
299
  HeartbeatStreamState::new();
287
300
  let (cancels_tx, cancels_rx) = unbounded_channel();
@@ -320,7 +333,7 @@ impl ActivityHeartbeatManager {
320
333
  .filter_map(|opt| async { opt })
321
334
  .for_each_concurrent(None, move |action| {
322
335
  let heartbeat_tx = heartbeat_tx_source.clone();
323
- let sg = sg.clone();
336
+ let sg = client.clone();
324
337
  let cancels_tx = cancels_tx.clone();
325
338
  async move {
326
339
  match action {
@@ -332,7 +345,7 @@ impl ActivityHeartbeatManager {
332
345
  },
333
346
  };
334
347
  }
335
- HeartbeatExecutorAction::Report { task_token: tt, details, on_reported } => {
348
+ HeartbeatExecutorAction::Report { task_token: tt, details } => {
336
349
  match sg
337
350
  .record_activity_heartbeat(tt.clone(), details.into_payloads())
338
351
  .await
@@ -353,6 +366,8 @@ impl ActivityHeartbeatManager {
353
366
  // finished (which is one thing not found implies - other reasons
354
367
  // would seem equally valid).
355
368
  Err(s) if s.code() == tonic::Code::NotFound => {
369
+ debug!(task_token = %tt,
370
+ "Activity not found when recording heartbeat");
356
371
  cancels_tx
357
372
  .send(PendingActivityCancel::new(
358
373
  tt.clone(),
@@ -364,9 +379,6 @@ impl ActivityHeartbeatManager {
364
379
  warn!("Error when recording heartbeat: {:?}", e);
365
380
  }
366
381
  };
367
- if let Some(onrep) = on_reported {
368
- onrep.notify_one();
369
- }
370
382
  let _ = heartbeat_tx.send(HeartbeatAction::CompleteReport(tt));
371
383
  }
372
384
  }
@@ -386,10 +398,9 @@ impl ActivityHeartbeatManager {
386
398
  #[cfg(test)]
387
399
  mod test {
388
400
  use super::*;
389
- use crate::test_help::TEST_Q;
390
- use std::time::Duration;
391
401
 
392
- use temporal_client::mocks::mock_gateway;
402
+ use crate::worker::client::mocks::mock_workflow_client;
403
+ use std::time::Duration;
393
404
  use temporal_sdk_core_protos::{
394
405
  coresdk::common::Payload,
395
406
  temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
@@ -400,12 +411,12 @@ mod test {
400
411
  /// every 1/2 of the heartbeat timeout.
401
412
  #[tokio::test]
402
413
  async fn process_heartbeats_and_shutdown() {
403
- let mut mock_gateway = mock_gateway();
404
- mock_gateway
414
+ let mut mock_client = mock_workflow_client();
415
+ mock_client
405
416
  .expect_record_activity_heartbeat()
406
417
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
407
418
  .times(2);
408
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
419
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
409
420
  let fake_task_token = vec![1, 2, 3];
410
421
  // Send 2 heartbeat requests 20ms apart.
411
422
  // The first heartbeat should be sent right away, and
@@ -421,12 +432,12 @@ mod test {
421
432
 
422
433
  #[tokio::test]
423
434
  async fn send_heartbeats_less_frequently_than_throttle_interval() {
424
- let mut mock_gateway = mock_gateway();
425
- mock_gateway
435
+ let mut mock_client = mock_workflow_client();
436
+ mock_client
426
437
  .expect_record_activity_heartbeat()
427
438
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
428
439
  .times(3);
429
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
440
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
430
441
  let fake_task_token = vec![1, 2, 3];
431
442
  // Heartbeats always get sent if recorded less frequently than the throttle interval
432
443
  for i in 0_u8..3 {
@@ -441,12 +452,12 @@ mod test {
441
452
  /// interactions with the server - one immediately and one after 500ms after the throttle_interval.
442
453
  #[tokio::test]
443
454
  async fn process_tight_loop_and_shutdown() {
444
- let mut mock_gateway = mock_gateway();
445
- mock_gateway
455
+ let mut mock_client = mock_workflow_client();
456
+ mock_client
446
457
  .expect_record_activity_heartbeat()
447
458
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
448
459
  .times(1);
449
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
460
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
450
461
  let fake_task_token = vec![1, 2, 3];
451
462
  // Send a whole bunch of heartbeats very fast. We should still only send one total.
452
463
  for i in 0_u8..50 {
@@ -460,12 +471,12 @@ mod test {
460
471
  /// This test reports one heartbeat and waits for the throttle_interval to elapse before sending another
461
472
  #[tokio::test]
462
473
  async fn report_heartbeat_after_timeout() {
463
- let mut mock_gateway = mock_gateway();
464
- mock_gateway
474
+ let mut mock_client = mock_workflow_client();
475
+ mock_client
465
476
  .expect_record_activity_heartbeat()
466
477
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
467
478
  .times(2);
468
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
479
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
469
480
  let fake_task_token = vec![1, 2, 3];
470
481
  record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
471
482
  sleep(Duration::from_millis(500)).await;
@@ -477,12 +488,12 @@ mod test {
477
488
 
478
489
  #[tokio::test]
479
490
  async fn evict_works() {
480
- let mut mock_gateway = mock_gateway();
481
- mock_gateway
491
+ let mut mock_client = mock_workflow_client();
492
+ mock_client
482
493
  .expect_record_activity_heartbeat()
483
494
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
484
495
  .times(2);
485
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
496
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
486
497
  let fake_task_token = vec![1, 2, 3];
487
498
  record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
488
499
  // Let it propagate
@@ -495,20 +506,33 @@ mod test {
495
506
  hm.shutdown().await;
496
507
  }
497
508
 
509
+ #[tokio::test]
510
+ async fn evict_immediate_after_record() {
511
+ let mut mock_client = mock_workflow_client();
512
+ mock_client
513
+ .expect_record_activity_heartbeat()
514
+ .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
515
+ .times(1);
516
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
517
+ let fake_task_token = vec![1, 2, 3];
518
+ record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
519
+ hm.evict(fake_task_token.clone().into()).await;
520
+ hm.shutdown().await;
521
+ }
522
+
498
523
  /// Recording new heartbeats after shutdown is not allowed, and will result in error.
499
524
  #[tokio::test]
500
525
  async fn record_after_shutdown() {
501
- let mut mock_gateway = mock_gateway();
502
- mock_gateway
526
+ let mut mock_client = mock_workflow_client();
527
+ mock_client
503
528
  .expect_record_activity_heartbeat()
504
529
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
505
530
  .times(0);
506
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_gateway));
531
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
507
532
  hm.shutdown().await;
508
533
  match hm.record(
509
534
  ActivityHeartbeat {
510
535
  task_token: vec![1, 2, 3],
511
- task_queue: TEST_Q.to_string(),
512
536
  details: vec![Payload {
513
537
  // payload doesn't matter in this case, as it shouldn't get sent anyways.
514
538
  ..Default::default()
@@ -534,7 +558,6 @@ mod test {
534
558
  hm.record(
535
559
  ActivityHeartbeat {
536
560
  task_token,
537
- task_queue: TEST_Q.to_string(),
538
561
  details: vec![Payload {
539
562
  metadata: Default::default(),
540
563
  data: vec![payload_data],
@@ -1,4 +1,7 @@
1
- use crate::{protosext::ValidScheduleLA, retry_logic::RetryPolicyExt, TaskToken};
1
+ use crate::{
2
+ abstractions::MeteredSemaphore, protosext::ValidScheduleLA, retry_logic::RetryPolicyExt,
3
+ MetricsContext, TaskToken,
4
+ };
2
5
  use parking_lot::Mutex;
3
6
  use std::{
4
7
  collections::HashMap,
@@ -11,12 +14,15 @@ use temporal_sdk_core_protos::{
11
14
  activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
12
15
  common::WorkflowExecution,
13
16
  },
14
- temporal::api::enums::v1::TimeoutType,
17
+ temporal::api::{
18
+ enums::v1::TimeoutType,
19
+ failure::v1::{failure::FailureInfo, ApplicationFailureInfo},
20
+ },
15
21
  };
16
22
  use tokio::{
17
23
  sync::{
18
24
  mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
19
- Notify, Semaphore,
25
+ Notify,
20
26
  },
21
27
  task::JoinHandle,
22
28
  time::sleep,
@@ -103,7 +109,7 @@ pub(crate) struct LocalActivityManager {
103
109
  /// Just so we can provide activity tasks the same namespace as the worker
104
110
  namespace: String,
105
111
  /// Constrains number of currently executing local activities
106
- semaphore: Semaphore,
112
+ semaphore: MeteredSemaphore,
107
113
  /// Sink for new activity execution requests
108
114
  act_req_tx: UnboundedSender<NewOrRetry>,
109
115
  /// Cancels need a different queue since they should be taken first, and don't take a permit
@@ -135,13 +141,21 @@ impl LAMData {
135
141
  }
136
142
 
137
143
  impl LocalActivityManager {
138
- pub(crate) fn new(max_concurrent: usize, namespace: String) -> Self {
144
+ pub(crate) fn new(
145
+ max_concurrent: usize,
146
+ namespace: String,
147
+ metrics_context: MetricsContext,
148
+ ) -> Self {
139
149
  let (act_req_tx, act_req_rx) = unbounded_channel();
140
150
  let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
141
151
  let shutdown_complete_tok = CancellationToken::new();
142
152
  Self {
143
153
  namespace,
144
- semaphore: Semaphore::new(max_concurrent),
154
+ semaphore: MeteredSemaphore::new(
155
+ max_concurrent,
156
+ metrics_context,
157
+ MetricsContext::available_task_slots,
158
+ ),
145
159
  act_req_tx,
146
160
  cancels_req_tx,
147
161
  complete_notify: Notify::new(),
@@ -161,6 +175,15 @@ impl LocalActivityManager {
161
175
  }
162
176
  }
163
177
 
178
+ #[cfg(test)]
179
+ fn test(max_concurrent: usize) -> Self {
180
+ Self::new(
181
+ max_concurrent,
182
+ "fake_ns".to_string(),
183
+ MetricsContext::default(),
184
+ )
185
+ }
186
+
164
187
  pub(crate) fn num_outstanding(&self) -> usize {
165
188
  self.dat.lock().outstanding_activity_tasks.len()
166
189
  }
@@ -358,7 +381,7 @@ impl LocalActivityManager {
358
381
  workflow_execution: Some(new_la.workflow_exec_info),
359
382
  activity_id: sa.activity_id,
360
383
  activity_type: sa.activity_type,
361
- header_fields: sa.header_fields,
384
+ header_fields: sa.headers,
362
385
  input: sa.arguments,
363
386
  heartbeat_details: vec![],
364
387
  scheduled_time: Some(new_la.schedule_time.into()),
@@ -387,7 +410,7 @@ impl LocalActivityManager {
387
410
  seq_num: info.la_info.schedule_cmd.seq,
388
411
  };
389
412
  dlock.id_to_tt.remove(&exec_id);
390
- self.semaphore.add_permits(1);
413
+ self.semaphore.add_permit();
391
414
 
392
415
  match status {
393
416
  LocalActivityExecutionResult::Completed(_)
@@ -400,10 +423,13 @@ impl LocalActivityManager {
400
423
  LocalActivityExecutionResult::Failed(f) => {
401
424
  if let Some(backoff_dur) = info.la_info.schedule_cmd.retry_policy.should_retry(
402
425
  info.attempt as usize,
403
- &f.failure
404
- .as_ref()
405
- .map(|f| format!("{:?}", f))
406
- .unwrap_or_else(|| "".to_string()),
426
+ f.failure.as_ref().map_or("", |f| match &f.failure_info {
427
+ Some(FailureInfo::ApplicationFailureInfo(ApplicationFailureInfo {
428
+ r#type,
429
+ ..
430
+ })) => r#type.as_str(),
431
+ _ => "",
432
+ }),
407
433
  ) {
408
434
  let will_use_timer =
409
435
  backoff_dur > info.la_info.schedule_cmd.local_retry_threshold;
@@ -507,7 +533,7 @@ struct RcvChans {
507
533
  }
508
534
 
509
535
  impl RcvChans {
510
- async fn next(&mut self, new_sem: &Semaphore) -> Option<NewOrCancel> {
536
+ async fn next(&mut self, new_sem: &MeteredSemaphore) -> Option<NewOrCancel> {
511
537
  tokio::select! {
512
538
  cancel = async { self.cancels_req_rx.recv().await } => {
513
539
  Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
@@ -617,7 +643,9 @@ impl Drop for TimeoutBag {
617
643
  mod tests {
618
644
  use super::*;
619
645
  use crate::protosext::LACloseTimeouts;
620
- use temporal_sdk_core_protos::coresdk::common::RetryPolicy;
646
+ use temporal_sdk_core_protos::{
647
+ coresdk::common::RetryPolicy, temporal::api::failure::v1::Failure,
648
+ };
621
649
  use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
622
650
 
623
651
  impl DispatchOrTimeoutLA {
@@ -633,7 +661,7 @@ mod tests {
633
661
 
634
662
  #[tokio::test]
635
663
  async fn max_concurrent_respected() {
636
- let lam = LocalActivityManager::new(1, "whatever".to_string());
664
+ let lam = LocalActivityManager::test(1);
637
665
  lam.enqueue((1..=50).map(|i| {
638
666
  NewLocalAct {
639
667
  schedule_cmd: ValidScheduleLA {
@@ -673,7 +701,7 @@ mod tests {
673
701
 
674
702
  #[tokio::test]
675
703
  async fn no_work_doesnt_deadlock_with_complete() {
676
- let lam = LocalActivityManager::new(5, "whatever".to_string());
704
+ let lam = LocalActivityManager::test(5);
677
705
  lam.enqueue([NewLocalAct {
678
706
  schedule_cmd: ValidScheduleLA {
679
707
  seq: 1,
@@ -705,7 +733,7 @@ mod tests {
705
733
 
706
734
  #[tokio::test]
707
735
  async fn can_cancel_in_flight() {
708
- let lam = LocalActivityManager::new(5, "whatever".to_string());
736
+ let lam = LocalActivityManager::test(5);
709
737
  lam.enqueue([NewLocalAct {
710
738
  schedule_cmd: ValidScheduleLA {
711
739
  seq: 1,
@@ -732,7 +760,7 @@ mod tests {
732
760
 
733
761
  #[tokio::test]
734
762
  async fn respects_timer_backoff_threshold() {
735
- let lam = LocalActivityManager::new(1, "whatever".to_string());
763
+ let lam = LocalActivityManager::test(1);
736
764
  lam.enqueue([NewLocalAct {
737
765
  schedule_cmd: ValidScheduleLA {
738
766
  seq: 1,
@@ -765,9 +793,53 @@ mod tests {
765
793
  )
766
794
  }
767
795
 
796
+ #[tokio::test]
797
+ async fn respects_non_retryable_error_types() {
798
+ let lam = LocalActivityManager::test(1);
799
+ lam.enqueue([NewLocalAct {
800
+ schedule_cmd: ValidScheduleLA {
801
+ seq: 1,
802
+ activity_id: "1".to_string(),
803
+ attempt: 1,
804
+ retry_policy: RetryPolicy {
805
+ initial_interval: Some(Duration::from_secs(1).into()),
806
+ backoff_coefficient: 10.0,
807
+ maximum_interval: Some(Duration::from_secs(10).into()),
808
+ maximum_attempts: 10,
809
+ non_retryable_error_types: vec!["TestError".to_string()],
810
+ },
811
+ local_retry_threshold: Duration::from_secs(5),
812
+ ..Default::default()
813
+ },
814
+ workflow_type: "".to_string(),
815
+ workflow_exec_info: Default::default(),
816
+ schedule_time: SystemTime::now(),
817
+ }
818
+ .into()]);
819
+
820
+ let next = lam.next_pending().await.unwrap().unwrap();
821
+ let tt = TaskToken(next.task_token);
822
+ let res = lam.complete(
823
+ &tt,
824
+ &LocalActivityExecutionResult::Failed(ActFail {
825
+ failure: Some(Failure {
826
+ failure_info: Some(FailureInfo::ApplicationFailureInfo(
827
+ ApplicationFailureInfo {
828
+ r#type: "TestError".to_string(),
829
+ non_retryable: false,
830
+ ..Default::default()
831
+ },
832
+ )),
833
+ ..Default::default()
834
+ }),
835
+ }),
836
+ );
837
+ assert_matches!(res, LACompleteAction::Report(_));
838
+ }
839
+
768
840
  #[tokio::test]
769
841
  async fn can_cancel_during_local_backoff() {
770
- let lam = LocalActivityManager::new(1, "whatever".to_string());
842
+ let lam = LocalActivityManager::test(1);
771
843
  lam.enqueue([NewLocalAct {
772
844
  schedule_cmd: ValidScheduleLA {
773
845
  seq: 1,
@@ -816,7 +888,7 @@ mod tests {
816
888
 
817
889
  #[tokio::test]
818
890
  async fn local_backoff_clears_handle_map_when_started() {
819
- let lam = LocalActivityManager::new(1, "whatever".to_string());
891
+ let lam = LocalActivityManager::test(1);
820
892
  lam.enqueue([NewLocalAct {
821
893
  schedule_cmd: ValidScheduleLA {
822
894
  seq: 1,
@@ -852,7 +924,7 @@ mod tests {
852
924
 
853
925
  #[tokio::test]
854
926
  async fn sched_to_start_timeout() {
855
- let lam = LocalActivityManager::new(1, "whatever".to_string());
927
+ let lam = LocalActivityManager::test(1);
856
928
  let timeout = Duration::from_millis(100);
857
929
  lam.enqueue([NewLocalAct {
858
930
  schedule_cmd: ValidScheduleLA {
@@ -893,7 +965,7 @@ mod tests {
893
965
  #[case::start(false)]
894
966
  #[tokio::test]
895
967
  async fn local_x_to_close_timeout(#[case] is_schedule: bool) {
896
- let lam = LocalActivityManager::new(1, "whatever".to_string());
968
+ let lam = LocalActivityManager::test(1);
897
969
  let timeout = Duration::from_millis(100);
898
970
  let close_timeouts = if is_schedule {
899
971
  LACloseTimeouts::ScheduleOnly(timeout)
@@ -937,7 +1009,7 @@ mod tests {
937
1009
 
938
1010
  #[tokio::test]
939
1011
  async fn idempotency_enforced() {
940
- let lam = LocalActivityManager::new(10, "whatever".to_string());
1012
+ let lam = LocalActivityManager::test(10);
941
1013
  let new_la = NewLocalAct {
942
1014
  schedule_cmd: ValidScheduleLA {
943
1015
  seq: 1,
@@ -8,10 +8,14 @@ pub(crate) use local_activities::{
8
8
  };
9
9
 
10
10
  use crate::{
11
+ abstractions::MeteredSemaphore,
11
12
  pollers::BoxedActPoller,
12
- telemetry::metrics::{activity_type, workflow_type, MetricsContext},
13
- worker::activities::activity_heartbeat_manager::ActivityHeartbeatError,
14
- CompleteActivityError, PollActivityError, ServerGatewayApis, TaskToken,
13
+ telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
14
+ worker::{
15
+ activities::activity_heartbeat_manager::ActivityHeartbeatError,
16
+ client::{WorkerClient, WorkerClientBag},
17
+ },
18
+ CompleteActivityError, PollActivityError, TaskToken,
15
19
  };
16
20
  use activity_heartbeat_manager::ActivityHeartbeatManager;
17
21
  use dashmap::DashMap;
@@ -31,7 +35,7 @@ use temporal_sdk_core_protos::{
31
35
  workflowservice::v1::PollActivityTaskQueueResponse,
32
36
  },
33
37
  };
34
- use tokio::sync::{Notify, Semaphore};
38
+ use tokio::sync::Notify;
35
39
 
36
40
  #[derive(Debug, derive_more::Constructor)]
37
41
  struct PendingActivityCancel {
@@ -88,7 +92,7 @@ pub(crate) struct WorkerActivityTasks {
88
92
  /// ongoing.
89
93
  poller: BoxedActPoller,
90
94
  /// Ensures we stay at or below this worker's maximum concurrent activity limit
91
- activities_semaphore: Semaphore,
95
+ activities_semaphore: MeteredSemaphore,
92
96
  /// Wakes every time an activity is removed from the outstanding map
93
97
  complete_notify: Notify,
94
98
 
@@ -102,16 +106,20 @@ impl WorkerActivityTasks {
102
106
  pub(crate) fn new(
103
107
  max_activity_tasks: usize,
104
108
  poller: BoxedActPoller,
105
- sg: Arc<impl ServerGatewayApis + Send + Sync + 'static + ?Sized>,
109
+ client: Arc<WorkerClientBag>,
106
110
  metrics: MetricsContext,
107
111
  max_heartbeat_throttle_interval: Duration,
108
112
  default_heartbeat_throttle_interval: Duration,
109
113
  ) -> Self {
110
114
  Self {
111
- heartbeat_manager: ActivityHeartbeatManager::new(sg),
115
+ heartbeat_manager: ActivityHeartbeatManager::new(client),
112
116
  outstanding_activity_tasks: Default::default(),
113
117
  poller,
114
- activities_semaphore: Semaphore::new(max_activity_tasks),
118
+ activities_semaphore: MeteredSemaphore::new(
119
+ max_activity_tasks,
120
+ metrics.with_new_attrs([activity_worker_type()]),
121
+ MetricsContext::available_task_slots,
122
+ ),
115
123
  complete_notify: Notify::new(),
116
124
  metrics,
117
125
  max_heartbeat_throttle_interval,
@@ -196,7 +204,7 @@ impl WorkerActivityTasks {
196
204
  &self,
197
205
  task_token: TaskToken,
198
206
  status: aer::Status,
199
- gateway: &(dyn ServerGatewayApis + Send + Sync),
207
+ client: &dyn WorkerClient,
200
208
  ) -> Result<(), CompleteActivityError> {
201
209
  if let Some((_, act_info)) = self.outstanding_activity_tasks.remove(&task_token) {
202
210
  let act_metrics = self.metrics.with_new_attrs([
@@ -204,7 +212,7 @@ impl WorkerActivityTasks {
204
212
  workflow_type(act_info.base.workflow_type.clone()),
205
213
  ]);
206
214
  act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
207
- self.activities_semaphore.add_permits(1);
215
+ self.activities_semaphore.add_permit();
208
216
  self.heartbeat_manager.evict(task_token.clone()).await;
209
217
  let known_not_found = act_info.known_not_found;
210
218
  drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
@@ -214,13 +222,13 @@ impl WorkerActivityTasks {
214
222
  if !known_not_found {
215
223
  let maybe_net_err = match status {
216
224
  aer::Status::WillCompleteAsync(_) => None,
217
- aer::Status::Completed(ar::Success { result }) => gateway
225
+ aer::Status::Completed(ar::Success { result }) => client
218
226
  .complete_activity_task(task_token.clone(), result.map(Into::into))
219
227
  .await
220
228
  .err(),
221
229
  aer::Status::Failed(ar::Failure { failure }) => {
222
230
  act_metrics.act_execution_failed();
223
- gateway
231
+ client
224
232
  .fail_activity_task(task_token.clone(), failure.map(Into::into))
225
233
  .await
226
234
  .err()
@@ -238,7 +246,7 @@ impl WorkerActivityTasks {
238
246
  "Expected activity cancelled status with CanceledFailureInfo");
239
247
  None
240
248
  };
241
- gateway
249
+ client
242
250
  .cancel_activity_task(task_token.clone(), details.map(Into::into))
243
251
  .await
244
252
  .err()
@@ -316,8 +324,7 @@ impl WorkerActivityTasks {
316
324
  }
317
325
  Ok(Some(ActivityTask::cancel_from_ids(task_token.0, reason)))
318
326
  } else {
319
- warn!(task_token = ?task_token,
320
- "Unknown activity task when issuing cancel");
327
+ debug!(task_token = ?task_token, "Unknown activity task when issuing cancel");
321
328
  // If we can't find the activity here, it's already been completed,
322
329
  // in which case issuing a cancel again is pointless.
323
330
  Ok(None)
@@ -331,6 +338,6 @@ impl WorkerActivityTasks {
331
338
 
332
339
  #[cfg(test)]
333
340
  pub(crate) fn remaining_activity_capacity(&self) -> usize {
334
- self.activities_semaphore.available_permits()
341
+ self.activities_semaphore.sem.available_permits()
335
342
  }
336
343
  }