@temporalio/core-bridge 1.6.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/Cargo.lock +520 -456
  2. package/lib/index.d.ts +8 -6
  3. package/lib/index.js.map +1 -1
  4. package/package.json +8 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +2 -2
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.buildkite/pipeline.yml +1 -1
  13. package/sdk-core/.github/workflows/heavy.yml +1 -0
  14. package/sdk-core/README.md +13 -7
  15. package/sdk-core/client/src/lib.rs +27 -9
  16. package/sdk-core/client/src/metrics.rs +17 -8
  17. package/sdk-core/client/src/raw.rs +3 -3
  18. package/sdk-core/core/Cargo.toml +3 -4
  19. package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
  20. package/sdk-core/core/src/abstractions.rs +197 -18
  21. package/sdk-core/core/src/core_tests/activity_tasks.rs +137 -45
  22. package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
  23. package/sdk-core/core/src/core_tests/determinism.rs +212 -2
  24. package/sdk-core/core/src/core_tests/local_activities.rs +183 -36
  25. package/sdk-core/core/src/core_tests/queries.rs +32 -14
  26. package/sdk-core/core/src/core_tests/workers.rs +8 -5
  27. package/sdk-core/core/src/core_tests/workflow_tasks.rs +340 -51
  28. package/sdk-core/core/src/ephemeral_server/mod.rs +110 -8
  29. package/sdk-core/core/src/internal_flags.rs +141 -0
  30. package/sdk-core/core/src/lib.rs +14 -9
  31. package/sdk-core/core/src/replay/mod.rs +16 -27
  32. package/sdk-core/core/src/telemetry/metrics.rs +69 -35
  33. package/sdk-core/core/src/telemetry/mod.rs +38 -14
  34. package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
  35. package/sdk-core/core/src/test_help/mod.rs +65 -13
  36. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
  37. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  38. package/sdk-core/core/src/worker/activities/local_activities.rs +122 -6
  39. package/sdk-core/core/src/worker/activities.rs +347 -173
  40. package/sdk-core/core/src/worker/client/mocks.rs +22 -2
  41. package/sdk-core/core/src/worker/client.rs +18 -2
  42. package/sdk-core/core/src/worker/mod.rs +137 -44
  43. package/sdk-core/core/src/worker/workflow/history_update.rs +132 -51
  44. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +207 -166
  45. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +6 -7
  46. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -7
  47. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +157 -82
  48. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +12 -12
  49. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +6 -7
  50. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +13 -15
  51. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +170 -60
  52. package/sdk-core/core/src/worker/workflow/machines/mod.rs +24 -16
  53. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +6 -8
  54. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +320 -204
  55. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +10 -13
  56. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +15 -23
  57. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +187 -46
  58. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +237 -111
  59. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +13 -13
  60. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +10 -6
  61. package/sdk-core/core/src/worker/workflow/managed_run.rs +81 -62
  62. package/sdk-core/core/src/worker/workflow/mod.rs +341 -79
  63. package/sdk-core/core/src/worker/workflow/run_cache.rs +18 -11
  64. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +15 -3
  65. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +2 -0
  66. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +75 -52
  67. package/sdk-core/core-api/Cargo.toml +0 -1
  68. package/sdk-core/core-api/src/lib.rs +13 -7
  69. package/sdk-core/core-api/src/telemetry.rs +4 -6
  70. package/sdk-core/core-api/src/worker.rs +5 -0
  71. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +80 -55
  72. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +22 -68
  73. package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
  74. package/sdk-core/histories/old_change_marker_format.bin +0 -0
  75. package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
  76. package/sdk-core/protos/api_upstream/Makefile +1 -1
  77. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
  78. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
  79. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
  80. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
  81. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
  82. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
  83. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
  84. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
  85. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
  86. package/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
  87. package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  88. package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
  89. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
  90. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
  91. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
  92. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
  93. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
  94. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  95. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  96. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
  97. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +65 -60
  98. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
  99. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
  100. package/sdk-core/sdk/Cargo.toml +1 -1
  101. package/sdk-core/sdk/src/lib.rs +21 -5
  102. package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
  103. package/sdk-core/sdk/src/workflow_context.rs +24 -17
  104. package/sdk-core/sdk/src/workflow_future.rs +9 -3
  105. package/sdk-core/sdk-core-protos/src/history_builder.rs +114 -89
  106. package/sdk-core/sdk-core-protos/src/history_info.rs +6 -1
  107. package/sdk-core/sdk-core-protos/src/lib.rs +205 -64
  108. package/sdk-core/test-utils/src/canned_histories.rs +106 -296
  109. package/sdk-core/test-utils/src/lib.rs +32 -5
  110. package/sdk-core/tests/heavy_tests.rs +10 -43
  111. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  112. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
  113. package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
  114. package/sdk-core/tests/integ_tests/polling_tests.rs +3 -8
  115. package/sdk-core/tests/integ_tests/queries_tests.rs +4 -2
  116. package/sdk-core/tests/integ_tests/visibility_tests.rs +34 -23
  117. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +97 -81
  118. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  119. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -0
  120. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
  121. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +5 -1
  122. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +1 -0
  123. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +25 -3
  124. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
  125. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +30 -0
  126. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +64 -0
  127. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  128. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +4 -0
  129. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +3 -1
  130. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +7 -2
  131. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -7
  132. package/sdk-core/tests/integ_tests/workflow_tests.rs +8 -8
  133. package/sdk-core/tests/main.rs +16 -25
  134. package/sdk-core/tests/runner.rs +11 -9
  135. package/src/conversions.rs +14 -8
  136. package/src/runtime.rs +9 -8
  137. package/ts/index.ts +8 -6
  138. package/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
@@ -1,4 +1,5 @@
1
1
  use crate::{
2
+ abstractions::take_cell::TakeCell,
2
3
  worker::{activities::PendingActivityCancel, client::WorkerClient},
3
4
  TaskToken,
4
5
  };
@@ -17,7 +18,7 @@ use temporal_sdk_core_protos::{
17
18
  use tokio::{
18
19
  sync::{
19
20
  mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
20
- Mutex, Notify,
21
+ Notify,
21
22
  },
22
23
  task::JoinHandle,
23
24
  };
@@ -26,12 +27,9 @@ use tokio_util::sync::CancellationToken;
26
27
  /// Used to supply new heartbeat events to the activity heartbeat manager, or to send a shutdown
27
28
  /// request.
28
29
  pub(crate) struct ActivityHeartbeatManager {
29
- /// Cancellations that have been received when heartbeating are queued here and can be consumed
30
- /// by [fetch_cancellations]
31
- incoming_cancels: Mutex<UnboundedReceiver<PendingActivityCancel>>,
32
30
  shutdown_token: CancellationToken,
33
31
  /// Used during `shutdown` to await until all inflight requests are sent.
34
- join_handle: Mutex<Option<JoinHandle<()>>>,
32
+ join_handle: TakeCell<JoinHandle<()>>,
35
33
  heartbeat_tx: UnboundedSender<HeartbeatAction>,
36
34
  }
37
35
 
@@ -74,15 +72,115 @@ pub enum ActivityHeartbeatError {
74
72
  /// to heartbeat.
75
73
  #[error("Unable to parse activity heartbeat timeout.")]
76
74
  InvalidHeartbeatTimeout,
77
- /// Core is shutting down and thus new heartbeats are not accepted
78
- #[error("New heartbeat requests are not accepted while shutting down")]
79
- ShuttingDown,
80
75
  }
81
76
 
82
77
  /// Manages activity heartbeating for a worker. Allows sending new heartbeats or requesting and
83
78
  /// awaiting for the shutdown. When shutdown is requested, signal gets sent to all processors, which
84
79
  /// allows them to complete gracefully.
85
80
  impl ActivityHeartbeatManager {
81
+ /// Creates a new instance of an activity heartbeat manager and returns a handle to the user,
82
+ /// which allows to send new heartbeats and initiate the shutdown.
83
+ /// Returns the manager and a channel that buffers cancellation notifications to be sent to Lang.
84
+ pub(super) fn new(
85
+ client: Arc<dyn WorkerClient>,
86
+ cancels_tx: UnboundedSender<PendingActivityCancel>,
87
+ ) -> Self {
88
+ let (heartbeat_stream_state, heartbeat_tx_source, shutdown_token) =
89
+ HeartbeatStreamState::new();
90
+ let heartbeat_tx = heartbeat_tx_source.clone();
91
+
92
+ let join_handle = tokio::spawn(
93
+ // The stream of incoming heartbeats uses unfold to carry state across each item in the
94
+ // stream. The closure checks if, for any given activity, we should heartbeat or not
95
+ // depending on its delay and when we last issued a heartbeat for it.
96
+ futures::stream::unfold(heartbeat_stream_state, move |mut hb_states| {
97
+ async move {
98
+ let hb = tokio::select! {
99
+ biased;
100
+
101
+ _ = hb_states.cancellation_token.cancelled() => {
102
+ return None
103
+ }
104
+ hb = hb_states.incoming_hbs.recv() => match hb {
105
+ None => return None,
106
+ Some(hb) => hb,
107
+ }
108
+ };
109
+
110
+ Some((
111
+ match hb {
112
+ HeartbeatAction::SendHeartbeat(hb) => hb_states.record(hb),
113
+ HeartbeatAction::CompleteReport(tt) => hb_states.handle_report_completed(tt),
114
+ HeartbeatAction::CompleteThrottle(tt) => hb_states.handle_throttle_completed(tt),
115
+ HeartbeatAction::Evict{ token, on_complete } => hb_states.evict(token, on_complete),
116
+ },
117
+ hb_states,
118
+ ))
119
+ }
120
+ })
121
+ // Filters out `None`s
122
+ .filter_map(|opt| async { opt })
123
+ .for_each_concurrent(None, move |action| {
124
+ let heartbeat_tx = heartbeat_tx_source.clone();
125
+ let sg = client.clone();
126
+ let cancels_tx = cancels_tx.clone();
127
+ async move {
128
+ match action {
129
+ HeartbeatExecutorAction::Sleep(tt, duration, cancellation_token) => {
130
+ tokio::select! {
131
+ _ = cancellation_token.cancelled() => (),
132
+ _ = tokio::time::sleep(duration) => {
133
+ let _ = heartbeat_tx.send(HeartbeatAction::CompleteThrottle(tt));
134
+ },
135
+ };
136
+ }
137
+ HeartbeatExecutorAction::Report { task_token: tt, details } => {
138
+ match sg
139
+ .record_activity_heartbeat(tt.clone(), details.into_payloads())
140
+ .await
141
+ {
142
+ Ok(RecordActivityTaskHeartbeatResponse { cancel_requested }) => {
143
+ if cancel_requested {
144
+ cancels_tx
145
+ .send(PendingActivityCancel::new(
146
+ tt.clone(),
147
+ ActivityCancelReason::Cancelled,
148
+ ))
149
+ .expect(
150
+ "Receive half of heartbeat cancels not blocked",
151
+ );
152
+ }
153
+ }
154
+ // Send cancels for any activity that learns its workflow already
155
+ // finished (which is one thing not found implies - other reasons
156
+ // would seem equally valid).
157
+ Err(s) if s.code() == tonic::Code::NotFound => {
158
+ debug!(task_token = %tt,
159
+ "Activity not found when recording heartbeat");
160
+ cancels_tx
161
+ .send(PendingActivityCancel::new(
162
+ tt.clone(),
163
+ ActivityCancelReason::NotFound,
164
+ ))
165
+ .expect("Receive half of heartbeat cancels not blocked");
166
+ }
167
+ Err(e) => {
168
+ warn!("Error when recording heartbeat: {:?}", e);
169
+ }
170
+ };
171
+ let _ = heartbeat_tx.send(HeartbeatAction::CompleteReport(tt));
172
+ }
173
+ }
174
+ }
175
+ }),
176
+ );
177
+
178
+ Self {
179
+ join_handle: TakeCell::new(join_handle),
180
+ shutdown_token,
181
+ heartbeat_tx,
182
+ }
183
+ }
86
184
  /// Records a new heartbeat, the first call will result in an immediate call to the server,
87
185
  /// while rapid successive calls would accumulate for up to `delay` and then latest heartbeat
88
186
  /// details will be sent to the server.
@@ -95,9 +193,6 @@ impl ActivityHeartbeatManager {
95
193
  hb: ActivityHeartbeat,
96
194
  throttle_interval: Duration,
97
195
  ) -> Result<(), ActivityHeartbeatError> {
98
- if self.shutdown_token.is_cancelled() {
99
- return Err(ActivityHeartbeatError::ShuttingDown);
100
- }
101
196
  self.heartbeat_tx
102
197
  .send(HeartbeatAction::SendHeartbeat(ValidActivityHeartbeat {
103
198
  task_token: TaskToken(hb.task_token),
@@ -121,19 +216,11 @@ impl ActivityHeartbeatManager {
121
216
  completed.notified().await;
122
217
  }
123
218
 
124
- /// Returns a future that resolves any time there is a new activity cancel that must be
125
- /// dispatched to lang
126
- pub(super) async fn next_pending_cancel(&self) -> Option<PendingActivityCancel> {
127
- self.incoming_cancels.lock().await.recv().await
128
- }
129
-
130
- // TODO: Can own self now!
131
219
  /// Initiates shutdown procedure by stopping lifecycle loop and awaiting for all in-flight
132
220
  /// heartbeat requests to be flushed to the server.
133
221
  pub(super) async fn shutdown(&self) {
134
222
  self.shutdown_token.cancel();
135
- let mut handle = self.join_handle.lock().await;
136
- if let Some(h) = handle.take() {
223
+ if let Some(h) = self.join_handle.take_once() {
137
224
  let handle_r = h.await;
138
225
  if let Err(e) = handle_r {
139
226
  if !e.is_cancelled() {
@@ -301,110 +388,6 @@ impl HeartbeatStreamState {
301
388
  }
302
389
  }
303
390
 
304
- impl ActivityHeartbeatManager {
305
- /// Creates a new instance of an activity heartbeat manager and returns a handle to the user,
306
- /// which allows to send new heartbeats and initiate the shutdown.
307
- pub fn new(client: Arc<dyn WorkerClient>) -> Self {
308
- let (heartbeat_stream_state, heartbeat_tx_source, shutdown_token) =
309
- HeartbeatStreamState::new();
310
- let (cancels_tx, cancels_rx) = unbounded_channel();
311
- let heartbeat_tx = heartbeat_tx_source.clone();
312
-
313
- let join_handle = tokio::spawn(
314
- // The stream of incoming heartbeats uses unfold to carry state across each item in the
315
- // stream. The closure checks if, for any given activity, we should heartbeat or not
316
- // depending on its delay and when we last issued a heartbeat for it.
317
- futures::stream::unfold(heartbeat_stream_state, move |mut hb_states| {
318
- async move {
319
- let hb = tokio::select! {
320
- biased;
321
-
322
- _ = hb_states.cancellation_token.cancelled() => {
323
- return None
324
- }
325
- hb = hb_states.incoming_hbs.recv() => match hb {
326
- None => return None,
327
- Some(hb) => hb,
328
- }
329
- };
330
-
331
- Some((
332
- match hb {
333
- HeartbeatAction::SendHeartbeat(hb) => hb_states.record(hb),
334
- HeartbeatAction::CompleteReport(tt) => hb_states.handle_report_completed(tt),
335
- HeartbeatAction::CompleteThrottle(tt) => hb_states.handle_throttle_completed(tt),
336
- HeartbeatAction::Evict{ token, on_complete } => hb_states.evict(token, on_complete),
337
- },
338
- hb_states,
339
- ))
340
- }
341
- })
342
- // Filters out `None`s
343
- .filter_map(|opt| async { opt })
344
- .for_each_concurrent(None, move |action| {
345
- let heartbeat_tx = heartbeat_tx_source.clone();
346
- let sg = client.clone();
347
- let cancels_tx = cancels_tx.clone();
348
- async move {
349
- match action {
350
- HeartbeatExecutorAction::Sleep(tt, duration, cancellation_token) => {
351
- tokio::select! {
352
- _ = cancellation_token.cancelled() => (),
353
- _ = tokio::time::sleep(duration) => {
354
- let _ = heartbeat_tx.send(HeartbeatAction::CompleteThrottle(tt));
355
- },
356
- };
357
- }
358
- HeartbeatExecutorAction::Report { task_token: tt, details } => {
359
- match sg
360
- .record_activity_heartbeat(tt.clone(), details.into_payloads())
361
- .await
362
- {
363
- Ok(RecordActivityTaskHeartbeatResponse { cancel_requested }) => {
364
- if cancel_requested {
365
- cancels_tx
366
- .send(PendingActivityCancel::new(
367
- tt.clone(),
368
- ActivityCancelReason::Cancelled,
369
- ))
370
- .expect(
371
- "Receive half of heartbeat cancels not blocked",
372
- );
373
- }
374
- }
375
- // Send cancels for any activity that learns its workflow already
376
- // finished (which is one thing not found implies - other reasons
377
- // would seem equally valid).
378
- Err(s) if s.code() == tonic::Code::NotFound => {
379
- debug!(task_token = %tt,
380
- "Activity not found when recording heartbeat");
381
- cancels_tx
382
- .send(PendingActivityCancel::new(
383
- tt.clone(),
384
- ActivityCancelReason::NotFound,
385
- ))
386
- .expect("Receive half of heartbeat cancels not blocked");
387
- }
388
- Err(e) => {
389
- warn!("Error when recording heartbeat: {:?}", e);
390
- }
391
- };
392
- let _ = heartbeat_tx.send(HeartbeatAction::CompleteReport(tt));
393
- }
394
- }
395
- }
396
- }),
397
- );
398
-
399
- Self {
400
- incoming_cancels: Mutex::new(cancels_rx),
401
- join_handle: Mutex::new(Some(join_handle)),
402
- shutdown_token,
403
- heartbeat_tx,
404
- }
405
- }
406
- }
407
-
408
391
  #[cfg(test)]
409
392
  mod test {
410
393
  use super::*;
@@ -425,7 +408,8 @@ mod test {
425
408
  .expect_record_activity_heartbeat()
426
409
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
427
410
  .times(2);
428
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
411
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
412
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
429
413
  let fake_task_token = vec![1, 2, 3];
430
414
  // Send 2 heartbeat requests for 20ms apart.
431
415
  // The first heartbeat should be sent right away, and
@@ -446,14 +430,14 @@ mod test {
446
430
  .expect_record_activity_heartbeat()
447
431
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
448
432
  .times(3);
449
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
433
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
434
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
450
435
  let fake_task_token = vec![1, 2, 3];
451
436
  // Heartbeats always get sent if recorded less frequently than the throttle interval
452
437
  for i in 0_u8..3 {
453
438
  record_heartbeat(&hm, fake_task_token.clone(), i, Duration::from_millis(10));
454
439
  sleep(Duration::from_millis(20)).await;
455
440
  }
456
- // sleep again to let heartbeats be flushed
457
441
  hm.shutdown().await;
458
442
  }
459
443
 
@@ -466,7 +450,8 @@ mod test {
466
450
  .expect_record_activity_heartbeat()
467
451
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
468
452
  .times(1);
469
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
453
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
454
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
470
455
  let fake_task_token = vec![1, 2, 3];
471
456
  // Send a whole bunch of heartbeats very fast. We should still only send one total.
472
457
  for i in 0_u8..50 {
@@ -485,7 +470,8 @@ mod test {
485
470
  .expect_record_activity_heartbeat()
486
471
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
487
472
  .times(2);
488
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
473
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
474
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
489
475
  let fake_task_token = vec![1, 2, 3];
490
476
  record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
491
477
  sleep(Duration::from_millis(500)).await;
@@ -502,7 +488,8 @@ mod test {
502
488
  .expect_record_activity_heartbeat()
503
489
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
504
490
  .times(2);
505
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
491
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
492
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
506
493
  let fake_task_token = vec![1, 2, 3];
507
494
  record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
508
495
  // Let it propagate
@@ -522,42 +509,14 @@ mod test {
522
509
  .expect_record_activity_heartbeat()
523
510
  .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
524
511
  .times(1);
525
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
512
+ let (cancel_tx, _cancel_rx) = unbounded_channel();
513
+ let hm = ActivityHeartbeatManager::new(Arc::new(mock_client), cancel_tx);
526
514
  let fake_task_token = vec![1, 2, 3];
527
515
  record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
528
516
  hm.evict(fake_task_token.clone().into()).await;
529
517
  hm.shutdown().await;
530
518
  }
531
519
 
532
- /// Recording new heartbeats after shutdown is not allowed, and will result in error.
533
- #[tokio::test]
534
- async fn record_after_shutdown() {
535
- let mut mock_client = mock_workflow_client();
536
- mock_client
537
- .expect_record_activity_heartbeat()
538
- .returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
539
- .times(0);
540
- let hm = ActivityHeartbeatManager::new(Arc::new(mock_client));
541
- hm.shutdown().await;
542
- match hm.record(
543
- ActivityHeartbeat {
544
- task_token: vec![1, 2, 3],
545
- details: vec![Payload {
546
- // payload doesn't matter in this case, as it shouldn't get sent anyways.
547
- ..Default::default()
548
- }],
549
- },
550
- Duration::from_millis(1000),
551
- ) {
552
- Ok(_) => {
553
- unreachable!("heartbeat should not be recorded after the shutdown");
554
- }
555
- Err(e) => {
556
- matches!(e, ActivityHeartbeatError::ShuttingDown);
557
- }
558
- }
559
- }
560
-
561
520
  fn record_heartbeat(
562
521
  hm: &ActivityHeartbeatManager,
563
522
  task_token: Vec<u8>,
@@ -0,0 +1,89 @@
1
+ use crate::abstractions::MeteredSemaphore;
2
+ use crate::worker::activities::PermittedTqResp;
3
+ use crate::{pollers::BoxedActPoller, MetricsContext};
4
+ use futures::{stream, Stream};
5
+ use governor::clock::DefaultClock;
6
+ use governor::middleware::NoOpMiddleware;
7
+ use governor::state::{InMemoryState, NotKeyed};
8
+ use governor::RateLimiter;
9
+ use std::sync::Arc;
10
+ use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
11
+ use tokio::select;
12
+ use tokio_util::sync::CancellationToken;
13
+
14
+ struct StreamState {
15
+ poller: BoxedActPoller,
16
+ semaphore: Arc<MeteredSemaphore>,
17
+ rate_limiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
18
+ metrics: MetricsContext,
19
+ shutdown_token: CancellationToken,
20
+ poller_was_shutdown: bool,
21
+ }
22
+
23
+ pub(crate) fn new_activity_task_poller(
24
+ poller: BoxedActPoller,
25
+ semaphore: Arc<MeteredSemaphore>,
26
+ rate_limiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
27
+ metrics: MetricsContext,
28
+ shutdown_token: CancellationToken,
29
+ ) -> impl Stream<Item = Result<PermittedTqResp, tonic::Status>> {
30
+ let state = StreamState {
31
+ poller,
32
+ semaphore,
33
+ rate_limiter,
34
+ metrics,
35
+ shutdown_token,
36
+ poller_was_shutdown: false,
37
+ };
38
+ stream::unfold(state, |mut state| async move {
39
+ loop {
40
+ let poll = async {
41
+ let permit = state
42
+ .semaphore
43
+ .acquire_owned()
44
+ .await
45
+ .expect("outstanding activity semaphore not closed");
46
+ if !state.poller_was_shutdown {
47
+ if let Some(ref rl) = state.rate_limiter {
48
+ rl.until_ready().await;
49
+ }
50
+ }
51
+ loop {
52
+ return match state.poller.poll().await {
53
+ Some(Ok(resp)) => {
54
+ if resp == PollActivityTaskQueueResponse::default() {
55
+ // We get the default proto in the event that the long poll times out.
56
+ debug!("Poll activity task timeout");
57
+ state.metrics.act_poll_timeout();
58
+ continue;
59
+ }
60
+ Some(Ok(PermittedTqResp { permit, resp }))
61
+ }
62
+ Some(Err(e)) => {
63
+ warn!(error=?e, "Error while polling for activity tasks");
64
+ Some(Err(e))
65
+ }
66
+ // If poller returns None, it's dead, thus we also return None to terminate this
67
+ // stream.
68
+ None => None,
69
+ };
70
+ }
71
+ };
72
+ if state.poller_was_shutdown {
73
+ return poll.await.map(|res| (res, state));
74
+ }
75
+ select! {
76
+ biased;
77
+
78
+ _ = state.shutdown_token.cancelled() => {
79
+ state.poller.notify_shutdown();
80
+ state.poller_was_shutdown = true;
81
+ continue;
82
+ }
83
+ res = poll => {
84
+ return res.map(|res| (res, state));
85
+ }
86
+ }
87
+ }
88
+ })
89
+ }
@@ -1,5 +1,5 @@
1
1
  use crate::{
2
- abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit},
2
+ abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
3
3
  protosext::ValidScheduleLA,
4
4
  retry_logic::RetryPolicyExt,
5
5
  worker::workflow::HeartbeatTimeoutMsg,
@@ -20,7 +20,11 @@ use temporal_sdk_core_protos::{
20
20
  activity_result::{Cancellation, Failure as ActFail, Success},
21
21
  activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
22
22
  },
23
- temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
23
+ temporal::api::{
24
+ common::v1::WorkflowExecution,
25
+ enums::v1::TimeoutType,
26
+ failure::v1::{failure, Failure as APIFailure, TimeoutFailureInfo},
27
+ },
24
28
  };
25
29
  use tokio::{
26
30
  sync::{
@@ -51,7 +55,7 @@ pub(crate) struct LocalInFlightActInfo {
51
55
  pub la_info: NewLocalAct,
52
56
  pub dispatch_time: Instant,
53
57
  pub attempt: u32,
54
- _permit: OwnedMeteredSemPermit,
58
+ _permit: UsedMeteredSemPermit,
55
59
  }
56
60
 
57
61
  #[derive(Debug, Clone)]
@@ -70,7 +74,18 @@ impl LocalActivityExecutionResult {
70
74
  Self::Cancelled(Cancellation::from_details(None))
71
75
  }
72
76
  pub(crate) fn timeout(tt: TimeoutType) -> Self {
73
- Self::TimedOut(ActFail::timeout(tt))
77
+ Self::TimedOut(ActFail {
78
+ failure: Some(APIFailure {
79
+ message: "Activity timed out".to_string(),
80
+ failure_info: Some(failure::FailureInfo::TimeoutFailureInfo(
81
+ TimeoutFailureInfo {
82
+ timeout_type: tt as i32,
83
+ last_heartbeat_details: None,
84
+ },
85
+ )),
86
+ ..Default::default()
87
+ }),
88
+ })
74
89
  }
75
90
  }
76
91
 
@@ -110,12 +125,17 @@ impl Debug for NewLocalAct {
110
125
  pub(crate) enum LocalActRequest {
111
126
  New(NewLocalAct),
112
127
  Cancel(ExecutingLAId),
128
+ #[from(ignore)]
113
129
  CancelAllInRun(String),
114
130
  StartHeartbeatTimeout {
115
131
  send_on_elapse: HeartbeatTimeoutMsg,
116
132
  deadline: Instant,
117
133
  abort_reg: AbortRegistration,
118
134
  },
135
+ /// Tell the LA manager that a workflow task was responded to (completed or failed) for a
136
+ /// certain run id
137
+ #[from(ignore)]
138
+ IndicateWorkflowTaskCompleted(String),
119
139
  }
120
140
 
121
141
  #[derive(Debug, Clone, Eq, PartialEq, Hash)]
@@ -153,6 +173,10 @@ struct LocalActivityInfo {
153
173
  /// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
154
174
  /// while the LA id has been generated, but it has not yet been scheduled.
155
175
  timeout_bag: Option<TimeoutBag>,
176
+ /// True once the first workflow task this LA started in has elapsed
177
+ first_wft_has_ended: bool,
178
+ /// Attempts at executing this LA during the current WFT
179
+ attempts_in_wft: usize,
156
180
  }
157
181
 
158
182
  struct LAMData {
@@ -270,6 +294,8 @@ impl LocalActivityManager {
270
294
  task_token: tt,
271
295
  backing_off_task: None,
272
296
  timeout_bag: None,
297
+ first_wft_has_ended: false,
298
+ attempts_in_wft: 0,
273
299
  });
274
300
 
275
301
  // Set up timeouts for the new activity
@@ -324,6 +350,17 @@ impl LocalActivityManager {
324
350
  }
325
351
  }
326
352
  }
353
+ LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
354
+ let mut dlock = self.dat.lock();
355
+ let las_for_run = dlock
356
+ .la_info
357
+ .iter_mut()
358
+ .filter(|(id, _)| id.run_id == run_id);
359
+ for (_, lainf) in las_for_run {
360
+ lainf.first_wft_has_ended = true;
361
+ lainf.attempts_in_wft = 0;
362
+ }
363
+ }
327
364
  }
328
365
  }
329
366
  immediate_resolutions
@@ -432,7 +469,7 @@ impl LocalActivityManager {
432
469
  la_info: la_info_for_in_flight_map,
433
470
  dispatch_time: Instant::now(),
434
471
  attempt,
435
- _permit: permit,
472
+ _permit: permit.into_used(),
436
473
  },
437
474
  );
438
475
 
@@ -538,6 +575,14 @@ impl LocalActivityManager {
538
575
  LocalActivityInfo {
539
576
  task_token: tt,
540
577
  backing_off_task: Some(jh),
578
+ first_wft_has_ended: maybe_old_lai
579
+ .as_ref()
580
+ .map(|old| old.first_wft_has_ended)
581
+ .unwrap_or_default(),
582
+ attempts_in_wft: maybe_old_lai
583
+ .as_ref()
584
+ .map(|old| old.attempts_in_wft + 1)
585
+ .unwrap_or(1),
541
586
  timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
542
587
  },
543
588
  );
@@ -564,9 +609,26 @@ impl LocalActivityManager {
564
609
  }
565
610
  }
566
611
 
612
+ /// Try to close the activity stream as soon as worker shutdown is initiated. This is required
613
+ /// for activity-only workers where since workflows are not polled and the activity poller might
614
+ /// get "stuck".
615
+ pub(crate) fn shutdown_initiated(&self) {
616
+ self.set_shutdown_complete_if_ready(&mut self.dat.lock());
617
+ }
618
+
619
+ pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
620
+ let dlock = self.dat.lock();
621
+ dlock
622
+ .la_info
623
+ .iter()
624
+ .filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
625
+ .map(|(_, info)| info.attempts_in_wft)
626
+ .sum()
627
+ }
628
+
567
629
  fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
568
630
  let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
569
- if nothing_outstanding {
631
+ if nothing_outstanding && self.workflows_have_shut_down.is_cancelled() {
570
632
  self.shutdown_complete_tok.cancel();
571
633
  }
572
634
  nothing_outstanding
@@ -1174,4 +1236,58 @@ mod tests {
1174
1236
  assert_eq!(lam.num_outstanding(), 1);
1175
1237
  assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
1176
1238
  }
1239
+
1240
+ #[tokio::test]
1241
+ async fn nonfirst_la_attempt_count_is_accurate() {
1242
+ let run_id = "run_id";
1243
+ let lam = LocalActivityManager::test(10);
1244
+ let new_la = NewLocalAct {
1245
+ schedule_cmd: ValidScheduleLA {
1246
+ seq: 1,
1247
+ activity_id: 1.to_string(),
1248
+ retry_policy: RetryPolicy {
1249
+ initial_interval: Some(prost_dur!(from_millis(1))),
1250
+ backoff_coefficient: 1.0,
1251
+ ..Default::default()
1252
+ },
1253
+ local_retry_threshold: Duration::from_secs(500),
1254
+ ..Default::default()
1255
+ },
1256
+ workflow_type: "".to_string(),
1257
+ workflow_exec_info: WorkflowExecution {
1258
+ workflow_id: "".to_string(),
1259
+ run_id: run_id.to_string(),
1260
+ },
1261
+ schedule_time: SystemTime::now(),
1262
+ };
1263
+ lam.enqueue([new_la.clone().into()]);
1264
+ let spinfail = || async {
1265
+ for _ in 1..=10 {
1266
+ let next = lam.next_pending().await.unwrap().unwrap();
1267
+ let tt = TaskToken(next.task_token);
1268
+ lam.complete(
1269
+ &tt,
1270
+ &LocalActivityExecutionResult::Failed(Default::default()),
1271
+ );
1272
+ }
1273
+ };
1274
+
1275
+ // Fail a bunch of times
1276
+ spinfail().await;
1277
+ // Nonfirst attempt count should still be zero
1278
+ let count = lam.get_nonfirst_attempt_count(run_id);
1279
+ assert_eq!(count, 0);
1280
+
1281
+ for _ in 1..=2 {
1282
+ // This should work over multiple WFTs
1283
+ // say the first wft was completed
1284
+ lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
1285
+ run_id.to_string(),
1286
+ )]);
1287
+ // Do some more attempts
1288
+ spinfail().await;
1289
+ let count = lam.get_nonfirst_attempt_count(run_id);
1290
+ assert_eq!(count, 10);
1291
+ }
1292
+ }
1177
1293
  }