@temporalio/core-bridge 1.12.0 → 1.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/Cargo.lock +64 -119
  2. package/Cargo.toml +1 -1
  3. package/index.js +3 -2
  4. package/package.json +3 -3
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.cargo/config.toml +1 -2
  11. package/sdk-core/.github/workflows/per-pr.yml +2 -0
  12. package/sdk-core/AGENTS.md +7 -0
  13. package/sdk-core/Cargo.toml +9 -5
  14. package/sdk-core/README.md +6 -5
  15. package/sdk-core/client/Cargo.toml +3 -2
  16. package/sdk-core/client/src/lib.rs +17 -8
  17. package/sdk-core/client/src/metrics.rs +57 -23
  18. package/sdk-core/client/src/raw.rs +33 -15
  19. package/sdk-core/core/Cargo.toml +11 -9
  20. package/sdk-core/core/benches/workflow_replay.rs +114 -15
  21. package/sdk-core/core/src/core_tests/activity_tasks.rs +18 -18
  22. package/sdk-core/core/src/core_tests/child_workflows.rs +4 -4
  23. package/sdk-core/core/src/core_tests/determinism.rs +6 -6
  24. package/sdk-core/core/src/core_tests/local_activities.rs +20 -20
  25. package/sdk-core/core/src/core_tests/mod.rs +40 -5
  26. package/sdk-core/core/src/core_tests/queries.rs +25 -16
  27. package/sdk-core/core/src/core_tests/replay_flag.rs +3 -3
  28. package/sdk-core/core/src/core_tests/updates.rs +3 -3
  29. package/sdk-core/core/src/core_tests/workers.rs +9 -7
  30. package/sdk-core/core/src/core_tests/workflow_tasks.rs +40 -42
  31. package/sdk-core/core/src/ephemeral_server/mod.rs +1 -19
  32. package/sdk-core/core/src/lib.rs +10 -1
  33. package/sdk-core/core/src/pollers/poll_buffer.rs +2 -2
  34. package/sdk-core/core/src/replay/mod.rs +3 -3
  35. package/sdk-core/core/src/telemetry/metrics.rs +306 -152
  36. package/sdk-core/core/src/telemetry/mod.rs +11 -4
  37. package/sdk-core/core/src/telemetry/otel.rs +134 -131
  38. package/sdk-core/core/src/telemetry/prometheus_meter.rs +885 -0
  39. package/sdk-core/core/src/telemetry/prometheus_server.rs +48 -28
  40. package/sdk-core/core/src/test_help/mod.rs +27 -12
  41. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +7 -7
  42. package/sdk-core/core/src/worker/activities.rs +4 -4
  43. package/sdk-core/core/src/worker/client/mocks.rs +10 -3
  44. package/sdk-core/core/src/worker/client.rs +68 -5
  45. package/sdk-core/core/src/worker/heartbeat.rs +229 -0
  46. package/sdk-core/core/src/worker/mod.rs +35 -14
  47. package/sdk-core/core/src/worker/tuner/resource_based.rs +4 -4
  48. package/sdk-core/core/src/worker/workflow/history_update.rs +71 -19
  49. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -2
  50. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +1 -1
  51. package/sdk-core/core/src/worker/workflow/machines/nexus_operation_state_machine.rs +31 -48
  52. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -2
  53. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +3 -3
  54. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +4 -1
  55. package/sdk-core/core/src/worker/workflow/managed_run.rs +1 -1
  56. package/sdk-core/core/src/worker/workflow/mod.rs +15 -15
  57. package/sdk-core/core-api/Cargo.toml +2 -2
  58. package/sdk-core/core-api/src/envconfig.rs +204 -99
  59. package/sdk-core/core-api/src/lib.rs +9 -0
  60. package/sdk-core/core-api/src/telemetry/metrics.rs +548 -100
  61. package/sdk-core/core-api/src/worker.rs +11 -5
  62. package/sdk-core/core-c-bridge/Cargo.toml +49 -0
  63. package/sdk-core/core-c-bridge/build.rs +26 -0
  64. package/sdk-core/core-c-bridge/include/temporal-sdk-core-c-bridge.h +817 -0
  65. package/sdk-core/core-c-bridge/src/client.rs +679 -0
  66. package/sdk-core/core-c-bridge/src/lib.rs +245 -0
  67. package/sdk-core/core-c-bridge/src/metric.rs +682 -0
  68. package/sdk-core/core-c-bridge/src/random.rs +61 -0
  69. package/sdk-core/core-c-bridge/src/runtime.rs +445 -0
  70. package/sdk-core/core-c-bridge/src/testing.rs +282 -0
  71. package/sdk-core/core-c-bridge/src/tests/context.rs +644 -0
  72. package/sdk-core/core-c-bridge/src/tests/mod.rs +178 -0
  73. package/sdk-core/core-c-bridge/src/tests/utils.rs +108 -0
  74. package/sdk-core/core-c-bridge/src/worker.rs +1069 -0
  75. package/sdk-core/etc/deps.svg +64 -64
  76. package/sdk-core/sdk/src/activity_context.rs +6 -4
  77. package/sdk-core/sdk/src/lib.rs +49 -27
  78. package/sdk-core/sdk/src/workflow_future.rs +18 -25
  79. package/sdk-core/sdk-core-protos/protos/api_upstream/README.md +4 -0
  80. package/sdk-core/sdk-core-protos/protos/api_upstream/buf.yaml +0 -2
  81. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv2.json +630 -83
  82. package/sdk-core/sdk-core-protos/protos/api_upstream/openapi/openapiv3.yaml +632 -78
  83. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/batch/v1/message.proto +4 -4
  84. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/command/v1/message.proto +6 -4
  85. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/common/v1/message.proto +2 -2
  86. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/deployment/v1/message.proto +32 -2
  87. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/common.proto +10 -1
  88. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/deployment.proto +26 -0
  89. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +2 -0
  90. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/enums/v1/reset.proto +4 -4
  91. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  92. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/history/v1/message.proto +47 -31
  93. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/nexus/v1/message.proto +4 -4
  94. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/schedule/v1/message.proto +7 -1
  95. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/worker/v1/message.proto +134 -0
  96. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflow/v1/message.proto +14 -11
  97. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +148 -37
  98. package/sdk-core/sdk-core-protos/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +21 -0
  99. package/sdk-core/sdk-core-protos/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +4 -4
  100. package/sdk-core/sdk-core-protos/src/history_builder.rs +9 -5
  101. package/sdk-core/sdk-core-protos/src/lib.rs +96 -6
  102. package/sdk-core/test-utils/src/lib.rs +11 -3
  103. package/sdk-core/tests/cloud_tests.rs +3 -3
  104. package/sdk-core/tests/heavy_tests.rs +11 -3
  105. package/sdk-core/tests/integ_tests/client_tests.rs +12 -13
  106. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +1 -1
  107. package/sdk-core/tests/integ_tests/metrics_tests.rs +188 -83
  108. package/sdk-core/tests/integ_tests/polling_tests.rs +1 -1
  109. package/sdk-core/tests/integ_tests/queries_tests.rs +56 -40
  110. package/sdk-core/tests/integ_tests/update_tests.rs +2 -7
  111. package/sdk-core/tests/integ_tests/worker_tests.rs +3 -4
  112. package/sdk-core/tests/integ_tests/worker_versioning_tests.rs +3 -7
  113. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +3 -5
  114. package/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +24 -17
  115. package/src/client.rs +6 -0
  116. package/src/metrics.rs +6 -6
@@ -0,0 +1,229 @@
1
+ use crate::WorkerClient;
2
+ use crate::abstractions::dbg_panic;
3
+ use gethostname::gethostname;
4
+ use parking_lot::Mutex;
5
+ use prost_types::Duration as PbDuration;
6
+ use std::sync::{Arc, OnceLock};
7
+ use std::time::{Duration, SystemTime};
8
+ use temporal_sdk_core_api::worker::WorkerConfig;
9
+ use temporal_sdk_core_protos::temporal::api::worker::v1::{WorkerHeartbeat, WorkerHostInfo};
10
+ use tokio::sync::Notify;
11
+ use tokio::task::JoinHandle;
12
+ use tokio::time::MissedTickBehavior;
13
+ use uuid::Uuid;
14
+
15
+ pub(crate) type HeartbeatFn = Box<dyn Fn() -> Option<WorkerHeartbeat> + Send + Sync>;
16
+
17
+ pub(crate) struct WorkerHeartbeatManager {
18
+ heartbeat_handle: JoinHandle<()>,
19
+ }
20
+
21
+ impl WorkerHeartbeatManager {
22
+ pub(crate) fn new(
23
+ config: WorkerConfig,
24
+ identity: String,
25
+ heartbeat_fn: Arc<OnceLock<HeartbeatFn>>,
26
+ client: Arc<dyn WorkerClient>,
27
+ ) -> Self {
28
+ let sdk_name_and_ver = client.sdk_name_and_version();
29
+ let reset_notify = Arc::new(Notify::new());
30
+ let data = Arc::new(Mutex::new(WorkerHeartbeatData::new(
31
+ config,
32
+ identity,
33
+ sdk_name_and_ver,
34
+ reset_notify.clone(),
35
+ )));
36
+ let data_clone = data.clone();
37
+
38
+ let heartbeat_handle = tokio::spawn(async move {
39
+ let mut ticker = tokio::time::interval(data_clone.lock().heartbeat_interval);
40
+ ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
41
+ loop {
42
+ tokio::select! {
43
+ _ = ticker.tick() => {
44
+ let heartbeat = if let Some(heartbeat) = data_clone.lock().capture_heartbeat_if_needed() {
45
+ heartbeat
46
+ } else {
47
+ continue
48
+ };
49
+ if let Err(e) = client.clone().record_worker_heartbeat(heartbeat).await {
50
+ if matches!(
51
+ e.code(),
52
+ tonic::Code::Unimplemented
53
+ ) {
54
+ return;
55
+ }
56
+ warn!(error=?e, "Network error while sending worker heartbeat");
57
+ }
58
+ }
59
+ _ = reset_notify.notified() => {
60
+ ticker.reset();
61
+ }
62
+ }
63
+ }
64
+ });
65
+
66
+ let data_clone = data.clone();
67
+ if heartbeat_fn
68
+ .set(Box::new(move || {
69
+ data_clone.lock().capture_heartbeat_if_needed()
70
+ }))
71
+ .is_err()
72
+ {
73
+ dbg_panic!(
74
+ "Failed to set heartbeat_fn, heartbeat_fn should only be set once, when a singular WorkerHeartbeatInfo is created"
75
+ );
76
+ }
77
+
78
+ Self { heartbeat_handle }
79
+ }
80
+
81
+ pub(crate) fn shutdown(&self) {
82
+ self.heartbeat_handle.abort()
83
+ }
84
+ }
85
+
86
+ #[derive(Debug, Clone)]
87
+ struct WorkerHeartbeatData {
88
+ worker_instance_key: String,
89
+ worker_identity: String,
90
+ host_info: WorkerHostInfo,
91
+ // Time of the last heartbeat. This is used to both for heartbeat_time and last_heartbeat_time
92
+ heartbeat_time: Option<SystemTime>,
93
+ task_queue: String,
94
+ /// SDK name
95
+ sdk_name: String,
96
+ /// SDK version
97
+ sdk_version: String,
98
+ /// Worker start time
99
+ start_time: SystemTime,
100
+ heartbeat_interval: Duration,
101
+ reset_notify: Arc<Notify>,
102
+ }
103
+
104
+ impl WorkerHeartbeatData {
105
+ fn new(
106
+ worker_config: WorkerConfig,
107
+ worker_identity: String,
108
+ sdk_name_and_ver: (String, String),
109
+ reset_notify: Arc<Notify>,
110
+ ) -> Self {
111
+ Self {
112
+ worker_identity,
113
+ host_info: WorkerHostInfo {
114
+ host_name: gethostname().to_string_lossy().to_string(),
115
+ process_id: std::process::id().to_string(),
116
+ ..Default::default()
117
+ },
118
+ sdk_name: sdk_name_and_ver.0,
119
+ sdk_version: sdk_name_and_ver.1,
120
+ task_queue: worker_config.task_queue.clone(),
121
+ start_time: SystemTime::now(),
122
+ heartbeat_time: None,
123
+ worker_instance_key: Uuid::new_v4().to_string(),
124
+ heartbeat_interval: worker_config.heartbeat_interval,
125
+ reset_notify,
126
+ }
127
+ }
128
+
129
+ fn capture_heartbeat_if_needed(&mut self) -> Option<WorkerHeartbeat> {
130
+ let now = SystemTime::now();
131
+ let elapsed_since_last_heartbeat = if let Some(heartbeat_time) = self.heartbeat_time {
132
+ let dur = now.duration_since(heartbeat_time).unwrap_or(Duration::ZERO);
133
+
134
+ // Only send poll data if it's nearly been a full interval since this data has been sent
135
+ // In this case, "nearly" is 90% of the interval
136
+ if dur.as_secs_f64() < 0.9 * self.heartbeat_interval.as_secs_f64() {
137
+ return None;
138
+ }
139
+ Some(PbDuration {
140
+ seconds: dur.as_secs() as i64,
141
+ nanos: dur.subsec_nanos() as i32,
142
+ })
143
+ } else {
144
+ None
145
+ };
146
+
147
+ self.heartbeat_time = Some(now);
148
+
149
+ self.reset_notify.notify_one();
150
+
151
+ Some(WorkerHeartbeat {
152
+ worker_instance_key: self.worker_instance_key.clone(),
153
+ worker_identity: self.worker_identity.clone(),
154
+ host_info: Some(self.host_info.clone()),
155
+ task_queue: self.task_queue.clone(),
156
+ sdk_name: self.sdk_name.clone(),
157
+ sdk_version: self.sdk_version.clone(),
158
+ status: 0,
159
+ start_time: Some(self.start_time.into()),
160
+ heartbeat_time: Some(SystemTime::now().into()),
161
+ elapsed_since_last_heartbeat,
162
+ ..Default::default()
163
+ })
164
+ }
165
+ }
166
+
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        test_help::{WorkerExt, test_worker_cfg},
        worker::{self, client::mocks::mock_worker_client},
    };
    use std::{sync::Arc, time::Duration};
    use temporal_sdk_core_api::worker::PollerBehavior;
    use temporal_sdk_core_protos::temporal::api::workflowservice::v1::RecordWorkerHeartbeatResponse;

    /// Pauses Tokio's clock, moves it forward by `dur`, then resumes it.
    async fn advance_time(dur: Duration) {
        tokio::time::pause();
        tokio::time::advance(dur).await;
        tokio::time::resume();
    }

    /// Expects exactly two heartbeats to reach the client, checks the fields of each, and
    /// checks that an on-demand capture made too soon after a heartbeat yields nothing.
    #[tokio::test]
    async fn worker_heartbeat() {
        let mut mock = mock_worker_client();
        mock.expect_record_worker_heartbeat()
            .times(2)
            .returning(move |heartbeat| {
                let host_info = heartbeat.host_info.clone().unwrap();
                assert_eq!("test-identity", heartbeat.worker_identity);
                assert!(!heartbeat.worker_instance_key.is_empty());
                let expected_host_name = gethostname::gethostname().to_string_lossy().to_string();
                assert_eq!(host_info.host_name, expected_host_name);
                assert_eq!(host_info.process_id, std::process::id().to_string());
                assert_eq!(heartbeat.sdk_name, "test-core");
                assert_eq!(heartbeat.sdk_version, "0.0.0");
                assert_eq!(heartbeat.task_queue, "q");
                assert!(heartbeat.heartbeat_time.is_some());
                assert!(heartbeat.start_time.is_some());

                Ok(RecordWorkerHeartbeatResponse {})
            });

        let config = test_worker_cfg()
            .activity_task_poller_behavior(PollerBehavior::SimpleMaximum(1_usize))
            .max_outstanding_activities(1_usize)
            .heartbeat_interval(Duration::from_millis(200))
            .build()
            .unwrap();

        let heartbeat_fn: Arc<OnceLock<HeartbeatFn>> = Arc::new(OnceLock::new());
        let client = Arc::new(mock);
        let worker = worker::Worker::new(config, None, client, None, Some(heartbeat_fn.clone()));

        // The worker installs the callback during construction; the first capture always
        // produces a heartbeat, which is discarded here.
        let capture = heartbeat_fn.get().unwrap();
        capture();

        // heartbeat timer fires once
        advance_time(Duration::from_millis(300)).await;
        // it hasn't been >90% of the interval since the last heartbeat, so no data should be
        // returned here
        assert_eq!(None, capture());
        // heartbeat timer fires once
        advance_time(Duration::from_millis(300)).await;

        worker.drain_activity_poller_and_shutdown().await;
    }
}
@@ -1,5 +1,6 @@
1
1
  mod activities;
2
2
  pub(crate) mod client;
3
+ mod heartbeat;
3
4
  mod nexus;
4
5
  mod slot_provider;
5
6
  pub(crate) mod tuner;
@@ -19,6 +20,7 @@ pub(crate) use activities::{
19
20
  pub(crate) use wft_poller::WFTPollerShared;
20
21
  pub(crate) use workflow::LEGACY_QUERY_ID;
21
22
 
23
+ use crate::worker::heartbeat::{HeartbeatFn, WorkerHeartbeatManager};
22
24
  use crate::{
23
25
  ActivityHeartbeat, CompleteActivityError, PollError, WorkerTrait,
24
26
  abstractions::{MeteredPermitDealer, PermitDealerContextData, dbg_panic},
@@ -41,10 +43,15 @@ use crate::{
41
43
  },
42
44
  },
43
45
  };
46
+ use crate::{
47
+ pollers::{ActivityTaskOptions, LongPollBuffer},
48
+ worker::workflow::wft_poller,
49
+ };
44
50
  use activities::WorkerActivityTasks;
45
51
  use futures_util::{StreamExt, stream};
46
52
  use parking_lot::Mutex;
47
53
  use slot_provider::SlotProvider;
54
+ use std::sync::OnceLock;
48
55
  use std::{
49
56
  convert::TryInto,
50
57
  future,
@@ -77,11 +84,6 @@ use temporal_sdk_core_protos::{
77
84
  use tokio::sync::{mpsc::unbounded_channel, watch};
78
85
  use tokio_stream::wrappers::UnboundedReceiverStream;
79
86
  use tokio_util::sync::CancellationToken;
80
-
81
- use crate::{
82
- pollers::{ActivityTaskOptions, LongPollBuffer},
83
- worker::workflow::wft_poller,
84
- };
85
87
  #[cfg(test)]
86
88
  use {
87
89
  crate::{
@@ -119,6 +121,8 @@ pub struct Worker {
119
121
  local_activities_complete: Arc<AtomicBool>,
120
122
  /// Used to track all permits have been released
121
123
  all_permits_tracker: tokio::sync::Mutex<AllPermitsTracker>,
124
+ /// Used to shutdown the worker heartbeat task
125
+ worker_heartbeat: Option<WorkerHeartbeatManager>,
122
126
  }
123
127
 
124
128
  struct AllPermitsTracker {
@@ -271,6 +275,7 @@ impl Worker {
271
275
  sticky_queue_name: Option<String>,
272
276
  client: Arc<dyn WorkerClient>,
273
277
  telem_instance: Option<&TelemetryInstance>,
278
+ heartbeat_fn: Option<Arc<OnceLock<HeartbeatFn>>>,
274
279
  ) -> Self {
275
280
  info!(task_queue=%config.task_queue, namespace=%config.namespace, "Initializing worker");
276
281
 
@@ -280,6 +285,7 @@ impl Worker {
280
285
  client,
281
286
  TaskPollers::Real,
282
287
  telem_instance,
288
+ heartbeat_fn,
283
289
  )
284
290
  }
285
291
 
@@ -297,7 +303,7 @@ impl Worker {
297
303
 
298
304
  #[cfg(test)]
299
305
  pub(crate) fn new_test(config: WorkerConfig, client: impl WorkerClient + 'static) -> Self {
300
- Self::new(config, None, Arc::new(client), None)
306
+ Self::new(config, None, Arc::new(client), None, None)
301
307
  }
302
308
 
303
309
  pub(crate) fn new_with_pollers(
@@ -306,6 +312,7 @@ impl Worker {
306
312
  client: Arc<dyn WorkerClient>,
307
313
  task_pollers: TaskPollers,
308
314
  telem_instance: Option<&TelemetryInstance>,
315
+ heartbeat_fn: Option<Arc<OnceLock<HeartbeatFn>>>,
309
316
  ) -> Self {
310
317
  let (metrics, meter) = if let Some(ti) = telem_instance {
311
318
  (
@@ -325,7 +332,7 @@ impl Worker {
325
332
  let shutdown_token = CancellationToken::new();
326
333
  let slot_context_data = Arc::new(PermitDealerContextData {
327
334
  task_queue: config.task_queue.clone(),
328
- worker_identity: config.client_identity_override.clone().unwrap_or_default(),
335
+ worker_identity: client.get_identity(),
329
336
  worker_deployment_version: config.computed_deployment_version(),
330
337
  });
331
338
  let wft_slots = MeteredPermitDealer::new(
@@ -437,17 +444,17 @@ impl Worker {
437
444
  };
438
445
 
439
446
  let (hb_tx, hb_rx) = unbounded_channel();
440
- let la_pemit_dealer = MeteredPermitDealer::new(
447
+ let la_permit_dealer = MeteredPermitDealer::new(
441
448
  tuner.local_activity_slot_supplier(),
442
449
  metrics.with_new_attrs([local_activity_worker_type()]),
443
450
  None,
444
- slot_context_data,
451
+ slot_context_data.clone(),
445
452
  meter.clone(),
446
453
  );
447
- let la_permits = la_pemit_dealer.get_extant_count_rcv();
454
+ let la_permits = la_permit_dealer.get_extant_count_rcv();
448
455
  let local_act_mgr = Arc::new(LocalActivityManager::new(
449
456
  config.namespace.clone(),
450
- la_pemit_dealer,
457
+ la_permit_dealer,
451
458
  hb_tx,
452
459
  metrics.clone(),
453
460
  ));
@@ -484,6 +491,16 @@ impl Worker {
484
491
  );
485
492
  let worker_key = Mutex::new(client.workers().register(Box::new(provider)));
486
493
  let sdk_name_and_ver = client.sdk_name_and_version();
494
+
495
+ let worker_heartbeat = heartbeat_fn.map(|heartbeat_fn| {
496
+ WorkerHeartbeatManager::new(
497
+ config.clone(),
498
+ client.get_identity(),
499
+ heartbeat_fn,
500
+ client.clone(),
501
+ )
502
+ });
503
+
487
504
  Self {
488
505
  worker_key,
489
506
  client: client.clone(),
@@ -540,6 +557,7 @@ impl Worker {
540
557
  la_permits,
541
558
  }),
542
559
  nexus_mgr,
560
+ worker_heartbeat,
543
561
  }
544
562
  }
545
563
 
@@ -584,6 +602,9 @@ impl Worker {
584
602
  dbg_panic!("Waiting for all slot permits to release took too long!");
585
603
  }
586
604
  }
605
+ if let Some(heartbeat) = self.worker_heartbeat.as_ref() {
606
+ heartbeat.shutdown();
607
+ }
587
608
  }
588
609
 
589
610
  /// Finish shutting down by consuming the background pollers and freeing all resources
@@ -883,7 +904,7 @@ mod tests {
883
904
  use crate::{
884
905
  advance_fut,
885
906
  test_help::test_worker_cfg,
886
- worker::client::mocks::{mock_manual_workflow_client, mock_workflow_client},
907
+ worker::client::mocks::{mock_manual_worker_client, mock_worker_client},
887
908
  };
888
909
  use futures_util::FutureExt;
889
910
  use temporal_sdk_core_api::worker::PollerBehavior;
@@ -891,7 +912,7 @@ mod tests {
891
912
 
892
913
  #[tokio::test]
893
914
  async fn activity_timeouts_maintain_permit() {
894
- let mut mock_client = mock_workflow_client();
915
+ let mut mock_client = mock_worker_client();
895
916
  mock_client
896
917
  .expect_poll_activity_task()
897
918
  .returning(|_, _| Ok(PollActivityTaskQueueResponse::default()));
@@ -913,7 +934,7 @@ mod tests {
913
934
  async fn activity_errs_dont_eat_permits() {
914
935
  // Return one error followed by simulating waiting on the poll, otherwise the poller will
915
936
  // loop very fast and be in some indeterminate state.
916
- let mut mock_client = mock_manual_workflow_client();
937
+ let mut mock_client = mock_manual_worker_client();
917
938
  mock_client
918
939
  .expect_poll_activity_task()
919
940
  .returning(|_, _| async { Err(tonic::Status::internal("ahhh")) }.boxed())
@@ -190,10 +190,10 @@ struct PidControllers {
190
190
  }
191
191
  struct MetricInstruments {
192
192
  attribs: MetricAttributes,
193
- mem_usage: Arc<dyn GaugeF64>,
194
- cpu_usage: Arc<dyn GaugeF64>,
195
- mem_pid_output: Arc<dyn GaugeF64>,
196
- cpu_pid_output: Arc<dyn GaugeF64>,
193
+ mem_usage: GaugeF64,
194
+ cpu_usage: GaugeF64,
195
+ mem_pid_output: GaugeF64,
196
+ cpu_pid_output: GaugeF64,
197
197
  }
198
198
  #[derive(Clone, Copy, Default)]
199
199
  struct LastMetricVals {
@@ -686,6 +686,7 @@ impl NextWFTSeqEndIndex {
686
686
  }
687
687
 
688
688
  /// Discovers the index of the last event in next WFT sequence within the passed-in slice
689
+ /// For more on workflow task chunking, see arch_docs/workflow_task_chunking.md
689
690
  fn find_end_index_of_next_wft_seq(
690
691
  events: &[HistoryEvent],
691
692
  from_event_id: i64,
@@ -718,11 +719,6 @@ fn find_end_index_of_next_wft_seq(
718
719
  return NextWFTSeqEndIndex::Complete(last_index);
719
720
  }
720
721
 
721
- // TODO: Emergency undo for boundary calculation change. Remove if no problems after a bit.
722
- if std::env::var("TEMPORAL_NO_WFT_BOUNDARY_CHANGE").is_ok() {
723
- saw_command = false;
724
- }
725
-
726
722
  if e.event_type() == EventType::WorkflowTaskStarted {
727
723
  wft_started_event_id_to_index.push((e.event_id, ix));
728
724
  if let Some(next_event) = events.get(ix + 1) {
@@ -737,6 +733,9 @@ fn find_end_index_of_next_wft_seq(
737
733
  | EventType::WorkflowExecutionTerminated
738
734
  | EventType::WorkflowExecutionCanceled
739
735
  ) {
736
+ // Since we're skipping this WFT, we don't want to include it in the vec used
737
+ // for update accepted sequencing lookups.
738
+ wft_started_event_id_to_index.pop();
740
739
  continue;
741
740
  } else if next_event_type == EventType::WorkflowTaskCompleted {
742
741
  if let Some(next_next_event) = events.get(ix + 2) {
@@ -761,7 +760,12 @@ fn find_end_index_of_next_wft_seq(
761
760
  ),
762
761
  ) = next_next_event.attributes
763
762
  {
764
- // Find index of closest WFT started before sequencing id
763
+ // Find index of closest unskipped WFT started before sequencing id.
764
+ // The fact that the WFT wasn't skipped is important. If it was, we
765
+ // need to avoid stopping at that point even though that's where the
766
+ // update was sequenced. If we did, we'll fail to actually include
767
+ // the update accepted event and therefore fail to generate the
768
+ // request to run the update handler on replay.
765
769
  if let Some(ret_ix) = wft_started_event_id_to_index
766
770
  .iter()
767
771
  .rev()
@@ -803,7 +807,7 @@ mod tests {
803
807
  use crate::{
804
808
  replay::{HistoryInfo, TestHistoryBuilder},
805
809
  test_help::{MockPollCfg, ResponseType, canned_histories, hist_to_poll_resp, mock_sdk_cfg},
806
- worker::client::mocks::mock_workflow_client,
810
+ worker::client::mocks::mock_worker_client,
807
811
  };
808
812
  use futures_util::{StreamExt, TryStreamExt};
809
813
  use std::sync::atomic::{AtomicUsize, Ordering};
@@ -965,7 +969,7 @@ mod tests {
965
969
  let wft_started = hinfo.workflow_task_started_event_id();
966
970
  let full_hist = hinfo.into_events();
967
971
  let initial_hist = full_hist.chunks(chunk_size).next().unwrap().to_vec();
968
- let mut mock_client = mock_workflow_client();
972
+ let mut mock_client = mock_worker_client();
969
973
 
970
974
  let mut npt = 1;
971
975
  mock_client
@@ -1162,7 +1166,7 @@ mod tests {
1162
1166
  // Chop off the last event, which is WFT started, which server doesn't return in get
1163
1167
  // history
1164
1168
  history_from_get.history.as_mut().map(|h| h.events.pop());
1165
- let mut mock_client = mock_workflow_client();
1169
+ let mut mock_client = mock_worker_client();
1166
1170
  mock_client
1167
1171
  .expect_get_workflow_execution_history()
1168
1172
  .returning(move |_, _, _| Ok(history_from_get.clone()));
@@ -1220,7 +1224,7 @@ mod tests {
1220
1224
  let partial_task = timer_hist.get_one_wft(2).unwrap();
1221
1225
  let prev_started_wft_id = partial_task.previous_started_event_id();
1222
1226
  let wft_started_id = partial_task.workflow_task_started_event_id();
1223
- let mut mock_client = mock_workflow_client();
1227
+ let mut mock_client = mock_worker_client();
1224
1228
  mock_client
1225
1229
  .expect_get_workflow_execution_history()
1226
1230
  .returning(move |_, _, _| Ok(Default::default()));
@@ -1247,7 +1251,7 @@ mod tests {
1247
1251
  let wft_started_id = partial_task.workflow_task_started_event_id();
1248
1252
  let full_resp: GetWorkflowExecutionHistoryResponse =
1249
1253
  timer_hist.get_full_history_info().unwrap().into();
1250
- let mut mock_client = mock_workflow_client();
1254
+ let mut mock_client = mock_worker_client();
1251
1255
  mock_client
1252
1256
  .expect_get_workflow_execution_history()
1253
1257
  .returning(move |_, _, _| {
@@ -1296,7 +1300,7 @@ mod tests {
1296
1300
  timer_hist.get_full_history_info().unwrap().into();
1297
1301
  full_resp_with_npt.next_page_token = vec![1];
1298
1302
 
1299
- let mut mock_client = mock_workflow_client();
1303
+ let mut mock_client = mock_worker_client();
1300
1304
  mock_client
1301
1305
  .expect_get_workflow_execution_history()
1302
1306
  .returning(move |_, _, _| Ok(full_resp_with_npt.clone()))
@@ -1375,7 +1379,7 @@ mod tests {
1375
1379
  resp_1.next_page_token = vec![1];
1376
1380
  resp_1.history.as_mut().unwrap().events.truncate(4);
1377
1381
 
1378
- let mut mock_client = mock_workflow_client();
1382
+ let mut mock_client = mock_worker_client();
1379
1383
  mock_client
1380
1384
  .expect_get_workflow_execution_history()
1381
1385
  .returning(move |_, _, _| Ok(resp_1.clone()))
@@ -1486,7 +1490,7 @@ mod tests {
1486
1490
  t.add_we_signaled("hi", vec![]);
1487
1491
  t.add_workflow_task_scheduled_and_started();
1488
1492
 
1489
- let mut mock_client = mock_workflow_client();
1493
+ let mut mock_client = mock_worker_client();
1490
1494
 
1491
1495
  let events: Vec<HistoryEvent> = t.get_full_history_info().unwrap().into_events();
1492
1496
  let first_event = events[0].clone();
@@ -1602,7 +1606,7 @@ mod tests {
1602
1606
  let events: Vec<HistoryEvent> = t.get_full_history_info().unwrap().into_events();
1603
1607
  let first_event = events[0].clone();
1604
1608
 
1605
- let mut mock_client = mock_workflow_client();
1609
+ let mut mock_client = mock_worker_client();
1606
1610
 
1607
1611
  for (i, event) in events.into_iter().enumerate() {
1608
1612
  // Add an empty page
@@ -1722,7 +1726,7 @@ mod tests {
1722
1726
  t.get_full_history_info().unwrap().into();
1723
1727
  resp_1.next_page_token = vec![2];
1724
1728
 
1725
- let mut mock_client = mock_workflow_client();
1729
+ let mut mock_client = mock_worker_client();
1726
1730
  mock_client
1727
1731
  .expect_get_workflow_execution_history()
1728
1732
  .returning(move |_, _, _| Ok(resp_1.clone()))
@@ -1765,7 +1769,7 @@ mod tests {
1765
1769
  let workflow_task = t.get_full_history_info().unwrap();
1766
1770
  let prev_started_wft_id = workflow_task.previous_started_event_id();
1767
1771
  let wft_started_id = workflow_task.workflow_task_started_event_id();
1768
- let mock_client = mock_workflow_client();
1772
+ let mock_client = mock_worker_client();
1769
1773
  let mut paginator = HistoryPaginator::new(
1770
1774
  workflow_task.into(),
1771
1775
  prev_started_wft_id,
@@ -1802,7 +1806,7 @@ mod tests {
1802
1806
  let full_resp: GetWorkflowExecutionHistoryResponse =
1803
1807
  t.get_full_history_info().unwrap().into();
1804
1808
 
1805
- let mut mock_client = mock_workflow_client();
1809
+ let mut mock_client = mock_worker_client();
1806
1810
  mock_client
1807
1811
  .expect_get_workflow_execution_history()
1808
1812
  .returning(move |_, _, _| Ok(full_resp.clone()))
@@ -1839,7 +1843,7 @@ mod tests {
1839
1843
  let incremental_task =
1840
1844
  hist_to_poll_resp(&t, "wfid".to_owned(), ResponseType::OneTask(3)).resp;
1841
1845
 
1842
- let mut mock_client = mock_workflow_client();
1846
+ let mut mock_client = mock_worker_client();
1843
1847
  let mut one_task_resp: GetWorkflowExecutionHistoryResponse =
1844
1848
  t.get_history_info(1).unwrap().into();
1845
1849
  one_task_resp.next_page_token = vec![1];
@@ -1877,4 +1881,52 @@ mod tests {
1877
1881
  let seq = next_check_peek(&mut update, 7);
1878
1882
  assert_eq!(seq.last().unwrap().event_id, 11);
1879
1883
  }
1884
+
1885
+ #[tokio::test]
1886
+ async fn wft_fail_on_first_task_with_update() {
1887
+ let mut t = TestHistoryBuilder::default();
1888
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1889
+ t.add_workflow_task_scheduled_and_started();
1890
+ t.add_workflow_task_failed_with_failure(
1891
+ WorkflowTaskFailedCause::Unspecified,
1892
+ Default::default(),
1893
+ );
1894
+ t.add_full_wf_task();
1895
+ let accept_id = t.add_update_accepted("1", "upd");
1896
+ let timer_id = t.add_timer_started("1".to_string());
1897
+ t.add_update_completed(accept_id);
1898
+ t.add_timer_fired(timer_id, "1".to_string());
1899
+ t.add_full_wf_task();
1900
+
1901
+ let mut update = t.as_history_update();
1902
+ let seq = next_check_peek(&mut update, 0);
1903
+ // In this case, we expect to see up to the task with update, since the task failure
1904
+ // should be skipped. This means that the peek of the _next_ task will include the update
1905
+ // and thus properly synthesize the update request with the first activation.
1906
+ assert_eq!(seq.len(), 6);
1907
+ let seq = next_check_peek(&mut update, 6);
1908
+ assert_eq!(seq.len(), 7);
1909
+ }
1910
+
1911
+ #[test]
1912
+ fn update_accepted_after_empty_wft() {
1913
+ let mut t = TestHistoryBuilder::default();
1914
+ t.add_by_type(EventType::WorkflowExecutionStarted);
1915
+ t.add_full_wf_task();
1916
+ t.add_full_wf_task();
1917
+ let accept_id = t.add_update_accepted("1", "upd");
1918
+ let timer_id = t.add_timer_started("1".to_string());
1919
+ t.add_update_completed(accept_id);
1920
+ t.add_timer_fired(timer_id, "1".to_string());
1921
+ t.add_full_wf_task();
1922
+
1923
+ let mut update = t.as_history_update();
1924
+ let seq = next_check_peek(&mut update, 0);
1925
+ // unlike the case with a wft failure, here the first task should not extend through to
1926
+ // the update, because here the first empty WFT happened with _just_ the workflow init,
1927
+ // not also with the update.
1928
+ assert_eq!(seq.len(), 3);
1929
+ let seq = next_check_peek(&mut update, 3);
1930
+ assert_eq!(seq.len(), 3);
1931
+ }
1880
1932
  }
@@ -69,10 +69,9 @@ pub(super) fn new_external_cancel(
69
69
  namespace: workflow_execution.namespace,
70
70
  workflow_id: workflow_execution.workflow_id,
71
71
  run_id: workflow_execution.run_id,
72
- // Apparently this is effectively deprecated at this point
73
- control: "".to_string(),
74
72
  child_workflow_only: only_child,
75
73
  reason,
74
+ ..Default::default()
76
75
  },
77
76
  );
78
77
  NewMachineWithCommand {
@@ -714,7 +714,7 @@ impl WFMachinesAdapter for ChildWorkflowMachine {
714
714
  run_id: self.shared_state.run_id.clone(),
715
715
  child_workflow_only: true,
716
716
  reason,
717
- control: "".to_string(),
717
+ ..Default::default()
718
718
  }
719
719
  .into(),
720
720
  ))