@temporalio/core-bridge 0.19.2 → 0.20.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +90 -157
- package/Cargo.toml +1 -0
- package/index.d.ts +11 -27
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.cargo/config.toml +1 -0
- package/sdk-core/CODEOWNERS +1 -1
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +119 -86
- package/sdk-core/bridge-ffi/src/lib.rs +311 -315
- package/sdk-core/bridge-ffi/src/wrappers.rs +108 -113
- package/sdk-core/client/Cargo.toml +13 -9
- package/sdk-core/client/LICENSE.txt +23 -0
- package/sdk-core/client/src/lib.rs +286 -174
- package/sdk-core/client/src/metrics.rs +86 -12
- package/sdk-core/client/src/raw.rs +566 -0
- package/sdk-core/client/src/retry.rs +137 -99
- package/sdk-core/core/Cargo.toml +15 -10
- package/sdk-core/core/LICENSE.txt +23 -0
- package/sdk-core/core/benches/workflow_replay.rs +79 -0
- package/sdk-core/core/src/abstractions.rs +38 -0
- package/sdk-core/core/src/core_tests/activity_tasks.rs +108 -182
- package/sdk-core/core/src/core_tests/child_workflows.rs +16 -11
- package/sdk-core/core/src/core_tests/determinism.rs +24 -12
- package/sdk-core/core/src/core_tests/local_activities.rs +53 -27
- package/sdk-core/core/src/core_tests/mod.rs +30 -43
- package/sdk-core/core/src/core_tests/queries.rs +82 -81
- package/sdk-core/core/src/core_tests/workers.rs +111 -296
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +4 -4
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +257 -242
- package/sdk-core/core/src/lib.rs +73 -318
- package/sdk-core/core/src/pollers/mod.rs +4 -6
- package/sdk-core/core/src/pollers/poll_buffer.rs +20 -14
- package/sdk-core/core/src/protosext/mod.rs +7 -10
- package/sdk-core/core/src/replay/mod.rs +11 -150
- package/sdk-core/core/src/telemetry/metrics.rs +35 -2
- package/sdk-core/core/src/telemetry/mod.rs +49 -16
- package/sdk-core/core/src/telemetry/prometheus_server.rs +14 -35
- package/sdk-core/core/src/test_help/mod.rs +104 -170
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +57 -34
- package/sdk-core/core/src/worker/activities/local_activities.rs +95 -23
- package/sdk-core/core/src/worker/activities.rs +23 -16
- package/sdk-core/core/src/worker/client/mocks.rs +86 -0
- package/sdk-core/core/src/worker/client.rs +209 -0
- package/sdk-core/core/src/worker/mod.rs +207 -108
- package/sdk-core/core/src/workflow/driven_workflow.rs +21 -6
- package/sdk-core/core/src/workflow/history_update.rs +107 -24
- package/sdk-core/core/src/workflow/machines/activity_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/child_workflow_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/mod.rs +20 -17
- package/sdk-core/core/src/workflow/machines/signal_external_state_machine.rs +56 -19
- package/sdk-core/core/src/workflow/machines/transition_coverage.rs +5 -0
- package/sdk-core/core/src/workflow/machines/upsert_search_attributes_state_machine.rs +230 -22
- package/sdk-core/core/src/workflow/machines/workflow_machines.rs +81 -115
- package/sdk-core/core/src/workflow/machines/workflow_task_state_machine.rs +4 -4
- package/sdk-core/core/src/workflow/mod.rs +13 -1
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +70 -11
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +65 -41
- package/sdk-core/core-api/Cargo.toml +9 -1
- package/sdk-core/core-api/LICENSE.txt +23 -0
- package/sdk-core/core-api/src/errors.rs +7 -38
- package/sdk-core/core-api/src/lib.rs +44 -52
- package/sdk-core/core-api/src/worker.rs +10 -2
- package/sdk-core/etc/deps.svg +127 -96
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +11 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +10 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +6 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +2 -1
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +12 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -0
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +19 -35
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -6
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +53 -11
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +14 -7
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +3 -5
- package/sdk-core/sdk/Cargo.toml +16 -2
- package/sdk-core/sdk/LICENSE.txt +23 -0
- package/sdk-core/sdk/src/interceptors.rs +11 -0
- package/sdk-core/sdk/src/lib.rs +139 -151
- package/sdk-core/sdk/src/workflow_context/options.rs +86 -1
- package/sdk-core/sdk/src/workflow_context.rs +36 -17
- package/sdk-core/sdk/src/workflow_future.rs +19 -25
- package/sdk-core/sdk-core-protos/Cargo.toml +1 -1
- package/sdk-core/sdk-core-protos/build.rs +1 -0
- package/sdk-core/sdk-core-protos/src/history_info.rs +17 -4
- package/sdk-core/sdk-core-protos/src/lib.rs +251 -47
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +27 -0
- package/sdk-core/test-utils/src/histfetch.rs +3 -3
- package/sdk-core/test-utils/src/lib.rs +223 -68
- package/sdk-core/tests/integ_tests/client_tests.rs +27 -4
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +93 -14
- package/sdk-core/tests/integ_tests/polling_tests.rs +18 -12
- package/sdk-core/tests/integ_tests/queries_tests.rs +50 -53
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +117 -103
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +8 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +10 -5
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +32 -9
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +76 -15
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +19 -3
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +39 -42
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +84 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +30 -8
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +21 -6
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +26 -16
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +66 -0
- package/sdk-core/tests/integ_tests/workflow_tests.rs +78 -74
- package/sdk-core/tests/load_tests.rs +9 -6
- package/sdk-core/tests/main.rs +43 -10
- package/src/conversions.rs +7 -12
- package/src/lib.rs +322 -357
- package/sdk-core/client/src/mocks.rs +0 -167
- package/sdk-core/core/src/worker/dispatcher.rs +0 -171
- package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +0 -61
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
use crate::{
|
|
1
|
+
use crate::{
|
|
2
|
+
worker::{activities::PendingActivityCancel, client::WorkerClientBag},
|
|
3
|
+
TaskToken,
|
|
4
|
+
};
|
|
2
5
|
use futures::StreamExt;
|
|
3
6
|
use std::{
|
|
4
7
|
collections::{hash_map::Entry, HashMap},
|
|
@@ -56,7 +59,6 @@ enum HeartbeatExecutorAction {
|
|
|
56
59
|
Report {
|
|
57
60
|
task_token: TaskToken,
|
|
58
61
|
details: Vec<common::Payload>,
|
|
59
|
-
on_reported: Option<Arc<Notify>>,
|
|
60
62
|
},
|
|
61
63
|
}
|
|
62
64
|
|
|
@@ -139,6 +141,8 @@ impl ActivityHeartbeatManager {
|
|
|
139
141
|
struct ActivityHeartbeatState {
|
|
140
142
|
/// If None and throttle interval is over, untrack this task token
|
|
141
143
|
last_recorded_details: Option<Vec<common::Payload>>,
|
|
144
|
+
/// True if we've queued up a request to record against server, but it hasn't yet completed
|
|
145
|
+
is_record_in_flight: bool,
|
|
142
146
|
last_send_requested: Instant,
|
|
143
147
|
throttle_interval: Duration,
|
|
144
148
|
throttled_cancellation_token: Option<CancellationToken>,
|
|
@@ -161,6 +165,7 @@ impl ActivityHeartbeatState {
|
|
|
161
165
|
#[derive(Debug)]
|
|
162
166
|
struct HeartbeatStreamState {
|
|
163
167
|
tt_to_state: HashMap<TaskToken, ActivityHeartbeatState>,
|
|
168
|
+
tt_needs_flush: HashMap<TaskToken, Arc<Notify>>,
|
|
164
169
|
incoming_hbs: UnboundedReceiver<HeartbeatAction>,
|
|
165
170
|
/// Token that can be used to cancel the entire stream.
|
|
166
171
|
/// Requests to the server are not cancelled with this token.
|
|
@@ -175,6 +180,7 @@ impl HeartbeatStreamState {
|
|
|
175
180
|
Self {
|
|
176
181
|
cancellation_token: cancellation_token.clone(),
|
|
177
182
|
tt_to_state: Default::default(),
|
|
183
|
+
tt_needs_flush: Default::default(),
|
|
178
184
|
incoming_hbs,
|
|
179
185
|
},
|
|
180
186
|
heartbeat_tx,
|
|
@@ -193,13 +199,13 @@ impl HeartbeatStreamState {
|
|
|
193
199
|
// None is used to mark that after throttling we can stop tracking this task
|
|
194
200
|
// token.
|
|
195
201
|
last_recorded_details: None,
|
|
202
|
+
is_record_in_flight: true,
|
|
196
203
|
throttled_cancellation_token: None,
|
|
197
204
|
};
|
|
198
205
|
e.insert(state);
|
|
199
206
|
Some(HeartbeatExecutorAction::Report {
|
|
200
207
|
task_token: hb.task_token,
|
|
201
208
|
details: hb.details,
|
|
202
|
-
on_reported: None,
|
|
203
209
|
})
|
|
204
210
|
}
|
|
205
211
|
Entry::Occupied(mut o) => {
|
|
@@ -212,7 +218,11 @@ impl HeartbeatStreamState {
|
|
|
212
218
|
|
|
213
219
|
/// Heartbeat report to server completed
|
|
214
220
|
fn handle_report_completed(&mut self, tt: TaskToken) -> Option<HeartbeatExecutorAction> {
|
|
221
|
+
if let Some(not) = self.tt_needs_flush.remove(&tt) {
|
|
222
|
+
not.notify_one();
|
|
223
|
+
}
|
|
215
224
|
if let Some(st) = self.tt_to_state.get_mut(&tt) {
|
|
225
|
+
st.is_record_in_flight = false;
|
|
216
226
|
let cancellation_token = self.cancellation_token.child_token();
|
|
217
227
|
st.throttled_cancellation_token = Some(cancellation_token.clone());
|
|
218
228
|
// Always sleep for simplicity even if the duration is 0
|
|
@@ -236,10 +246,10 @@ impl HeartbeatStreamState {
|
|
|
236
246
|
// Reset the cancellation token and schedule another report
|
|
237
247
|
state.throttled_cancellation_token = None;
|
|
238
248
|
state.last_send_requested = Instant::now();
|
|
249
|
+
state.is_record_in_flight = true;
|
|
239
250
|
Some(HeartbeatExecutorAction::Report {
|
|
240
251
|
task_token: tt,
|
|
241
252
|
details,
|
|
242
|
-
on_reported: None,
|
|
243
253
|
})
|
|
244
254
|
} else {
|
|
245
255
|
// Nothing to report, forget this task token
|
|
@@ -265,11 +275,14 @@ impl HeartbeatStreamState {
|
|
|
265
275
|
let _ = cancel_tok.cancel();
|
|
266
276
|
}
|
|
267
277
|
if let Some(last_deets) = state.last_recorded_details {
|
|
278
|
+
self.tt_needs_flush.insert(tt.clone(), on_complete);
|
|
268
279
|
return Some(HeartbeatExecutorAction::Report {
|
|
269
280
|
task_token: tt,
|
|
270
281
|
details: last_deets,
|
|
271
|
-
on_reported: Some(on_complete),
|
|
272
282
|
});
|
|
283
|
+
} else if state.is_record_in_flight {
|
|
284
|
+
self.tt_needs_flush.insert(tt, on_complete);
|
|
285
|
+
return None;
|
|
273
286
|
}
|
|
274
287
|
}
|
|
275
288
|
// Since there's nothing to flush immediately report back that eviction is finished
|
|
@@ -281,7 +294,7 @@ impl HeartbeatStreamState {
|
|
|
281
294
|
impl ActivityHeartbeatManager {
|
|
282
295
|
/// Creates a new instance of an activity heartbeat manager and returns a handle to the user,
|
|
283
296
|
/// which allows sending new heartbeats and initiating the shutdown.
|
|
284
|
-
pub fn new(
|
|
297
|
+
pub fn new(client: Arc<WorkerClientBag>) -> Self {
|
|
285
298
|
let (heartbeat_stream_state, heartbeat_tx_source, shutdown_token) =
|
|
286
299
|
HeartbeatStreamState::new();
|
|
287
300
|
let (cancels_tx, cancels_rx) = unbounded_channel();
|
|
@@ -320,7 +333,7 @@ impl ActivityHeartbeatManager {
|
|
|
320
333
|
.filter_map(|opt| async { opt })
|
|
321
334
|
.for_each_concurrent(None, move |action| {
|
|
322
335
|
let heartbeat_tx = heartbeat_tx_source.clone();
|
|
323
|
-
let sg =
|
|
336
|
+
let sg = client.clone();
|
|
324
337
|
let cancels_tx = cancels_tx.clone();
|
|
325
338
|
async move {
|
|
326
339
|
match action {
|
|
@@ -332,7 +345,7 @@ impl ActivityHeartbeatManager {
|
|
|
332
345
|
},
|
|
333
346
|
};
|
|
334
347
|
}
|
|
335
|
-
HeartbeatExecutorAction::Report { task_token: tt, details
|
|
348
|
+
HeartbeatExecutorAction::Report { task_token: tt, details } => {
|
|
336
349
|
match sg
|
|
337
350
|
.record_activity_heartbeat(tt.clone(), details.into_payloads())
|
|
338
351
|
.await
|
|
@@ -353,6 +366,8 @@ impl ActivityHeartbeatManager {
|
|
|
353
366
|
// finished (which is one thing not found implies - other reasons
|
|
354
367
|
// would seem equally valid).
|
|
355
368
|
Err(s) if s.code() == tonic::Code::NotFound => {
|
|
369
|
+
debug!(task_token = %tt,
|
|
370
|
+
"Activity not found when recording heartbeat");
|
|
356
371
|
cancels_tx
|
|
357
372
|
.send(PendingActivityCancel::new(
|
|
358
373
|
tt.clone(),
|
|
@@ -364,9 +379,6 @@ impl ActivityHeartbeatManager {
|
|
|
364
379
|
warn!("Error when recording heartbeat: {:?}", e);
|
|
365
380
|
}
|
|
366
381
|
};
|
|
367
|
-
if let Some(onrep) = on_reported {
|
|
368
|
-
onrep.notify_one();
|
|
369
|
-
}
|
|
370
382
|
let _ = heartbeat_tx.send(HeartbeatAction::CompleteReport(tt));
|
|
371
383
|
}
|
|
372
384
|
}
|
|
@@ -386,10 +398,9 @@ impl ActivityHeartbeatManager {
|
|
|
386
398
|
#[cfg(test)]
|
|
387
399
|
mod test {
|
|
388
400
|
use super::*;
|
|
389
|
-
use crate::test_help::TEST_Q;
|
|
390
|
-
use std::time::Duration;
|
|
391
401
|
|
|
392
|
-
use
|
|
402
|
+
use crate::worker::client::mocks::mock_workflow_client;
|
|
403
|
+
use std::time::Duration;
|
|
393
404
|
use temporal_sdk_core_protos::{
|
|
394
405
|
coresdk::common::Payload,
|
|
395
406
|
temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
@@ -400,12 +411,12 @@ mod test {
|
|
|
400
411
|
/// every 1/2 of the heartbeat timeout.
|
|
401
412
|
#[tokio::test]
|
|
402
413
|
async fn process_heartbeats_and_shutdown() {
|
|
403
|
-
let mut
|
|
404
|
-
|
|
414
|
+
let mut mock_client = mock_workflow_client();
|
|
415
|
+
mock_client
|
|
405
416
|
.expect_record_activity_heartbeat()
|
|
406
417
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
407
418
|
.times(2);
|
|
408
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
419
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
409
420
|
let fake_task_token = vec![1, 2, 3];
|
|
410
421
|
// Send 2 heartbeat requests 20ms apart.
|
|
411
422
|
// The first heartbeat should be sent right away, and
|
|
@@ -421,12 +432,12 @@ mod test {
|
|
|
421
432
|
|
|
422
433
|
#[tokio::test]
|
|
423
434
|
async fn send_heartbeats_less_frequently_than_throttle_interval() {
|
|
424
|
-
let mut
|
|
425
|
-
|
|
435
|
+
let mut mock_client = mock_workflow_client();
|
|
436
|
+
mock_client
|
|
426
437
|
.expect_record_activity_heartbeat()
|
|
427
438
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
428
439
|
.times(3);
|
|
429
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
440
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
430
441
|
let fake_task_token = vec![1, 2, 3];
|
|
431
442
|
// Heartbeats always get sent if recorded less frequently than the throttle interval
|
|
432
443
|
for i in 0_u8..3 {
|
|
@@ -441,12 +452,12 @@ mod test {
|
|
|
441
452
|
/// interactions with the server - one immediately and one after 500ms after the throttle_interval.
|
|
442
453
|
#[tokio::test]
|
|
443
454
|
async fn process_tight_loop_and_shutdown() {
|
|
444
|
-
let mut
|
|
445
|
-
|
|
455
|
+
let mut mock_client = mock_workflow_client();
|
|
456
|
+
mock_client
|
|
446
457
|
.expect_record_activity_heartbeat()
|
|
447
458
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
448
459
|
.times(1);
|
|
449
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
460
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
450
461
|
let fake_task_token = vec![1, 2, 3];
|
|
451
462
|
// Send a whole bunch of heartbeats very fast. We should still only send one total.
|
|
452
463
|
for i in 0_u8..50 {
|
|
@@ -460,12 +471,12 @@ mod test {
|
|
|
460
471
|
/// This test reports one heartbeat and waits for the throttle_interval to elapse before sending another
|
|
461
472
|
#[tokio::test]
|
|
462
473
|
async fn report_heartbeat_after_timeout() {
|
|
463
|
-
let mut
|
|
464
|
-
|
|
474
|
+
let mut mock_client = mock_workflow_client();
|
|
475
|
+
mock_client
|
|
465
476
|
.expect_record_activity_heartbeat()
|
|
466
477
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
467
478
|
.times(2);
|
|
468
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
479
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
469
480
|
let fake_task_token = vec![1, 2, 3];
|
|
470
481
|
record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
|
|
471
482
|
sleep(Duration::from_millis(500)).await;
|
|
@@ -477,12 +488,12 @@ mod test {
|
|
|
477
488
|
|
|
478
489
|
#[tokio::test]
|
|
479
490
|
async fn evict_works() {
|
|
480
|
-
let mut
|
|
481
|
-
|
|
491
|
+
let mut mock_client = mock_workflow_client();
|
|
492
|
+
mock_client
|
|
482
493
|
.expect_record_activity_heartbeat()
|
|
483
494
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
484
495
|
.times(2);
|
|
485
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
496
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
486
497
|
let fake_task_token = vec![1, 2, 3];
|
|
487
498
|
record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
|
|
488
499
|
// Let it propagate
|
|
@@ -495,20 +506,33 @@ mod test {
|
|
|
495
506
|
hm.shutdown().await;
|
|
496
507
|
}
|
|
497
508
|
|
|
509
|
+
#[tokio::test]
|
|
510
|
+
async fn evict_immediate_after_record() {
|
|
511
|
+
let mut mock_client = mock_workflow_client();
|
|
512
|
+
mock_client
|
|
513
|
+
.expect_record_activity_heartbeat()
|
|
514
|
+
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
515
|
+
.times(1);
|
|
516
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
517
|
+
let fake_task_token = vec![1, 2, 3];
|
|
518
|
+
record_heartbeat(&hm, fake_task_token.clone(), 0, Duration::from_millis(100));
|
|
519
|
+
hm.evict(fake_task_token.clone().into()).await;
|
|
520
|
+
hm.shutdown().await;
|
|
521
|
+
}
|
|
522
|
+
|
|
498
523
|
/// Recording new heartbeats after shutdown is not allowed, and will result in error.
|
|
499
524
|
#[tokio::test]
|
|
500
525
|
async fn record_after_shutdown() {
|
|
501
|
-
let mut
|
|
502
|
-
|
|
526
|
+
let mut mock_client = mock_workflow_client();
|
|
527
|
+
mock_client
|
|
503
528
|
.expect_record_activity_heartbeat()
|
|
504
529
|
.returning(|_, _| Ok(RecordActivityTaskHeartbeatResponse::default()))
|
|
505
530
|
.times(0);
|
|
506
|
-
let hm = ActivityHeartbeatManager::new(Arc::new(
|
|
531
|
+
let hm = ActivityHeartbeatManager::new(Arc::new(mock_client.into()));
|
|
507
532
|
hm.shutdown().await;
|
|
508
533
|
match hm.record(
|
|
509
534
|
ActivityHeartbeat {
|
|
510
535
|
task_token: vec![1, 2, 3],
|
|
511
|
-
task_queue: TEST_Q.to_string(),
|
|
512
536
|
details: vec![Payload {
|
|
513
537
|
// payload doesn't matter in this case, as it shouldn't get sent anyways.
|
|
514
538
|
..Default::default()
|
|
@@ -534,7 +558,6 @@ mod test {
|
|
|
534
558
|
hm.record(
|
|
535
559
|
ActivityHeartbeat {
|
|
536
560
|
task_token,
|
|
537
|
-
task_queue: TEST_Q.to_string(),
|
|
538
561
|
details: vec![Payload {
|
|
539
562
|
metadata: Default::default(),
|
|
540
563
|
data: vec![payload_data],
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
use crate::{
|
|
1
|
+
use crate::{
|
|
2
|
+
abstractions::MeteredSemaphore, protosext::ValidScheduleLA, retry_logic::RetryPolicyExt,
|
|
3
|
+
MetricsContext, TaskToken,
|
|
4
|
+
};
|
|
2
5
|
use parking_lot::Mutex;
|
|
3
6
|
use std::{
|
|
4
7
|
collections::HashMap,
|
|
@@ -11,12 +14,15 @@ use temporal_sdk_core_protos::{
|
|
|
11
14
|
activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
|
|
12
15
|
common::WorkflowExecution,
|
|
13
16
|
},
|
|
14
|
-
temporal::api::
|
|
17
|
+
temporal::api::{
|
|
18
|
+
enums::v1::TimeoutType,
|
|
19
|
+
failure::v1::{failure::FailureInfo, ApplicationFailureInfo},
|
|
20
|
+
},
|
|
15
21
|
};
|
|
16
22
|
use tokio::{
|
|
17
23
|
sync::{
|
|
18
24
|
mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
|
|
19
|
-
Notify,
|
|
25
|
+
Notify,
|
|
20
26
|
},
|
|
21
27
|
task::JoinHandle,
|
|
22
28
|
time::sleep,
|
|
@@ -103,7 +109,7 @@ pub(crate) struct LocalActivityManager {
|
|
|
103
109
|
/// Just so we can provide activity tasks the same namespace as the worker
|
|
104
110
|
namespace: String,
|
|
105
111
|
/// Constrains number of currently executing local activities
|
|
106
|
-
semaphore:
|
|
112
|
+
semaphore: MeteredSemaphore,
|
|
107
113
|
/// Sink for new activity execution requests
|
|
108
114
|
act_req_tx: UnboundedSender<NewOrRetry>,
|
|
109
115
|
/// Cancels need a different queue since they should be taken first, and don't take a permit
|
|
@@ -135,13 +141,21 @@ impl LAMData {
|
|
|
135
141
|
}
|
|
136
142
|
|
|
137
143
|
impl LocalActivityManager {
|
|
138
|
-
pub(crate) fn new(
|
|
144
|
+
pub(crate) fn new(
|
|
145
|
+
max_concurrent: usize,
|
|
146
|
+
namespace: String,
|
|
147
|
+
metrics_context: MetricsContext,
|
|
148
|
+
) -> Self {
|
|
139
149
|
let (act_req_tx, act_req_rx) = unbounded_channel();
|
|
140
150
|
let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
|
|
141
151
|
let shutdown_complete_tok = CancellationToken::new();
|
|
142
152
|
Self {
|
|
143
153
|
namespace,
|
|
144
|
-
semaphore:
|
|
154
|
+
semaphore: MeteredSemaphore::new(
|
|
155
|
+
max_concurrent,
|
|
156
|
+
metrics_context,
|
|
157
|
+
MetricsContext::available_task_slots,
|
|
158
|
+
),
|
|
145
159
|
act_req_tx,
|
|
146
160
|
cancels_req_tx,
|
|
147
161
|
complete_notify: Notify::new(),
|
|
@@ -161,6 +175,15 @@ impl LocalActivityManager {
|
|
|
161
175
|
}
|
|
162
176
|
}
|
|
163
177
|
|
|
178
|
+
#[cfg(test)]
|
|
179
|
+
fn test(max_concurrent: usize) -> Self {
|
|
180
|
+
Self::new(
|
|
181
|
+
max_concurrent,
|
|
182
|
+
"fake_ns".to_string(),
|
|
183
|
+
MetricsContext::default(),
|
|
184
|
+
)
|
|
185
|
+
}
|
|
186
|
+
|
|
164
187
|
pub(crate) fn num_outstanding(&self) -> usize {
|
|
165
188
|
self.dat.lock().outstanding_activity_tasks.len()
|
|
166
189
|
}
|
|
@@ -358,7 +381,7 @@ impl LocalActivityManager {
|
|
|
358
381
|
workflow_execution: Some(new_la.workflow_exec_info),
|
|
359
382
|
activity_id: sa.activity_id,
|
|
360
383
|
activity_type: sa.activity_type,
|
|
361
|
-
header_fields: sa.
|
|
384
|
+
header_fields: sa.headers,
|
|
362
385
|
input: sa.arguments,
|
|
363
386
|
heartbeat_details: vec![],
|
|
364
387
|
scheduled_time: Some(new_la.schedule_time.into()),
|
|
@@ -387,7 +410,7 @@ impl LocalActivityManager {
|
|
|
387
410
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
388
411
|
};
|
|
389
412
|
dlock.id_to_tt.remove(&exec_id);
|
|
390
|
-
self.semaphore.
|
|
413
|
+
self.semaphore.add_permit();
|
|
391
414
|
|
|
392
415
|
match status {
|
|
393
416
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -400,10 +423,13 @@ impl LocalActivityManager {
|
|
|
400
423
|
LocalActivityExecutionResult::Failed(f) => {
|
|
401
424
|
if let Some(backoff_dur) = info.la_info.schedule_cmd.retry_policy.should_retry(
|
|
402
425
|
info.attempt as usize,
|
|
403
|
-
&f.
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
426
|
+
f.failure.as_ref().map_or("", |f| match &f.failure_info {
|
|
427
|
+
Some(FailureInfo::ApplicationFailureInfo(ApplicationFailureInfo {
|
|
428
|
+
r#type,
|
|
429
|
+
..
|
|
430
|
+
})) => r#type.as_str(),
|
|
431
|
+
_ => "",
|
|
432
|
+
}),
|
|
407
433
|
) {
|
|
408
434
|
let will_use_timer =
|
|
409
435
|
backoff_dur > info.la_info.schedule_cmd.local_retry_threshold;
|
|
@@ -507,7 +533,7 @@ struct RcvChans {
|
|
|
507
533
|
}
|
|
508
534
|
|
|
509
535
|
impl RcvChans {
|
|
510
|
-
async fn next(&mut self, new_sem: &
|
|
536
|
+
async fn next(&mut self, new_sem: &MeteredSemaphore) -> Option<NewOrCancel> {
|
|
511
537
|
tokio::select! {
|
|
512
538
|
cancel = async { self.cancels_req_rx.recv().await } => {
|
|
513
539
|
Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
|
|
@@ -617,7 +643,9 @@ impl Drop for TimeoutBag {
|
|
|
617
643
|
mod tests {
|
|
618
644
|
use super::*;
|
|
619
645
|
use crate::protosext::LACloseTimeouts;
|
|
620
|
-
use temporal_sdk_core_protos::
|
|
646
|
+
use temporal_sdk_core_protos::{
|
|
647
|
+
coresdk::common::RetryPolicy, temporal::api::failure::v1::Failure,
|
|
648
|
+
};
|
|
621
649
|
use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
|
|
622
650
|
|
|
623
651
|
impl DispatchOrTimeoutLA {
|
|
@@ -633,7 +661,7 @@ mod tests {
|
|
|
633
661
|
|
|
634
662
|
#[tokio::test]
|
|
635
663
|
async fn max_concurrent_respected() {
|
|
636
|
-
let lam = LocalActivityManager::
|
|
664
|
+
let lam = LocalActivityManager::test(1);
|
|
637
665
|
lam.enqueue((1..=50).map(|i| {
|
|
638
666
|
NewLocalAct {
|
|
639
667
|
schedule_cmd: ValidScheduleLA {
|
|
@@ -673,7 +701,7 @@ mod tests {
|
|
|
673
701
|
|
|
674
702
|
#[tokio::test]
|
|
675
703
|
async fn no_work_doesnt_deadlock_with_complete() {
|
|
676
|
-
let lam = LocalActivityManager::
|
|
704
|
+
let lam = LocalActivityManager::test(5);
|
|
677
705
|
lam.enqueue([NewLocalAct {
|
|
678
706
|
schedule_cmd: ValidScheduleLA {
|
|
679
707
|
seq: 1,
|
|
@@ -705,7 +733,7 @@ mod tests {
|
|
|
705
733
|
|
|
706
734
|
#[tokio::test]
|
|
707
735
|
async fn can_cancel_in_flight() {
|
|
708
|
-
let lam = LocalActivityManager::
|
|
736
|
+
let lam = LocalActivityManager::test(5);
|
|
709
737
|
lam.enqueue([NewLocalAct {
|
|
710
738
|
schedule_cmd: ValidScheduleLA {
|
|
711
739
|
seq: 1,
|
|
@@ -732,7 +760,7 @@ mod tests {
|
|
|
732
760
|
|
|
733
761
|
#[tokio::test]
|
|
734
762
|
async fn respects_timer_backoff_threshold() {
|
|
735
|
-
let lam = LocalActivityManager::
|
|
763
|
+
let lam = LocalActivityManager::test(1);
|
|
736
764
|
lam.enqueue([NewLocalAct {
|
|
737
765
|
schedule_cmd: ValidScheduleLA {
|
|
738
766
|
seq: 1,
|
|
@@ -765,9 +793,53 @@ mod tests {
|
|
|
765
793
|
)
|
|
766
794
|
}
|
|
767
795
|
|
|
796
|
+
#[tokio::test]
|
|
797
|
+
async fn respects_non_retryable_error_types() {
|
|
798
|
+
let lam = LocalActivityManager::test(1);
|
|
799
|
+
lam.enqueue([NewLocalAct {
|
|
800
|
+
schedule_cmd: ValidScheduleLA {
|
|
801
|
+
seq: 1,
|
|
802
|
+
activity_id: "1".to_string(),
|
|
803
|
+
attempt: 1,
|
|
804
|
+
retry_policy: RetryPolicy {
|
|
805
|
+
initial_interval: Some(Duration::from_secs(1).into()),
|
|
806
|
+
backoff_coefficient: 10.0,
|
|
807
|
+
maximum_interval: Some(Duration::from_secs(10).into()),
|
|
808
|
+
maximum_attempts: 10,
|
|
809
|
+
non_retryable_error_types: vec!["TestError".to_string()],
|
|
810
|
+
},
|
|
811
|
+
local_retry_threshold: Duration::from_secs(5),
|
|
812
|
+
..Default::default()
|
|
813
|
+
},
|
|
814
|
+
workflow_type: "".to_string(),
|
|
815
|
+
workflow_exec_info: Default::default(),
|
|
816
|
+
schedule_time: SystemTime::now(),
|
|
817
|
+
}
|
|
818
|
+
.into()]);
|
|
819
|
+
|
|
820
|
+
let next = lam.next_pending().await.unwrap().unwrap();
|
|
821
|
+
let tt = TaskToken(next.task_token);
|
|
822
|
+
let res = lam.complete(
|
|
823
|
+
&tt,
|
|
824
|
+
&LocalActivityExecutionResult::Failed(ActFail {
|
|
825
|
+
failure: Some(Failure {
|
|
826
|
+
failure_info: Some(FailureInfo::ApplicationFailureInfo(
|
|
827
|
+
ApplicationFailureInfo {
|
|
828
|
+
r#type: "TestError".to_string(),
|
|
829
|
+
non_retryable: false,
|
|
830
|
+
..Default::default()
|
|
831
|
+
},
|
|
832
|
+
)),
|
|
833
|
+
..Default::default()
|
|
834
|
+
}),
|
|
835
|
+
}),
|
|
836
|
+
);
|
|
837
|
+
assert_matches!(res, LACompleteAction::Report(_));
|
|
838
|
+
}
|
|
839
|
+
|
|
768
840
|
#[tokio::test]
|
|
769
841
|
async fn can_cancel_during_local_backoff() {
|
|
770
|
-
let lam = LocalActivityManager::
|
|
842
|
+
let lam = LocalActivityManager::test(1);
|
|
771
843
|
lam.enqueue([NewLocalAct {
|
|
772
844
|
schedule_cmd: ValidScheduleLA {
|
|
773
845
|
seq: 1,
|
|
@@ -816,7 +888,7 @@ mod tests {
|
|
|
816
888
|
|
|
817
889
|
#[tokio::test]
|
|
818
890
|
async fn local_backoff_clears_handle_map_when_started() {
|
|
819
|
-
let lam = LocalActivityManager::
|
|
891
|
+
let lam = LocalActivityManager::test(1);
|
|
820
892
|
lam.enqueue([NewLocalAct {
|
|
821
893
|
schedule_cmd: ValidScheduleLA {
|
|
822
894
|
seq: 1,
|
|
@@ -852,7 +924,7 @@ mod tests {
|
|
|
852
924
|
|
|
853
925
|
#[tokio::test]
|
|
854
926
|
async fn sched_to_start_timeout() {
|
|
855
|
-
let lam = LocalActivityManager::
|
|
927
|
+
let lam = LocalActivityManager::test(1);
|
|
856
928
|
let timeout = Duration::from_millis(100);
|
|
857
929
|
lam.enqueue([NewLocalAct {
|
|
858
930
|
schedule_cmd: ValidScheduleLA {
|
|
@@ -893,7 +965,7 @@ mod tests {
|
|
|
893
965
|
#[case::start(false)]
|
|
894
966
|
#[tokio::test]
|
|
895
967
|
async fn local_x_to_close_timeout(#[case] is_schedule: bool) {
|
|
896
|
-
let lam = LocalActivityManager::
|
|
968
|
+
let lam = LocalActivityManager::test(1);
|
|
897
969
|
let timeout = Duration::from_millis(100);
|
|
898
970
|
let close_timeouts = if is_schedule {
|
|
899
971
|
LACloseTimeouts::ScheduleOnly(timeout)
|
|
@@ -937,7 +1009,7 @@ mod tests {
|
|
|
937
1009
|
|
|
938
1010
|
#[tokio::test]
|
|
939
1011
|
async fn idempotency_enforced() {
|
|
940
|
-
let lam = LocalActivityManager::
|
|
1012
|
+
let lam = LocalActivityManager::test(10);
|
|
941
1013
|
let new_la = NewLocalAct {
|
|
942
1014
|
schedule_cmd: ValidScheduleLA {
|
|
943
1015
|
seq: 1,
|
|
@@ -8,10 +8,14 @@ pub(crate) use local_activities::{
|
|
|
8
8
|
};
|
|
9
9
|
|
|
10
10
|
use crate::{
|
|
11
|
+
abstractions::MeteredSemaphore,
|
|
11
12
|
pollers::BoxedActPoller,
|
|
12
|
-
telemetry::metrics::{activity_type, workflow_type, MetricsContext},
|
|
13
|
-
worker::
|
|
14
|
-
|
|
13
|
+
telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
|
|
14
|
+
worker::{
|
|
15
|
+
activities::activity_heartbeat_manager::ActivityHeartbeatError,
|
|
16
|
+
client::{WorkerClient, WorkerClientBag},
|
|
17
|
+
},
|
|
18
|
+
CompleteActivityError, PollActivityError, TaskToken,
|
|
15
19
|
};
|
|
16
20
|
use activity_heartbeat_manager::ActivityHeartbeatManager;
|
|
17
21
|
use dashmap::DashMap;
|
|
@@ -31,7 +35,7 @@ use temporal_sdk_core_protos::{
|
|
|
31
35
|
workflowservice::v1::PollActivityTaskQueueResponse,
|
|
32
36
|
},
|
|
33
37
|
};
|
|
34
|
-
use tokio::sync::
|
|
38
|
+
use tokio::sync::Notify;
|
|
35
39
|
|
|
36
40
|
#[derive(Debug, derive_more::Constructor)]
|
|
37
41
|
struct PendingActivityCancel {
|
|
@@ -88,7 +92,7 @@ pub(crate) struct WorkerActivityTasks {
|
|
|
88
92
|
/// ongoing.
|
|
89
93
|
poller: BoxedActPoller,
|
|
90
94
|
/// Ensures we stay at or below this worker's maximum concurrent activity limit
|
|
91
|
-
activities_semaphore:
|
|
95
|
+
activities_semaphore: MeteredSemaphore,
|
|
92
96
|
/// Wakes every time an activity is removed from the outstanding map
|
|
93
97
|
complete_notify: Notify,
|
|
94
98
|
|
|
@@ -102,16 +106,20 @@ impl WorkerActivityTasks {
|
|
|
102
106
|
pub(crate) fn new(
|
|
103
107
|
max_activity_tasks: usize,
|
|
104
108
|
poller: BoxedActPoller,
|
|
105
|
-
|
|
109
|
+
client: Arc<WorkerClientBag>,
|
|
106
110
|
metrics: MetricsContext,
|
|
107
111
|
max_heartbeat_throttle_interval: Duration,
|
|
108
112
|
default_heartbeat_throttle_interval: Duration,
|
|
109
113
|
) -> Self {
|
|
110
114
|
Self {
|
|
111
|
-
heartbeat_manager: ActivityHeartbeatManager::new(
|
|
115
|
+
heartbeat_manager: ActivityHeartbeatManager::new(client),
|
|
112
116
|
outstanding_activity_tasks: Default::default(),
|
|
113
117
|
poller,
|
|
114
|
-
activities_semaphore:
|
|
118
|
+
activities_semaphore: MeteredSemaphore::new(
|
|
119
|
+
max_activity_tasks,
|
|
120
|
+
metrics.with_new_attrs([activity_worker_type()]),
|
|
121
|
+
MetricsContext::available_task_slots,
|
|
122
|
+
),
|
|
115
123
|
complete_notify: Notify::new(),
|
|
116
124
|
metrics,
|
|
117
125
|
max_heartbeat_throttle_interval,
|
|
@@ -196,7 +204,7 @@ impl WorkerActivityTasks {
|
|
|
196
204
|
&self,
|
|
197
205
|
task_token: TaskToken,
|
|
198
206
|
status: aer::Status,
|
|
199
|
-
|
|
207
|
+
client: &dyn WorkerClient,
|
|
200
208
|
) -> Result<(), CompleteActivityError> {
|
|
201
209
|
if let Some((_, act_info)) = self.outstanding_activity_tasks.remove(&task_token) {
|
|
202
210
|
let act_metrics = self.metrics.with_new_attrs([
|
|
@@ -204,7 +212,7 @@ impl WorkerActivityTasks {
|
|
|
204
212
|
workflow_type(act_info.base.workflow_type.clone()),
|
|
205
213
|
]);
|
|
206
214
|
act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
|
|
207
|
-
self.activities_semaphore.
|
|
215
|
+
self.activities_semaphore.add_permit();
|
|
208
216
|
self.heartbeat_manager.evict(task_token.clone()).await;
|
|
209
217
|
let known_not_found = act_info.known_not_found;
|
|
210
218
|
drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
|
|
@@ -214,13 +222,13 @@ impl WorkerActivityTasks {
|
|
|
214
222
|
if !known_not_found {
|
|
215
223
|
let maybe_net_err = match status {
|
|
216
224
|
aer::Status::WillCompleteAsync(_) => None,
|
|
217
|
-
aer::Status::Completed(ar::Success { result }) =>
|
|
225
|
+
aer::Status::Completed(ar::Success { result }) => client
|
|
218
226
|
.complete_activity_task(task_token.clone(), result.map(Into::into))
|
|
219
227
|
.await
|
|
220
228
|
.err(),
|
|
221
229
|
aer::Status::Failed(ar::Failure { failure }) => {
|
|
222
230
|
act_metrics.act_execution_failed();
|
|
223
|
-
|
|
231
|
+
client
|
|
224
232
|
.fail_activity_task(task_token.clone(), failure.map(Into::into))
|
|
225
233
|
.await
|
|
226
234
|
.err()
|
|
@@ -238,7 +246,7 @@ impl WorkerActivityTasks {
|
|
|
238
246
|
"Expected activity cancelled status with CanceledFailureInfo");
|
|
239
247
|
None
|
|
240
248
|
};
|
|
241
|
-
|
|
249
|
+
client
|
|
242
250
|
.cancel_activity_task(task_token.clone(), details.map(Into::into))
|
|
243
251
|
.await
|
|
244
252
|
.err()
|
|
@@ -316,8 +324,7 @@ impl WorkerActivityTasks {
|
|
|
316
324
|
}
|
|
317
325
|
Ok(Some(ActivityTask::cancel_from_ids(task_token.0, reason)))
|
|
318
326
|
} else {
|
|
319
|
-
|
|
320
|
-
"Unknown activity task when issuing cancel");
|
|
327
|
+
debug!(task_token = ?task_token, "Unknown activity task when issuing cancel");
|
|
321
328
|
// If we can't find the activity here, it's already been completed,
|
|
322
329
|
// in which case issuing a cancel again is pointless.
|
|
323
330
|
Ok(None)
|
|
@@ -331,6 +338,6 @@ impl WorkerActivityTasks {
|
|
|
331
338
|
|
|
332
339
|
#[cfg(test)]
|
|
333
340
|
pub(crate) fn remaining_activity_capacity(&self) -> usize {
|
|
334
|
-
self.activities_semaphore.available_permits()
|
|
341
|
+
self.activities_semaphore.sem.available_permits()
|
|
335
342
|
}
|
|
336
343
|
}
|