@temporalio/core-bridge 0.22.0 → 1.0.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +120 -15
- package/Cargo.toml +3 -1
- package/README.md +1 -1
- package/index.d.ts +137 -33
- package/package.json +6 -6
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
- package/sdk-core/ARCHITECTURE.md +9 -7
- package/sdk-core/README.md +5 -1
- package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
- package/sdk-core/bridge-ffi/src/lib.rs +1 -1
- package/sdk-core/bridge-ffi/src/wrappers.rs +60 -37
- package/sdk-core/client/Cargo.toml +1 -0
- package/sdk-core/client/src/lib.rs +50 -15
- package/sdk-core/client/src/raw.rs +167 -55
- package/sdk-core/client/src/retry.rs +9 -4
- package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
- package/sdk-core/core/Cargo.toml +2 -0
- package/sdk-core/core/benches/workflow_replay.rs +1 -7
- package/sdk-core/core/src/abstractions.rs +137 -16
- package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
- package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
- package/sdk-core/core/src/core_tests/determinism.rs +2 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
- package/sdk-core/core/src/core_tests/queries.rs +146 -60
- package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
- package/sdk-core/core/src/core_tests/workers.rs +39 -23
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
- package/sdk-core/core/src/lib.rs +8 -5
- package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
- package/sdk-core/core/src/protosext/mod.rs +7 -9
- package/sdk-core/core/src/retry_logic.rs +73 -16
- package/sdk-core/core/src/telemetry/metrics.rs +21 -7
- package/sdk-core/core/src/telemetry/mod.rs +182 -110
- package/sdk-core/core/src/test_help/mod.rs +341 -109
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
- package/sdk-core/core/src/worker/activities/local_activities.rs +22 -25
- package/sdk-core/core/src/worker/activities.rs +156 -29
- package/sdk-core/core/src/worker/client.rs +1 -0
- package/sdk-core/core/src/worker/mod.rs +132 -659
- package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
- package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
- package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
- package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
- package/sdk-core/core-api/src/errors.rs +3 -10
- package/sdk-core/core-api/src/lib.rs +2 -1
- package/sdk-core/core-api/src/worker.rs +26 -2
- package/sdk-core/etc/dynamic-config.yaml +2 -0
- package/sdk-core/integ-with-otel.sh +1 -1
- package/sdk-core/protos/api_upstream/Makefile +4 -4
- package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
- package/sdk-core/protos/api_upstream/buf.yaml +8 -9
- package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +27 -6
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
- package/sdk-core/sdk/src/activity_context.rs +12 -5
- package/sdk-core/sdk/src/app_data.rs +37 -0
- package/sdk-core/sdk/src/lib.rs +76 -43
- package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
- package/sdk-core/sdk/src/workflow_context.rs +14 -19
- package/sdk-core/sdk/src/workflow_future.rs +11 -6
- package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
- package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
- package/sdk-core/sdk-core-protos/src/lib.rs +87 -176
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +93 -77
- package/sdk-core/tests/integ_tests/client_tests.rs +2 -2
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
- package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
- package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
- package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
- package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
- package/sdk-core/tests/load_tests.rs +8 -3
- package/sdk-core/tests/main.rs +7 -3
- package/src/conversions.rs +149 -70
- package/src/errors.rs +10 -21
- package/src/lib.rs +400 -319
- package/sdk-core/core/src/pending_activations.rs +0 -173
- package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
- package/sdk-core/core/src/workflow/mod.rs +0 -478
- package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
|
@@ -9,8 +9,10 @@ use std::{
|
|
|
9
9
|
time::{self, Duration, Instant},
|
|
10
10
|
};
|
|
11
11
|
use temporal_sdk_core_protos::{
|
|
12
|
-
coresdk::{activity_task::ActivityCancelReason,
|
|
13
|
-
temporal::api::
|
|
12
|
+
coresdk::{activity_task::ActivityCancelReason, ActivityHeartbeat, IntoPayloadsExt},
|
|
13
|
+
temporal::api::{
|
|
14
|
+
common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
15
|
+
},
|
|
14
16
|
};
|
|
15
17
|
use tokio::{
|
|
16
18
|
sync::{
|
|
@@ -47,7 +49,7 @@ enum HeartbeatAction {
|
|
|
47
49
|
#[derive(Debug)]
|
|
48
50
|
pub struct ValidActivityHeartbeat {
|
|
49
51
|
pub task_token: TaskToken,
|
|
50
|
-
pub details: Vec<
|
|
52
|
+
pub details: Vec<Payload>,
|
|
51
53
|
pub throttle_interval: time::Duration,
|
|
52
54
|
}
|
|
53
55
|
|
|
@@ -58,7 +60,7 @@ enum HeartbeatExecutorAction {
|
|
|
58
60
|
/// Report heartbeat to the server
|
|
59
61
|
Report {
|
|
60
62
|
task_token: TaskToken,
|
|
61
|
-
details: Vec<
|
|
63
|
+
details: Vec<Payload>,
|
|
62
64
|
},
|
|
63
65
|
}
|
|
64
66
|
|
|
@@ -132,7 +134,15 @@ impl ActivityHeartbeatManager {
|
|
|
132
134
|
let _ = self.shutdown_token.cancel();
|
|
133
135
|
let mut handle = self.join_handle.lock().await;
|
|
134
136
|
if let Some(h) = handle.take() {
|
|
135
|
-
h.await
|
|
137
|
+
let handle_r = h.await;
|
|
138
|
+
if let Err(e) = handle_r {
|
|
139
|
+
if !e.is_cancelled() {
|
|
140
|
+
error!(
|
|
141
|
+
"Unexpected error joining heartbeating tasks during shutdown: {:?}",
|
|
142
|
+
e
|
|
143
|
+
)
|
|
144
|
+
}
|
|
145
|
+
}
|
|
136
146
|
}
|
|
137
147
|
}
|
|
138
148
|
}
|
|
@@ -140,7 +150,7 @@ impl ActivityHeartbeatManager {
|
|
|
140
150
|
#[derive(Debug)]
|
|
141
151
|
struct ActivityHeartbeatState {
|
|
142
152
|
/// If None and throttle interval is over, untrack this task token
|
|
143
|
-
last_recorded_details: Option<Vec<
|
|
153
|
+
last_recorded_details: Option<Vec<Payload>>,
|
|
144
154
|
/// True if we've queued up a request to record against server, but it hasn't yet completed
|
|
145
155
|
is_record_in_flight: bool,
|
|
146
156
|
last_send_requested: Instant,
|
|
@@ -401,9 +411,8 @@ mod test {
|
|
|
401
411
|
|
|
402
412
|
use crate::worker::client::mocks::mock_workflow_client;
|
|
403
413
|
use std::time::Duration;
|
|
404
|
-
use temporal_sdk_core_protos::{
|
|
405
|
-
|
|
406
|
-
temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
414
|
+
use temporal_sdk_core_protos::temporal::api::{
|
|
415
|
+
common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
407
416
|
};
|
|
408
417
|
use tokio::time::sleep;
|
|
409
418
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
use crate::{
|
|
2
|
-
abstractions::MeteredSemaphore,
|
|
2
|
+
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
3
|
+
protosext::ValidScheduleLA,
|
|
4
|
+
retry_logic::RetryPolicyExt,
|
|
3
5
|
MetricsContext, TaskToken,
|
|
4
6
|
};
|
|
5
7
|
use parking_lot::Mutex;
|
|
@@ -12,12 +14,8 @@ use temporal_sdk_core_protos::{
|
|
|
12
14
|
coresdk::{
|
|
13
15
|
activity_result::{Cancellation, Failure as ActFail, Success},
|
|
14
16
|
activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
|
|
15
|
-
common::WorkflowExecution,
|
|
16
|
-
},
|
|
17
|
-
temporal::api::{
|
|
18
|
-
enums::v1::TimeoutType,
|
|
19
|
-
failure::v1::{failure::FailureInfo, ApplicationFailureInfo},
|
|
20
17
|
},
|
|
18
|
+
temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
|
|
21
19
|
};
|
|
22
20
|
use tokio::{
|
|
23
21
|
sync::{
|
|
@@ -47,6 +45,7 @@ pub(crate) struct LocalInFlightActInfo {
|
|
|
47
45
|
pub la_info: NewLocalAct,
|
|
48
46
|
pub dispatch_time: Instant,
|
|
49
47
|
pub attempt: u32,
|
|
48
|
+
_permit: OwnedMeteredSemPermit,
|
|
50
49
|
}
|
|
51
50
|
|
|
52
51
|
#[derive(Debug, Clone)]
|
|
@@ -184,6 +183,7 @@ impl LocalActivityManager {
|
|
|
184
183
|
)
|
|
185
184
|
}
|
|
186
185
|
|
|
186
|
+
#[cfg(test)]
|
|
187
187
|
pub(crate) fn num_outstanding(&self) -> usize {
|
|
188
188
|
self.dat.lock().outstanding_activity_tasks.len()
|
|
189
189
|
}
|
|
@@ -195,11 +195,11 @@ impl LocalActivityManager {
|
|
|
195
195
|
|
|
196
196
|
pub(crate) fn enqueue(
|
|
197
197
|
&self,
|
|
198
|
-
reqs: impl IntoIterator<Item = LocalActRequest
|
|
198
|
+
reqs: impl IntoIterator<Item = LocalActRequest>,
|
|
199
199
|
) -> Vec<LocalActivityResolution> {
|
|
200
|
-
debug!("Queuing local activities: {:?}", &reqs);
|
|
201
200
|
let mut immediate_resolutions = vec![];
|
|
202
201
|
for req in reqs {
|
|
202
|
+
debug!(local_activity = ?req, "Queuing local activity");
|
|
203
203
|
match req {
|
|
204
204
|
LocalActRequest::New(act) => {
|
|
205
205
|
let id = ExecutingLAId {
|
|
@@ -270,7 +270,7 @@ impl LocalActivityManager {
|
|
|
270
270
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
|
271
271
|
/// and there are no more remaining actions to take.
|
|
272
272
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
|
273
|
-
let new_or_retry = match self.rcvs.lock().await.next(&self.semaphore).await? {
|
|
273
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
|
|
274
274
|
NewOrCancel::Cancel(c) => {
|
|
275
275
|
return match c {
|
|
276
276
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
|
@@ -311,7 +311,7 @@ impl LocalActivityManager {
|
|
|
311
311
|
}
|
|
312
312
|
};
|
|
313
313
|
}
|
|
314
|
-
NewOrCancel::New(n) => n,
|
|
314
|
+
NewOrCancel::New(n, perm) => (n, perm),
|
|
315
315
|
};
|
|
316
316
|
|
|
317
317
|
// It is important that there are no await points after receiving from the channel, as
|
|
@@ -366,6 +366,7 @@ impl LocalActivityManager {
|
|
|
366
366
|
la_info: orig,
|
|
367
367
|
dispatch_time: Instant::now(),
|
|
368
368
|
attempt,
|
|
369
|
+
_permit: permit,
|
|
369
370
|
},
|
|
370
371
|
);
|
|
371
372
|
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
|
@@ -410,7 +411,6 @@ impl LocalActivityManager {
|
|
|
410
411
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
411
412
|
};
|
|
412
413
|
dlock.id_to_tt.remove(&exec_id);
|
|
413
|
-
self.semaphore.add_permit();
|
|
414
414
|
|
|
415
415
|
match status {
|
|
416
416
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -423,13 +423,9 @@ impl LocalActivityManager {
|
|
|
423
423
|
LocalActivityExecutionResult::Failed(f) => {
|
|
424
424
|
if let Some(backoff_dur) = info.la_info.schedule_cmd.retry_policy.should_retry(
|
|
425
425
|
info.attempt as usize,
|
|
426
|
-
f.failure
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
..
|
|
430
|
-
})) => r#type.as_str(),
|
|
431
|
-
_ => "",
|
|
432
|
-
}),
|
|
426
|
+
f.failure
|
|
427
|
+
.as_ref()
|
|
428
|
+
.and_then(|f| f.maybe_application_failure()),
|
|
433
429
|
) {
|
|
434
430
|
let will_use_timer =
|
|
435
431
|
backoff_dur > info.la_info.schedule_cmd.local_retry_threshold;
|
|
@@ -520,7 +516,7 @@ enum CancelOrTimeout {
|
|
|
520
516
|
}
|
|
521
517
|
|
|
522
518
|
enum NewOrCancel {
|
|
523
|
-
New(NewOrRetry),
|
|
519
|
+
New(NewOrRetry, OwnedMeteredSemPermit),
|
|
524
520
|
Cancel(CancelOrTimeout),
|
|
525
521
|
}
|
|
526
522
|
|
|
@@ -538,13 +534,13 @@ impl RcvChans {
|
|
|
538
534
|
cancel = async { self.cancels_req_rx.recv().await } => {
|
|
539
535
|
Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
|
|
540
536
|
}
|
|
541
|
-
maybe_new_or_retry = async {
|
|
537
|
+
(maybe_new_or_retry, perm) = async {
|
|
542
538
|
// Wait for an owned permit before taking a task. The permit is returned together with
|
|
543
539
|
// the task, and stays taken until a completion.
|
|
544
|
-
new_sem.
|
|
545
|
-
self.act_req_rx.recv().await
|
|
540
|
+
let perm = new_sem.acquire_owned().await.expect("is never closed");
|
|
541
|
+
(self.act_req_rx.recv().await, perm)
|
|
546
542
|
} => Some(NewOrCancel::New(
|
|
547
|
-
maybe_new_or_retry.expect("Send halves of LA manager are not dropped")
|
|
543
|
+
maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
|
|
548
544
|
)),
|
|
549
545
|
_ = self.shutdown.cancelled() => None
|
|
550
546
|
}
|
|
@@ -643,8 +639,9 @@ impl Drop for TimeoutBag {
|
|
|
643
639
|
mod tests {
|
|
644
640
|
use super::*;
|
|
645
641
|
use crate::protosext::LACloseTimeouts;
|
|
646
|
-
use temporal_sdk_core_protos::{
|
|
647
|
-
|
|
642
|
+
use temporal_sdk_core_protos::temporal::api::{
|
|
643
|
+
common::v1::RetryPolicy,
|
|
644
|
+
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
|
648
645
|
};
|
|
649
646
|
use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
|
|
650
647
|
|
|
@@ -8,7 +8,7 @@ pub(crate) use local_activities::{
|
|
|
8
8
|
};
|
|
9
9
|
|
|
10
10
|
use crate::{
|
|
11
|
-
abstractions::MeteredSemaphore,
|
|
11
|
+
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
12
12
|
pollers::BoxedActPoller,
|
|
13
13
|
telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
|
|
14
14
|
worker::{
|
|
@@ -19,6 +19,12 @@ use crate::{
|
|
|
19
19
|
};
|
|
20
20
|
use activity_heartbeat_manager::ActivityHeartbeatManager;
|
|
21
21
|
use dashmap::DashMap;
|
|
22
|
+
use governor::{
|
|
23
|
+
clock::DefaultClock,
|
|
24
|
+
middleware::NoOpMiddleware,
|
|
25
|
+
state::{InMemoryState, NotKeyed},
|
|
26
|
+
Quota, RateLimiter,
|
|
27
|
+
};
|
|
22
28
|
use std::{
|
|
23
29
|
convert::TryInto,
|
|
24
30
|
sync::Arc,
|
|
@@ -52,7 +58,6 @@ struct InFlightActInfo {
|
|
|
52
58
|
}
|
|
53
59
|
|
|
54
60
|
/// Augments [InFlightActInfo] with details specific to remote activities
|
|
55
|
-
#[derive(Debug)]
|
|
56
61
|
struct RemoteInFlightActInfo {
|
|
57
62
|
pub base: InFlightActInfo,
|
|
58
63
|
/// Used to calculate aggregation delay between activity heartbeats.
|
|
@@ -63,12 +68,15 @@ struct RemoteInFlightActInfo {
|
|
|
63
68
|
/// we have learned from heartbeating and issued a cancel task, in which case we may simply
|
|
64
69
|
/// discard the reply.
|
|
65
70
|
pub known_not_found: bool,
|
|
71
|
+
/// The permit from the max concurrent semaphore
|
|
72
|
+
_permit: OwnedMeteredSemPermit,
|
|
66
73
|
}
|
|
67
74
|
impl RemoteInFlightActInfo {
|
|
68
75
|
fn new(
|
|
69
76
|
activity_type: String,
|
|
70
77
|
workflow_type: String,
|
|
71
78
|
heartbeat_timeout: Option<prost_types::Duration>,
|
|
79
|
+
permit: OwnedMeteredSemPermit,
|
|
72
80
|
) -> Self {
|
|
73
81
|
Self {
|
|
74
82
|
base: InFlightActInfo {
|
|
@@ -79,10 +87,26 @@ impl RemoteInFlightActInfo {
|
|
|
79
87
|
heartbeat_timeout,
|
|
80
88
|
issued_cancel_to_lang: false,
|
|
81
89
|
known_not_found: false,
|
|
90
|
+
_permit: permit,
|
|
82
91
|
}
|
|
83
92
|
}
|
|
84
93
|
}
|
|
85
94
|
|
|
95
|
+
struct NonPollActBuffer {
|
|
96
|
+
tx: async_channel::Sender<PermittedTqResp>,
|
|
97
|
+
rx: async_channel::Receiver<PermittedTqResp>,
|
|
98
|
+
}
|
|
99
|
+
impl NonPollActBuffer {
|
|
100
|
+
pub fn new() -> Self {
|
|
101
|
+
let (tx, rx) = async_channel::unbounded();
|
|
102
|
+
Self { tx, rx }
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
pub async fn next(&self) -> PermittedTqResp {
|
|
106
|
+
self.rx.recv().await.expect("Send half cannot be dropped")
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
86
110
|
pub(crate) struct WorkerActivityTasks {
|
|
87
111
|
/// Centralizes management of heartbeat issuing / throttling
|
|
88
112
|
heartbeat_manager: ActivityHeartbeatManager,
|
|
@@ -91,8 +115,13 @@ pub(crate) struct WorkerActivityTasks {
|
|
|
91
115
|
/// Buffers activity task polling in the event we need to return a cancellation while a poll is
|
|
92
116
|
/// ongoing.
|
|
93
117
|
poller: BoxedActPoller,
|
|
118
|
+
/// Holds activity tasks we have received by non-polling means. EX: In direct response to
|
|
119
|
+
/// workflow task completion.
|
|
120
|
+
non_poll_tasks: NonPollActBuffer,
|
|
94
121
|
/// Ensures we stay at or below this worker's maximum concurrent activity limit
|
|
95
|
-
activities_semaphore: MeteredSemaphore
|
|
122
|
+
activities_semaphore: Arc<MeteredSemaphore>,
|
|
123
|
+
/// Enables per-worker rate-limiting of activity tasks
|
|
124
|
+
ratelimiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
|
|
96
125
|
/// Wakes every time an activity is removed from the outstanding map
|
|
97
126
|
complete_notify: Notify,
|
|
98
127
|
|
|
@@ -105,6 +134,7 @@ pub(crate) struct WorkerActivityTasks {
|
|
|
105
134
|
impl WorkerActivityTasks {
|
|
106
135
|
pub(crate) fn new(
|
|
107
136
|
max_activity_tasks: usize,
|
|
137
|
+
max_worker_act_per_sec: Option<f64>,
|
|
108
138
|
poller: BoxedActPoller,
|
|
109
139
|
client: Arc<WorkerClientBag>,
|
|
110
140
|
metrics: MetricsContext,
|
|
@@ -115,11 +145,15 @@ impl WorkerActivityTasks {
|
|
|
115
145
|
heartbeat_manager: ActivityHeartbeatManager::new(client),
|
|
116
146
|
outstanding_activity_tasks: Default::default(),
|
|
117
147
|
poller,
|
|
118
|
-
|
|
148
|
+
non_poll_tasks: NonPollActBuffer::new(),
|
|
149
|
+
activities_semaphore: Arc::new(MeteredSemaphore::new(
|
|
119
150
|
max_activity_tasks,
|
|
120
151
|
metrics.with_new_attrs([activity_worker_type()]),
|
|
121
152
|
MetricsContext::available_task_slots,
|
|
122
|
-
),
|
|
153
|
+
)),
|
|
154
|
+
ratelimiter: max_worker_act_per_sec.and_then(|ps| {
|
|
155
|
+
Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
|
|
156
|
+
}),
|
|
123
157
|
complete_notify: Notify::new(),
|
|
124
158
|
metrics,
|
|
125
159
|
max_heartbeat_throttle_interval,
|
|
@@ -151,12 +185,15 @@ impl WorkerActivityTasks {
|
|
|
151
185
|
// Acquire an owned permit for an outstanding activity. The permit is handed along with
|
|
152
186
|
// the polled task and stored in its in-flight info, so completing the activity no longer
|
|
153
187
|
// needs to manually add a permit back to the semaphore.
|
|
154
|
-
let
|
|
188
|
+
let perm = self
|
|
155
189
|
.activities_semaphore
|
|
156
|
-
.
|
|
190
|
+
.acquire_owned()
|
|
157
191
|
.await
|
|
158
192
|
.expect("outstanding activity semaphore not closed");
|
|
159
|
-
(self.
|
|
193
|
+
if let Some(ref rl) = self.ratelimiter {
|
|
194
|
+
rl.until_ready().await;
|
|
195
|
+
}
|
|
196
|
+
(self.poller.poll().await, perm)
|
|
160
197
|
};
|
|
161
198
|
|
|
162
199
|
tokio::select! {
|
|
@@ -165,7 +202,10 @@ impl WorkerActivityTasks {
|
|
|
165
202
|
cancel_task = self.next_pending_cancel_task() => {
|
|
166
203
|
cancel_task
|
|
167
204
|
}
|
|
168
|
-
|
|
205
|
+
task = self.non_poll_tasks.next() => {
|
|
206
|
+
Ok(Some(self.about_to_issue_task(task)))
|
|
207
|
+
}
|
|
208
|
+
(work, permit) = poll_with_semaphore => {
|
|
169
209
|
match work {
|
|
170
210
|
Some(Ok(work)) => {
|
|
171
211
|
if work == PollActivityTaskQueueResponse::default() {
|
|
@@ -173,23 +213,10 @@ impl WorkerActivityTasks {
|
|
|
173
213
|
self.metrics.act_poll_timeout();
|
|
174
214
|
return Ok(None)
|
|
175
215
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
self.outstanding_activity_tasks.insert(
|
|
183
|
-
work.task_token.clone().into(),
|
|
184
|
-
RemoteInFlightActInfo::new(
|
|
185
|
-
work.activity_type.clone().unwrap_or_default().name,
|
|
186
|
-
work.workflow_type.clone().unwrap_or_default().name,
|
|
187
|
-
work.heartbeat_timeout.clone()
|
|
188
|
-
),
|
|
189
|
-
);
|
|
190
|
-
// Only permanently take a permit in the event the poll finished properly
|
|
191
|
-
sem.forget();
|
|
192
|
-
Ok(Some(ActivityTask::start_from_poll_resp(work)))
|
|
216
|
+
let work = self.about_to_issue_task(PermittedTqResp {
|
|
217
|
+
resp: work, permit
|
|
218
|
+
});
|
|
219
|
+
Ok(Some(work))
|
|
193
220
|
}
|
|
194
221
|
None => {
|
|
195
222
|
Err(PollActivityError::ShutDown)
|
|
@@ -212,10 +239,9 @@ impl WorkerActivityTasks {
|
|
|
212
239
|
workflow_type(act_info.base.workflow_type.clone()),
|
|
213
240
|
]);
|
|
214
241
|
act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
|
|
215
|
-
self.activities_semaphore.add_permit();
|
|
216
|
-
self.heartbeat_manager.evict(task_token.clone()).await;
|
|
217
242
|
let known_not_found = act_info.known_not_found;
|
|
218
243
|
drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
|
|
244
|
+
self.heartbeat_manager.evict(task_token.clone()).await;
|
|
219
245
|
self.complete_notify.notify_waiters();
|
|
220
246
|
|
|
221
247
|
// No need to report activities which we already know the server doesn't care about
|
|
@@ -304,6 +330,14 @@ impl WorkerActivityTasks {
|
|
|
304
330
|
self.heartbeat_manager.record(details, throttle_interval)
|
|
305
331
|
}
|
|
306
332
|
|
|
333
|
+
/// Returns a handle that the workflows management side can use to interact with this manager
|
|
334
|
+
pub(crate) fn get_handle_for_workflows(&self) -> ActivitiesFromWFTsHandle {
|
|
335
|
+
ActivitiesFromWFTsHandle {
|
|
336
|
+
sem: self.activities_semaphore.clone(),
|
|
337
|
+
tx: self.non_poll_tasks.tx.clone(),
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
307
341
|
async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
|
|
308
342
|
let next_pc = self.heartbeat_manager.next_pending_cancel().await;
|
|
309
343
|
// Issue cancellations for anything we noticed was cancelled during heartbeating
|
|
@@ -336,8 +370,101 @@ impl WorkerActivityTasks {
|
|
|
336
370
|
}
|
|
337
371
|
}
|
|
338
372
|
|
|
373
|
+
/// Called when there is a new act task about to be bubbled up out of the manager
|
|
374
|
+
fn about_to_issue_task(&self, task: PermittedTqResp) -> ActivityTask {
|
|
375
|
+
if let Some(dur) = task.resp.sched_to_start() {
|
|
376
|
+
self.metrics.act_sched_to_start_latency(dur);
|
|
377
|
+
};
|
|
378
|
+
|
|
379
|
+
self.outstanding_activity_tasks.insert(
|
|
380
|
+
task.resp.task_token.clone().into(),
|
|
381
|
+
RemoteInFlightActInfo::new(
|
|
382
|
+
task.resp.activity_type.clone().unwrap_or_default().name,
|
|
383
|
+
task.resp.workflow_type.clone().unwrap_or_default().name,
|
|
384
|
+
task.resp.heartbeat_timeout.clone(),
|
|
385
|
+
task.permit,
|
|
386
|
+
),
|
|
387
|
+
);
|
|
388
|
+
|
|
389
|
+
ActivityTask::start_from_poll_resp(task.resp)
|
|
390
|
+
}
|
|
391
|
+
|
|
339
392
|
#[cfg(test)]
|
|
340
393
|
pub(crate) fn remaining_activity_capacity(&self) -> usize {
|
|
341
|
-
self.activities_semaphore.
|
|
394
|
+
self.activities_semaphore.available_permits()
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
/// Provides facilities for the workflow side of things to interact with the activity manager.
|
|
399
|
+
/// Allows for the handling of activities returned by WFT completions.
|
|
400
|
+
pub(crate) struct ActivitiesFromWFTsHandle {
|
|
401
|
+
sem: Arc<MeteredSemaphore>,
|
|
402
|
+
tx: async_channel::Sender<PermittedTqResp>,
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
impl ActivitiesFromWFTsHandle {
|
|
406
|
+
/// Returns a handle that can be used to reserve an activity slot. EX: When requesting eager
|
|
407
|
+
/// dispatch of an activity to this worker upon workflow task completion
|
|
408
|
+
pub(crate) fn reserve_slot(&self) -> Option<OwnedMeteredSemPermit> {
|
|
409
|
+
self.sem.try_acquire_owned().ok()
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/// Queue new activity tasks for dispatch received from non-polling sources (ex: eager returns
|
|
413
|
+
/// from WFT completion)
|
|
414
|
+
pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = PermittedTqResp>) {
|
|
415
|
+
for t in tasks.into_iter() {
|
|
416
|
+
self.tx.try_send(t).expect("Receive half cannot be dropped");
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
pub(crate) struct PermittedTqResp {
|
|
422
|
+
pub permit: OwnedMeteredSemPermit,
|
|
423
|
+
pub resp: PollActivityTaskQueueResponse,
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
#[cfg(test)]
|
|
427
|
+
mod tests {
|
|
428
|
+
use super::*;
|
|
429
|
+
use crate::{
|
|
430
|
+
test_help::mock_poller_from_resps, worker::client::mocks::mock_manual_workflow_client,
|
|
431
|
+
};
|
|
432
|
+
|
|
433
|
+
#[tokio::test]
|
|
434
|
+
async fn per_worker_ratelimit() {
|
|
435
|
+
let poller = mock_poller_from_resps([
|
|
436
|
+
PollActivityTaskQueueResponse {
|
|
437
|
+
task_token: vec![1],
|
|
438
|
+
activity_id: "act1".to_string(),
|
|
439
|
+
..Default::default()
|
|
440
|
+
}
|
|
441
|
+
.into(),
|
|
442
|
+
PollActivityTaskQueueResponse {
|
|
443
|
+
task_token: vec![2],
|
|
444
|
+
activity_id: "act2".to_string(),
|
|
445
|
+
..Default::default()
|
|
446
|
+
}
|
|
447
|
+
.into(),
|
|
448
|
+
]);
|
|
449
|
+
let client = WorkerClientBag::new(
|
|
450
|
+
Box::new(mock_manual_workflow_client()),
|
|
451
|
+
"fake_namespace".to_string(),
|
|
452
|
+
);
|
|
453
|
+
let atm = WorkerActivityTasks::new(
|
|
454
|
+
10,
|
|
455
|
+
Some(2.0),
|
|
456
|
+
poller,
|
|
457
|
+
Arc::new(client),
|
|
458
|
+
MetricsContext::default(),
|
|
459
|
+
Duration::from_secs(1),
|
|
460
|
+
Duration::from_secs(1),
|
|
461
|
+
);
|
|
462
|
+
let start = Instant::now();
|
|
463
|
+
atm.poll().await.unwrap().unwrap();
|
|
464
|
+
atm.poll().await.unwrap().unwrap();
|
|
465
|
+
// At least half a second will have elapsed since we only allow 2 tasks per second.
|
|
466
|
+
// With no ratelimit, even on a slow CI server with lots of load, this would typically take
|
|
467
|
+
// low single digit ms or less.
|
|
468
|
+
assert!(start.elapsed() > Duration::from_secs_f64(0.5));
|
|
342
469
|
}
|
|
343
470
|
}
|
|
@@ -118,6 +118,7 @@ pub(crate) trait WorkerClient: Sync + Send {
|
|
|
118
118
|
#[async_trait::async_trait]
|
|
119
119
|
impl<'a, T> WorkerClient for T
|
|
120
120
|
where
|
|
121
|
+
// TODO: This should be workflow service... no reason to marry worker trait to sdk client trait
|
|
121
122
|
T: Borrow<dyn WorkflowClientTrait + 'a + Send + Sync> + Send + Sync,
|
|
122
123
|
{
|
|
123
124
|
async fn poll_workflow_task(
|