@temporalio/core-bridge 0.23.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +118 -15
- package/Cargo.toml +2 -1
- package/LICENSE.md +1 -1
- package/README.md +1 -1
- package/index.d.ts +47 -18
- package/package.json +7 -7
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
- package/sdk-core/ARCHITECTURE.md +9 -7
- package/sdk-core/README.md +5 -1
- package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -3
- package/sdk-core/client/src/lib.rs +26 -8
- package/sdk-core/client/src/raw.rs +166 -54
- package/sdk-core/client/src/retry.rs +9 -4
- package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
- package/sdk-core/core/Cargo.toml +2 -0
- package/sdk-core/core/src/abstractions.rs +137 -16
- package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
- package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
- package/sdk-core/core/src/core_tests/determinism.rs +2 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
- package/sdk-core/core/src/core_tests/queries.rs +146 -60
- package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
- package/sdk-core/core/src/core_tests/workers.rs +39 -23
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
- package/sdk-core/core/src/lib.rs +6 -4
- package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
- package/sdk-core/core/src/protosext/mod.rs +6 -6
- package/sdk-core/core/src/retry_logic.rs +1 -1
- package/sdk-core/core/src/telemetry/metrics.rs +21 -7
- package/sdk-core/core/src/telemetry/mod.rs +18 -4
- package/sdk-core/core/src/test_help/mod.rs +341 -109
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
- package/sdk-core/core/src/worker/activities/local_activities.rs +19 -16
- package/sdk-core/core/src/worker/activities.rs +156 -29
- package/sdk-core/core/src/worker/client.rs +1 -0
- package/sdk-core/core/src/worker/mod.rs +132 -659
- package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
- package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
- package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
- package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
- package/sdk-core/core-api/src/errors.rs +3 -10
- package/sdk-core/core-api/src/lib.rs +2 -1
- package/sdk-core/core-api/src/worker.rs +26 -2
- package/sdk-core/etc/dynamic-config.yaml +2 -0
- package/sdk-core/integ-with-otel.sh +1 -1
- package/sdk-core/protos/api_upstream/Makefile +4 -4
- package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
- package/sdk-core/protos/api_upstream/buf.yaml +8 -9
- package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -1
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
- package/sdk-core/sdk/src/activity_context.rs +12 -5
- package/sdk-core/sdk/src/app_data.rs +37 -0
- package/sdk-core/sdk/src/lib.rs +76 -43
- package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
- package/sdk-core/sdk/src/workflow_context.rs +14 -19
- package/sdk-core/sdk/src/workflow_future.rs +11 -6
- package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
- package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
- package/sdk-core/sdk-core-protos/src/lib.rs +74 -176
- package/sdk-core/test-utils/src/lib.rs +85 -72
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
- package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
- package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
- package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
- package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
- package/sdk-core/tests/load_tests.rs +8 -3
- package/sdk-core/tests/main.rs +2 -1
- package/src/conversions.rs +47 -39
- package/src/errors.rs +10 -21
- package/src/lib.rs +342 -325
- package/sdk-core/core/src/pending_activations.rs +0 -173
- package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
- package/sdk-core/core/src/workflow/mod.rs +0 -478
- package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
|
@@ -9,8 +9,10 @@ use std::{
|
|
|
9
9
|
time::{self, Duration, Instant},
|
|
10
10
|
};
|
|
11
11
|
use temporal_sdk_core_protos::{
|
|
12
|
-
coresdk::{activity_task::ActivityCancelReason,
|
|
13
|
-
temporal::api::
|
|
12
|
+
coresdk::{activity_task::ActivityCancelReason, ActivityHeartbeat, IntoPayloadsExt},
|
|
13
|
+
temporal::api::{
|
|
14
|
+
common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
15
|
+
},
|
|
14
16
|
};
|
|
15
17
|
use tokio::{
|
|
16
18
|
sync::{
|
|
@@ -47,7 +49,7 @@ enum HeartbeatAction {
|
|
|
47
49
|
#[derive(Debug)]
|
|
48
50
|
pub struct ValidActivityHeartbeat {
|
|
49
51
|
pub task_token: TaskToken,
|
|
50
|
-
pub details: Vec<
|
|
52
|
+
pub details: Vec<Payload>,
|
|
51
53
|
pub throttle_interval: time::Duration,
|
|
52
54
|
}
|
|
53
55
|
|
|
@@ -58,7 +60,7 @@ enum HeartbeatExecutorAction {
|
|
|
58
60
|
/// Report heartbeat to the server
|
|
59
61
|
Report {
|
|
60
62
|
task_token: TaskToken,
|
|
61
|
-
details: Vec<
|
|
63
|
+
details: Vec<Payload>,
|
|
62
64
|
},
|
|
63
65
|
}
|
|
64
66
|
|
|
@@ -132,7 +134,15 @@ impl ActivityHeartbeatManager {
|
|
|
132
134
|
let _ = self.shutdown_token.cancel();
|
|
133
135
|
let mut handle = self.join_handle.lock().await;
|
|
134
136
|
if let Some(h) = handle.take() {
|
|
135
|
-
h.await
|
|
137
|
+
let handle_r = h.await;
|
|
138
|
+
if let Err(e) = handle_r {
|
|
139
|
+
if !e.is_cancelled() {
|
|
140
|
+
error!(
|
|
141
|
+
"Unexpected error joining heartbeating tasks during shutdown: {:?}",
|
|
142
|
+
e
|
|
143
|
+
)
|
|
144
|
+
}
|
|
145
|
+
}
|
|
136
146
|
}
|
|
137
147
|
}
|
|
138
148
|
}
|
|
@@ -140,7 +150,7 @@ impl ActivityHeartbeatManager {
|
|
|
140
150
|
#[derive(Debug)]
|
|
141
151
|
struct ActivityHeartbeatState {
|
|
142
152
|
/// If None and throttle interval is over, untrack this task token
|
|
143
|
-
last_recorded_details: Option<Vec<
|
|
153
|
+
last_recorded_details: Option<Vec<Payload>>,
|
|
144
154
|
/// True if we've queued up a request to record against server, but it hasn't yet completed
|
|
145
155
|
is_record_in_flight: bool,
|
|
146
156
|
last_send_requested: Instant,
|
|
@@ -401,9 +411,8 @@ mod test {
|
|
|
401
411
|
|
|
402
412
|
use crate::worker::client::mocks::mock_workflow_client;
|
|
403
413
|
use std::time::Duration;
|
|
404
|
-
use temporal_sdk_core_protos::{
|
|
405
|
-
|
|
406
|
-
temporal::api::workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
414
|
+
use temporal_sdk_core_protos::temporal::api::{
|
|
415
|
+
common::v1::Payload, workflowservice::v1::RecordActivityTaskHeartbeatResponse,
|
|
407
416
|
};
|
|
408
417
|
use tokio::time::sleep;
|
|
409
418
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
use crate::{
|
|
2
|
-
abstractions::MeteredSemaphore,
|
|
2
|
+
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
3
|
+
protosext::ValidScheduleLA,
|
|
4
|
+
retry_logic::RetryPolicyExt,
|
|
3
5
|
MetricsContext, TaskToken,
|
|
4
6
|
};
|
|
5
7
|
use parking_lot::Mutex;
|
|
@@ -12,9 +14,8 @@ use temporal_sdk_core_protos::{
|
|
|
12
14
|
coresdk::{
|
|
13
15
|
activity_result::{Cancellation, Failure as ActFail, Success},
|
|
14
16
|
activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
|
|
15
|
-
common::WorkflowExecution,
|
|
16
17
|
},
|
|
17
|
-
temporal::api::enums::v1::TimeoutType,
|
|
18
|
+
temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
|
|
18
19
|
};
|
|
19
20
|
use tokio::{
|
|
20
21
|
sync::{
|
|
@@ -44,6 +45,7 @@ pub(crate) struct LocalInFlightActInfo {
|
|
|
44
45
|
pub la_info: NewLocalAct,
|
|
45
46
|
pub dispatch_time: Instant,
|
|
46
47
|
pub attempt: u32,
|
|
48
|
+
_permit: OwnedMeteredSemPermit,
|
|
47
49
|
}
|
|
48
50
|
|
|
49
51
|
#[derive(Debug, Clone)]
|
|
@@ -181,6 +183,7 @@ impl LocalActivityManager {
|
|
|
181
183
|
)
|
|
182
184
|
}
|
|
183
185
|
|
|
186
|
+
#[cfg(test)]
|
|
184
187
|
pub(crate) fn num_outstanding(&self) -> usize {
|
|
185
188
|
self.dat.lock().outstanding_activity_tasks.len()
|
|
186
189
|
}
|
|
@@ -192,11 +195,11 @@ impl LocalActivityManager {
|
|
|
192
195
|
|
|
193
196
|
pub(crate) fn enqueue(
|
|
194
197
|
&self,
|
|
195
|
-
reqs: impl IntoIterator<Item = LocalActRequest
|
|
198
|
+
reqs: impl IntoIterator<Item = LocalActRequest>,
|
|
196
199
|
) -> Vec<LocalActivityResolution> {
|
|
197
|
-
debug!("Queuing local activities: {:?}", &reqs);
|
|
198
200
|
let mut immediate_resolutions = vec![];
|
|
199
201
|
for req in reqs {
|
|
202
|
+
debug!(local_activity = ?req, "Queuing local activity");
|
|
200
203
|
match req {
|
|
201
204
|
LocalActRequest::New(act) => {
|
|
202
205
|
let id = ExecutingLAId {
|
|
@@ -267,7 +270,7 @@ impl LocalActivityManager {
|
|
|
267
270
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
|
268
271
|
/// and there are no more remaining actions to take.
|
|
269
272
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
|
270
|
-
let new_or_retry = match self.rcvs.lock().await.next(&self.semaphore).await? {
|
|
273
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
|
|
271
274
|
NewOrCancel::Cancel(c) => {
|
|
272
275
|
return match c {
|
|
273
276
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
|
@@ -308,7 +311,7 @@ impl LocalActivityManager {
|
|
|
308
311
|
}
|
|
309
312
|
};
|
|
310
313
|
}
|
|
311
|
-
NewOrCancel::New(n) => n,
|
|
314
|
+
NewOrCancel::New(n, perm) => (n, perm),
|
|
312
315
|
};
|
|
313
316
|
|
|
314
317
|
// It is important that there are no await points after receiving from the channel, as
|
|
@@ -363,6 +366,7 @@ impl LocalActivityManager {
|
|
|
363
366
|
la_info: orig,
|
|
364
367
|
dispatch_time: Instant::now(),
|
|
365
368
|
attempt,
|
|
369
|
+
_permit: permit,
|
|
366
370
|
},
|
|
367
371
|
);
|
|
368
372
|
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
|
@@ -407,7 +411,6 @@ impl LocalActivityManager {
|
|
|
407
411
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
408
412
|
};
|
|
409
413
|
dlock.id_to_tt.remove(&exec_id);
|
|
410
|
-
self.semaphore.add_permit();
|
|
411
414
|
|
|
412
415
|
match status {
|
|
413
416
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -513,7 +516,7 @@ enum CancelOrTimeout {
|
|
|
513
516
|
}
|
|
514
517
|
|
|
515
518
|
enum NewOrCancel {
|
|
516
|
-
New(NewOrRetry),
|
|
519
|
+
New(NewOrRetry, OwnedMeteredSemPermit),
|
|
517
520
|
Cancel(CancelOrTimeout),
|
|
518
521
|
}
|
|
519
522
|
|
|
@@ -531,13 +534,13 @@ impl RcvChans {
|
|
|
531
534
|
cancel = async { self.cancels_req_rx.recv().await } => {
|
|
532
535
|
Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
|
|
533
536
|
}
|
|
534
|
-
maybe_new_or_retry = async {
|
|
537
|
+
(maybe_new_or_retry, perm) = async {
|
|
535
538
|
// Wait for a permit to take a task and forget it. Permits are removed until a
|
|
536
539
|
// completion.
|
|
537
|
-
new_sem.
|
|
538
|
-
self.act_req_rx.recv().await
|
|
540
|
+
let perm = new_sem.acquire_owned().await.expect("is never closed");
|
|
541
|
+
(self.act_req_rx.recv().await, perm)
|
|
539
542
|
} => Some(NewOrCancel::New(
|
|
540
|
-
maybe_new_or_retry.expect("Send halves of LA manager are not dropped")
|
|
543
|
+
maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
|
|
541
544
|
)),
|
|
542
545
|
_ = self.shutdown.cancelled() => None
|
|
543
546
|
}
|
|
@@ -636,9 +639,9 @@ impl Drop for TimeoutBag {
|
|
|
636
639
|
mod tests {
|
|
637
640
|
use super::*;
|
|
638
641
|
use crate::protosext::LACloseTimeouts;
|
|
639
|
-
use temporal_sdk_core_protos::{
|
|
640
|
-
|
|
641
|
-
|
|
642
|
+
use temporal_sdk_core_protos::temporal::api::{
|
|
643
|
+
common::v1::RetryPolicy,
|
|
644
|
+
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
|
642
645
|
};
|
|
643
646
|
use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
|
|
644
647
|
|
|
@@ -8,7 +8,7 @@ pub(crate) use local_activities::{
|
|
|
8
8
|
};
|
|
9
9
|
|
|
10
10
|
use crate::{
|
|
11
|
-
abstractions::MeteredSemaphore,
|
|
11
|
+
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
12
12
|
pollers::BoxedActPoller,
|
|
13
13
|
telemetry::metrics::{activity_type, activity_worker_type, workflow_type, MetricsContext},
|
|
14
14
|
worker::{
|
|
@@ -19,6 +19,12 @@ use crate::{
|
|
|
19
19
|
};
|
|
20
20
|
use activity_heartbeat_manager::ActivityHeartbeatManager;
|
|
21
21
|
use dashmap::DashMap;
|
|
22
|
+
use governor::{
|
|
23
|
+
clock::DefaultClock,
|
|
24
|
+
middleware::NoOpMiddleware,
|
|
25
|
+
state::{InMemoryState, NotKeyed},
|
|
26
|
+
Quota, RateLimiter,
|
|
27
|
+
};
|
|
22
28
|
use std::{
|
|
23
29
|
convert::TryInto,
|
|
24
30
|
sync::Arc,
|
|
@@ -52,7 +58,6 @@ struct InFlightActInfo {
|
|
|
52
58
|
}
|
|
53
59
|
|
|
54
60
|
/// Augments [InFlightActInfo] with details specific to remote activities
|
|
55
|
-
#[derive(Debug)]
|
|
56
61
|
struct RemoteInFlightActInfo {
|
|
57
62
|
pub base: InFlightActInfo,
|
|
58
63
|
/// Used to calculate aggregation delay between activity heartbeats.
|
|
@@ -63,12 +68,15 @@ struct RemoteInFlightActInfo {
|
|
|
63
68
|
/// we have learned from heartbeating and issued a cancel task, in which case we may simply
|
|
64
69
|
/// discard the reply.
|
|
65
70
|
pub known_not_found: bool,
|
|
71
|
+
/// The permit from the max concurrent semaphore
|
|
72
|
+
_permit: OwnedMeteredSemPermit,
|
|
66
73
|
}
|
|
67
74
|
impl RemoteInFlightActInfo {
|
|
68
75
|
fn new(
|
|
69
76
|
activity_type: String,
|
|
70
77
|
workflow_type: String,
|
|
71
78
|
heartbeat_timeout: Option<prost_types::Duration>,
|
|
79
|
+
permit: OwnedMeteredSemPermit,
|
|
72
80
|
) -> Self {
|
|
73
81
|
Self {
|
|
74
82
|
base: InFlightActInfo {
|
|
@@ -79,10 +87,26 @@ impl RemoteInFlightActInfo {
|
|
|
79
87
|
heartbeat_timeout,
|
|
80
88
|
issued_cancel_to_lang: false,
|
|
81
89
|
known_not_found: false,
|
|
90
|
+
_permit: permit,
|
|
82
91
|
}
|
|
83
92
|
}
|
|
84
93
|
}
|
|
85
94
|
|
|
95
|
+
struct NonPollActBuffer {
|
|
96
|
+
tx: async_channel::Sender<PermittedTqResp>,
|
|
97
|
+
rx: async_channel::Receiver<PermittedTqResp>,
|
|
98
|
+
}
|
|
99
|
+
impl NonPollActBuffer {
|
|
100
|
+
pub fn new() -> Self {
|
|
101
|
+
let (tx, rx) = async_channel::unbounded();
|
|
102
|
+
Self { tx, rx }
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
pub async fn next(&self) -> PermittedTqResp {
|
|
106
|
+
self.rx.recv().await.expect("Send half cannot be dropped")
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
86
110
|
pub(crate) struct WorkerActivityTasks {
|
|
87
111
|
/// Centralizes management of heartbeat issuing / throttling
|
|
88
112
|
heartbeat_manager: ActivityHeartbeatManager,
|
|
@@ -91,8 +115,13 @@ pub(crate) struct WorkerActivityTasks {
|
|
|
91
115
|
/// Buffers activity task polling in the event we need to return a cancellation while a poll is
|
|
92
116
|
/// ongoing.
|
|
93
117
|
poller: BoxedActPoller,
|
|
118
|
+
/// Holds activity tasks we have received by non-polling means. EX: In direct response to
|
|
119
|
+
/// workflow task completion.
|
|
120
|
+
non_poll_tasks: NonPollActBuffer,
|
|
94
121
|
/// Ensures we stay at or below this worker's maximum concurrent activity limit
|
|
95
|
-
activities_semaphore: MeteredSemaphore
|
|
122
|
+
activities_semaphore: Arc<MeteredSemaphore>,
|
|
123
|
+
/// Enables per-worker rate-limiting of activity tasks
|
|
124
|
+
ratelimiter: Option<RateLimiter<NotKeyed, InMemoryState, DefaultClock, NoOpMiddleware>>,
|
|
96
125
|
/// Wakes every time an activity is removed from the outstanding map
|
|
97
126
|
complete_notify: Notify,
|
|
98
127
|
|
|
@@ -105,6 +134,7 @@ pub(crate) struct WorkerActivityTasks {
|
|
|
105
134
|
impl WorkerActivityTasks {
|
|
106
135
|
pub(crate) fn new(
|
|
107
136
|
max_activity_tasks: usize,
|
|
137
|
+
max_worker_act_per_sec: Option<f64>,
|
|
108
138
|
poller: BoxedActPoller,
|
|
109
139
|
client: Arc<WorkerClientBag>,
|
|
110
140
|
metrics: MetricsContext,
|
|
@@ -115,11 +145,15 @@ impl WorkerActivityTasks {
|
|
|
115
145
|
heartbeat_manager: ActivityHeartbeatManager::new(client),
|
|
116
146
|
outstanding_activity_tasks: Default::default(),
|
|
117
147
|
poller,
|
|
118
|
-
|
|
148
|
+
non_poll_tasks: NonPollActBuffer::new(),
|
|
149
|
+
activities_semaphore: Arc::new(MeteredSemaphore::new(
|
|
119
150
|
max_activity_tasks,
|
|
120
151
|
metrics.with_new_attrs([activity_worker_type()]),
|
|
121
152
|
MetricsContext::available_task_slots,
|
|
122
|
-
),
|
|
153
|
+
)),
|
|
154
|
+
ratelimiter: max_worker_act_per_sec.and_then(|ps| {
|
|
155
|
+
Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
|
|
156
|
+
}),
|
|
123
157
|
complete_notify: Notify::new(),
|
|
124
158
|
metrics,
|
|
125
159
|
max_heartbeat_throttle_interval,
|
|
@@ -151,12 +185,15 @@ impl WorkerActivityTasks {
|
|
|
151
185
|
// Acquire and subsequently forget a permit for an outstanding activity. When they are
|
|
152
186
|
// completed, we must add a new permit to the semaphore, since holding the permit the
|
|
153
187
|
// entire time lang does work would be a challenge.
|
|
154
|
-
let
|
|
188
|
+
let perm = self
|
|
155
189
|
.activities_semaphore
|
|
156
|
-
.
|
|
190
|
+
.acquire_owned()
|
|
157
191
|
.await
|
|
158
192
|
.expect("outstanding activity semaphore not closed");
|
|
159
|
-
(self.
|
|
193
|
+
if let Some(ref rl) = self.ratelimiter {
|
|
194
|
+
rl.until_ready().await;
|
|
195
|
+
}
|
|
196
|
+
(self.poller.poll().await, perm)
|
|
160
197
|
};
|
|
161
198
|
|
|
162
199
|
tokio::select! {
|
|
@@ -165,7 +202,10 @@ impl WorkerActivityTasks {
|
|
|
165
202
|
cancel_task = self.next_pending_cancel_task() => {
|
|
166
203
|
cancel_task
|
|
167
204
|
}
|
|
168
|
-
|
|
205
|
+
task = self.non_poll_tasks.next() => {
|
|
206
|
+
Ok(Some(self.about_to_issue_task(task)))
|
|
207
|
+
}
|
|
208
|
+
(work, permit) = poll_with_semaphore => {
|
|
169
209
|
match work {
|
|
170
210
|
Some(Ok(work)) => {
|
|
171
211
|
if work == PollActivityTaskQueueResponse::default() {
|
|
@@ -173,23 +213,10 @@ impl WorkerActivityTasks {
|
|
|
173
213
|
self.metrics.act_poll_timeout();
|
|
174
214
|
return Ok(None)
|
|
175
215
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
self.outstanding_activity_tasks.insert(
|
|
183
|
-
work.task_token.clone().into(),
|
|
184
|
-
RemoteInFlightActInfo::new(
|
|
185
|
-
work.activity_type.clone().unwrap_or_default().name,
|
|
186
|
-
work.workflow_type.clone().unwrap_or_default().name,
|
|
187
|
-
work.heartbeat_timeout.clone()
|
|
188
|
-
),
|
|
189
|
-
);
|
|
190
|
-
// Only permanently take a permit in the event the poll finished properly
|
|
191
|
-
sem.forget();
|
|
192
|
-
Ok(Some(ActivityTask::start_from_poll_resp(work)))
|
|
216
|
+
let work = self.about_to_issue_task(PermittedTqResp {
|
|
217
|
+
resp: work, permit
|
|
218
|
+
});
|
|
219
|
+
Ok(Some(work))
|
|
193
220
|
}
|
|
194
221
|
None => {
|
|
195
222
|
Err(PollActivityError::ShutDown)
|
|
@@ -212,10 +239,9 @@ impl WorkerActivityTasks {
|
|
|
212
239
|
workflow_type(act_info.base.workflow_type.clone()),
|
|
213
240
|
]);
|
|
214
241
|
act_metrics.act_execution_latency(act_info.base.start_time.elapsed());
|
|
215
|
-
self.activities_semaphore.add_permit();
|
|
216
|
-
self.heartbeat_manager.evict(task_token.clone()).await;
|
|
217
242
|
let known_not_found = act_info.known_not_found;
|
|
218
243
|
drop(act_info); // TODO: Get rid of dashmap. If we hold ref across await, bad stuff.
|
|
244
|
+
self.heartbeat_manager.evict(task_token.clone()).await;
|
|
219
245
|
self.complete_notify.notify_waiters();
|
|
220
246
|
|
|
221
247
|
// No need to report activities which we already know the server doesn't care about
|
|
@@ -304,6 +330,14 @@ impl WorkerActivityTasks {
|
|
|
304
330
|
self.heartbeat_manager.record(details, throttle_interval)
|
|
305
331
|
}
|
|
306
332
|
|
|
333
|
+
/// Returns a handle that the workflows management side can use to interact with this manager
|
|
334
|
+
pub(crate) fn get_handle_for_workflows(&self) -> ActivitiesFromWFTsHandle {
|
|
335
|
+
ActivitiesFromWFTsHandle {
|
|
336
|
+
sem: self.activities_semaphore.clone(),
|
|
337
|
+
tx: self.non_poll_tasks.tx.clone(),
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
307
341
|
async fn next_pending_cancel_task(&self) -> Result<Option<ActivityTask>, PollActivityError> {
|
|
308
342
|
let next_pc = self.heartbeat_manager.next_pending_cancel().await;
|
|
309
343
|
// Issue cancellations for anything we noticed was cancelled during heartbeating
|
|
@@ -336,8 +370,101 @@ impl WorkerActivityTasks {
|
|
|
336
370
|
}
|
|
337
371
|
}
|
|
338
372
|
|
|
373
|
+
/// Called when there is a new act task about to be bubbled up out of the manager
|
|
374
|
+
fn about_to_issue_task(&self, task: PermittedTqResp) -> ActivityTask {
|
|
375
|
+
if let Some(dur) = task.resp.sched_to_start() {
|
|
376
|
+
self.metrics.act_sched_to_start_latency(dur);
|
|
377
|
+
};
|
|
378
|
+
|
|
379
|
+
self.outstanding_activity_tasks.insert(
|
|
380
|
+
task.resp.task_token.clone().into(),
|
|
381
|
+
RemoteInFlightActInfo::new(
|
|
382
|
+
task.resp.activity_type.clone().unwrap_or_default().name,
|
|
383
|
+
task.resp.workflow_type.clone().unwrap_or_default().name,
|
|
384
|
+
task.resp.heartbeat_timeout.clone(),
|
|
385
|
+
task.permit,
|
|
386
|
+
),
|
|
387
|
+
);
|
|
388
|
+
|
|
389
|
+
ActivityTask::start_from_poll_resp(task.resp)
|
|
390
|
+
}
|
|
391
|
+
|
|
339
392
|
#[cfg(test)]
|
|
340
393
|
pub(crate) fn remaining_activity_capacity(&self) -> usize {
|
|
341
|
-
self.activities_semaphore.
|
|
394
|
+
self.activities_semaphore.available_permits()
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
/// Provides facilities for the workflow side of things to interact with the activity manager.
|
|
399
|
+
/// Allows for the handling of activities returned by WFT completions.
|
|
400
|
+
pub(crate) struct ActivitiesFromWFTsHandle {
|
|
401
|
+
sem: Arc<MeteredSemaphore>,
|
|
402
|
+
tx: async_channel::Sender<PermittedTqResp>,
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
impl ActivitiesFromWFTsHandle {
|
|
406
|
+
/// Returns a handle that can be used to reserve an activity slot. EX: When requesting eager
|
|
407
|
+
/// dispatch of an activity to this worker upon workflow task completion
|
|
408
|
+
pub(crate) fn reserve_slot(&self) -> Option<OwnedMeteredSemPermit> {
|
|
409
|
+
self.sem.try_acquire_owned().ok()
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/// Queue new activity tasks for dispatch received from non-polling sources (ex: eager returns
|
|
413
|
+
/// from WFT completion)
|
|
414
|
+
pub(crate) fn add_tasks(&self, tasks: impl IntoIterator<Item = PermittedTqResp>) {
|
|
415
|
+
for t in tasks.into_iter() {
|
|
416
|
+
self.tx.try_send(t).expect("Receive half cannot be dropped");
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
pub(crate) struct PermittedTqResp {
|
|
422
|
+
pub permit: OwnedMeteredSemPermit,
|
|
423
|
+
pub resp: PollActivityTaskQueueResponse,
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
#[cfg(test)]
|
|
427
|
+
mod tests {
|
|
428
|
+
use super::*;
|
|
429
|
+
use crate::{
|
|
430
|
+
test_help::mock_poller_from_resps, worker::client::mocks::mock_manual_workflow_client,
|
|
431
|
+
};
|
|
432
|
+
|
|
433
|
+
#[tokio::test]
|
|
434
|
+
async fn per_worker_ratelimit() {
|
|
435
|
+
let poller = mock_poller_from_resps([
|
|
436
|
+
PollActivityTaskQueueResponse {
|
|
437
|
+
task_token: vec![1],
|
|
438
|
+
activity_id: "act1".to_string(),
|
|
439
|
+
..Default::default()
|
|
440
|
+
}
|
|
441
|
+
.into(),
|
|
442
|
+
PollActivityTaskQueueResponse {
|
|
443
|
+
task_token: vec![2],
|
|
444
|
+
activity_id: "act2".to_string(),
|
|
445
|
+
..Default::default()
|
|
446
|
+
}
|
|
447
|
+
.into(),
|
|
448
|
+
]);
|
|
449
|
+
let client = WorkerClientBag::new(
|
|
450
|
+
Box::new(mock_manual_workflow_client()),
|
|
451
|
+
"fake_namespace".to_string(),
|
|
452
|
+
);
|
|
453
|
+
let atm = WorkerActivityTasks::new(
|
|
454
|
+
10,
|
|
455
|
+
Some(2.0),
|
|
456
|
+
poller,
|
|
457
|
+
Arc::new(client),
|
|
458
|
+
MetricsContext::default(),
|
|
459
|
+
Duration::from_secs(1),
|
|
460
|
+
Duration::from_secs(1),
|
|
461
|
+
);
|
|
462
|
+
let start = Instant::now();
|
|
463
|
+
atm.poll().await.unwrap().unwrap();
|
|
464
|
+
atm.poll().await.unwrap().unwrap();
|
|
465
|
+
// At least half a second will have elapsed since we only allow 2 tasks per second.
|
|
466
|
+
// With no ratelimit, even on a slow CI server with lots of load, this would typically take
|
|
467
|
+
// low single digit ms or less.
|
|
468
|
+
assert!(start.elapsed() > Duration::from_secs_f64(0.5));
|
|
342
469
|
}
|
|
343
470
|
}
|
|
@@ -118,6 +118,7 @@ pub(crate) trait WorkerClient: Sync + Send {
|
|
|
118
118
|
#[async_trait::async_trait]
|
|
119
119
|
impl<'a, T> WorkerClient for T
|
|
120
120
|
where
|
|
121
|
+
// TODO: This should be workflow service... no reason to marry worker trait to sdk client trait
|
|
121
122
|
T: Borrow<dyn WorkflowClientTrait + 'a + Send + Sync> + Send + Sync,
|
|
122
123
|
{
|
|
123
124
|
async fn poll_workflow_task(
|