@temporalio/core-bridge 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +255 -48
- package/package.json +4 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/pipeline.yml +1 -3
- package/sdk-core/.cargo/config.toml +5 -2
- package/sdk-core/.github/workflows/heavy.yml +28 -0
- package/sdk-core/Cargo.toml +1 -1
- package/sdk-core/README.md +9 -5
- package/sdk-core/client/src/lib.rs +211 -36
- package/sdk-core/client/src/raw.rs +1 -1
- package/sdk-core/client/src/retry.rs +32 -20
- package/sdk-core/core/Cargo.toml +23 -9
- package/sdk-core/core/src/abstractions.rs +11 -0
- package/sdk-core/core/src/core_tests/activity_tasks.rs +6 -5
- package/sdk-core/core/src/core_tests/local_activities.rs +263 -22
- package/sdk-core/core/src/core_tests/queries.rs +2 -2
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +249 -5
- package/sdk-core/core/src/ephemeral_server/mod.rs +5 -6
- package/sdk-core/core/src/lib.rs +2 -0
- package/sdk-core/core/src/protosext/mod.rs +1 -1
- package/sdk-core/core/src/telemetry/log_export.rs +1 -1
- package/sdk-core/core/src/telemetry/mod.rs +23 -8
- package/sdk-core/core/src/test_help/mod.rs +8 -1
- package/sdk-core/core/src/worker/activities/local_activities.rs +259 -125
- package/sdk-core/core/src/worker/activities.rs +3 -2
- package/sdk-core/core/src/worker/mod.rs +53 -26
- package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- package/sdk-core/core/src/worker/workflow/history_update.rs +835 -277
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +9 -17
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +73 -51
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +3 -3
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +4 -4
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +6 -7
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +4 -4
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +89 -58
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +4 -7
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +21 -9
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1021 -360
- package/sdk-core/core/src/worker/workflow/mod.rs +306 -346
- package/sdk-core/core/src/worker/workflow/run_cache.rs +29 -53
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +115 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +444 -714
- package/sdk-core/core-api/Cargo.toml +2 -0
- package/sdk-core/core-api/src/errors.rs +1 -34
- package/sdk-core/core-api/src/lib.rs +6 -2
- package/sdk-core/core-api/src/worker.rs +14 -1
- package/sdk-core/etc/deps.svg +115 -140
- package/sdk-core/etc/regen-depgraph.sh +5 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +6 -6
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -3
- package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- package/sdk-core/protos/api_upstream/Makefile +5 -5
- package/sdk-core/protos/api_upstream/build/go.mod +7 -0
- package/sdk-core/protos/api_upstream/build/go.sum +5 -0
- package/sdk-core/protos/api_upstream/build/tools.go +29 -0
- package/sdk-core/protos/api_upstream/go.mod +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +12 -19
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +3 -3
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +20 -2
- package/sdk-core/protos/api_upstream/temporal/api/{update/v1/message.proto → enums/v1/interaction_type.proto} +11 -18
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +2 -13
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -19
- package/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +87 -0
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +13 -8
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- package/sdk-core/sdk/Cargo.toml +4 -3
- package/sdk-core/sdk/src/lib.rs +87 -21
- package/sdk-core/sdk/src/workflow_future.rs +7 -12
- package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- package/sdk-core/sdk-core-protos/build.rs +36 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +26 -19
- package/sdk-core/sdk-core-protos/src/history_info.rs +4 -0
- package/sdk-core/sdk-core-protos/src/lib.rs +78 -34
- package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +50 -18
- package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- package/sdk-core/test-utils/src/workflows.rs +29 -0
- package/sdk-core/tests/fuzzy_workflow.rs +130 -0
- package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +114 -7
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -2
- package/sdk-core/tests/integ_tests/metrics_tests.rs +1 -1
- package/sdk-core/tests/integ_tests/polling_tests.rs +1 -39
- package/sdk-core/tests/integ_tests/queries_tests.rs +2 -127
- package/sdk-core/tests/integ_tests/visibility_tests.rs +52 -5
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +74 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +5 -13
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +2 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +69 -197
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +4 -28
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +14 -14
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +3 -19
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +3 -19
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests.rs +5 -6
- package/sdk-core/tests/main.rs +2 -12
- package/sdk-core/tests/runner.rs +71 -34
- package/sdk-core/tests/wf_input_replay.rs +32 -0
- package/sdk-core/bridge-ffi/Cargo.toml +0 -24
- package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- package/sdk-core/bridge-ffi/build.rs +0 -25
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
- package/sdk-core/bridge-ffi/src/lib.rs +0 -746
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- package/sdk-core/sdk/src/conversions.rs +0 -8
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
use crate::{
|
|
2
|
-
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
2
|
+
abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit},
|
|
3
3
|
protosext::ValidScheduleLA,
|
|
4
4
|
retry_logic::RetryPolicyExt,
|
|
5
|
+
worker::workflow::HeartbeatTimeoutMsg,
|
|
5
6
|
MetricsContext, TaskToken,
|
|
6
7
|
};
|
|
7
|
-
use
|
|
8
|
+
use futures::{stream::BoxStream, Stream};
|
|
9
|
+
use futures_util::{future, future::AbortRegistration, stream, StreamExt};
|
|
10
|
+
use parking_lot::{Mutex, MutexGuard};
|
|
8
11
|
use std::{
|
|
9
|
-
collections::HashMap,
|
|
12
|
+
collections::{hash_map::Entry, HashMap},
|
|
10
13
|
fmt::{Debug, Formatter},
|
|
14
|
+
pin::Pin,
|
|
15
|
+
task::{Context, Poll},
|
|
11
16
|
time::{Duration, Instant, SystemTime},
|
|
12
17
|
};
|
|
13
18
|
use temporal_sdk_core_protos::{
|
|
@@ -25,6 +30,7 @@ use tokio::{
|
|
|
25
30
|
task::JoinHandle,
|
|
26
31
|
time::sleep,
|
|
27
32
|
};
|
|
33
|
+
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
28
34
|
use tokio_util::sync::CancellationToken;
|
|
29
35
|
|
|
30
36
|
#[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
|
|
@@ -49,6 +55,10 @@ pub(crate) struct LocalInFlightActInfo {
|
|
|
49
55
|
}
|
|
50
56
|
|
|
51
57
|
#[derive(Debug, Clone)]
|
|
58
|
+
#[cfg_attr(
|
|
59
|
+
feature = "save_wf_inputs",
|
|
60
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
61
|
+
)]
|
|
52
62
|
pub(crate) enum LocalActivityExecutionResult {
|
|
53
63
|
Completed(Success),
|
|
54
64
|
Failed(ActFail),
|
|
@@ -65,6 +75,10 @@ impl LocalActivityExecutionResult {
|
|
|
65
75
|
}
|
|
66
76
|
|
|
67
77
|
#[derive(Debug, Clone)]
|
|
78
|
+
#[cfg_attr(
|
|
79
|
+
feature = "save_wf_inputs",
|
|
80
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
81
|
+
)]
|
|
68
82
|
pub(crate) struct LocalActivityResolution {
|
|
69
83
|
pub seq: u32,
|
|
70
84
|
pub result: LocalActivityExecutionResult,
|
|
@@ -96,6 +110,12 @@ impl Debug for NewLocalAct {
|
|
|
96
110
|
pub(crate) enum LocalActRequest {
|
|
97
111
|
New(NewLocalAct),
|
|
98
112
|
Cancel(ExecutingLAId),
|
|
113
|
+
CancelAllInRun(String),
|
|
114
|
+
StartHeartbeatTimeout {
|
|
115
|
+
send_on_elapse: HeartbeatTimeoutMsg,
|
|
116
|
+
deadline: Instant,
|
|
117
|
+
abort_reg: AbortRegistration,
|
|
118
|
+
},
|
|
99
119
|
}
|
|
100
120
|
|
|
101
121
|
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
|
@@ -107,28 +127,39 @@ pub(crate) struct ExecutingLAId {
|
|
|
107
127
|
pub(crate) struct LocalActivityManager {
|
|
108
128
|
/// Just so we can provide activity tasks the same namespace as the worker
|
|
109
129
|
namespace: String,
|
|
110
|
-
/// Constrains number of currently executing local activities
|
|
111
|
-
semaphore: MeteredSemaphore,
|
|
112
130
|
/// Sink for new activity execution requests
|
|
113
131
|
act_req_tx: UnboundedSender<NewOrRetry>,
|
|
114
132
|
/// Cancels need a different queue since they should be taken first, and don't take a permit
|
|
115
133
|
cancels_req_tx: UnboundedSender<CancelOrTimeout>,
|
|
134
|
+
/// For the emission of heartbeat timeouts, back into the workflow machines. This channel
|
|
135
|
+
/// needs to come in from above us, because we cannot rely on callers getting the next
|
|
136
|
+
/// activation as a way to deliver heartbeats.
|
|
137
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
116
138
|
/// Wakes every time a complete is processed
|
|
117
139
|
complete_notify: Notify,
|
|
140
|
+
/// Set once workflows have finished shutting down, and thus we know we will no longer receive
|
|
141
|
+
/// any requests to spawn new LAs
|
|
142
|
+
workflows_have_shut_down: CancellationToken,
|
|
118
143
|
|
|
119
144
|
rcvs: tokio::sync::Mutex<RcvChans>,
|
|
120
145
|
shutdown_complete_tok: CancellationToken,
|
|
121
146
|
dat: Mutex<LAMData>,
|
|
122
147
|
}
|
|
123
148
|
|
|
149
|
+
struct LocalActivityInfo {
|
|
150
|
+
task_token: TaskToken,
|
|
151
|
+
/// Tasks for the current backoff until the next retry, if any.
|
|
152
|
+
backing_off_task: Option<JoinHandle<()>>,
|
|
153
|
+
/// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
|
|
154
|
+
/// while the LA id has been generated, but it has not yet been scheduled.
|
|
155
|
+
timeout_bag: Option<TimeoutBag>,
|
|
156
|
+
}
|
|
157
|
+
|
|
124
158
|
struct LAMData {
|
|
159
|
+
/// Maps local activity identifiers to information about them
|
|
160
|
+
la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
|
|
125
161
|
/// Activities that have been issued to lang but not yet completed
|
|
126
162
|
outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
|
|
127
|
-
id_to_tt: HashMap<ExecutingLAId, TaskToken>,
|
|
128
|
-
/// Tasks for activities which are currently backing off. May be used to cancel retrying them.
|
|
129
|
-
backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
|
|
130
|
-
/// Tasks for timing out activities which are currently in the queue or dispatched.
|
|
131
|
-
timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
|
|
132
163
|
next_tt_num: u32,
|
|
133
164
|
}
|
|
134
165
|
|
|
@@ -143,42 +174,46 @@ impl LocalActivityManager {
|
|
|
143
174
|
pub(crate) fn new(
|
|
144
175
|
max_concurrent: usize,
|
|
145
176
|
namespace: String,
|
|
177
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
146
178
|
metrics_context: MetricsContext,
|
|
147
179
|
) -> Self {
|
|
148
180
|
let (act_req_tx, act_req_rx) = unbounded_channel();
|
|
149
181
|
let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
|
|
150
182
|
let shutdown_complete_tok = CancellationToken::new();
|
|
183
|
+
let semaphore = MeteredSemaphore::new(
|
|
184
|
+
max_concurrent,
|
|
185
|
+
metrics_context,
|
|
186
|
+
MetricsContext::available_task_slots,
|
|
187
|
+
);
|
|
151
188
|
Self {
|
|
152
189
|
namespace,
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
190
|
+
rcvs: tokio::sync::Mutex::new(RcvChans::new(
|
|
191
|
+
act_req_rx,
|
|
192
|
+
semaphore,
|
|
193
|
+
cancels_req_rx,
|
|
194
|
+
shutdown_complete_tok.clone(),
|
|
195
|
+
)),
|
|
158
196
|
act_req_tx,
|
|
159
197
|
cancels_req_tx,
|
|
198
|
+
heartbeat_timeout_tx,
|
|
160
199
|
complete_notify: Notify::new(),
|
|
161
|
-
rcvs: tokio::sync::Mutex::new(RcvChans {
|
|
162
|
-
act_req_rx,
|
|
163
|
-
cancels_req_rx,
|
|
164
|
-
shutdown: shutdown_complete_tok.clone(),
|
|
165
|
-
}),
|
|
166
200
|
shutdown_complete_tok,
|
|
167
201
|
dat: Mutex::new(LAMData {
|
|
168
202
|
outstanding_activity_tasks: Default::default(),
|
|
169
|
-
|
|
170
|
-
backing_off_tasks: Default::default(),
|
|
171
|
-
timeout_tasks: Default::default(),
|
|
203
|
+
la_info: Default::default(),
|
|
172
204
|
next_tt_num: 0,
|
|
173
205
|
}),
|
|
206
|
+
workflows_have_shut_down: Default::default(),
|
|
174
207
|
}
|
|
175
208
|
}
|
|
176
209
|
|
|
177
210
|
#[cfg(test)]
|
|
178
211
|
fn test(max_concurrent: usize) -> Self {
|
|
212
|
+
let (hb_tx, _hb_rx) = unbounded_channel();
|
|
179
213
|
Self::new(
|
|
180
214
|
max_concurrent,
|
|
181
215
|
"fake_ns".to_string(),
|
|
216
|
+
hb_tx,
|
|
182
217
|
MetricsContext::no_op(),
|
|
183
218
|
)
|
|
184
219
|
}
|
|
@@ -190,76 +225,103 @@ impl LocalActivityManager {
|
|
|
190
225
|
|
|
191
226
|
#[cfg(test)]
|
|
192
227
|
fn num_in_backoff(&self) -> usize {
|
|
193
|
-
self.dat
|
|
228
|
+
self.dat
|
|
229
|
+
.lock()
|
|
230
|
+
.la_info
|
|
231
|
+
.values()
|
|
232
|
+
.filter(|lai| lai.backing_off_task.is_some())
|
|
233
|
+
.count()
|
|
194
234
|
}
|
|
195
235
|
|
|
196
236
|
pub(crate) fn enqueue(
|
|
197
237
|
&self,
|
|
198
238
|
reqs: impl IntoIterator<Item = LocalActRequest>,
|
|
199
239
|
) -> Vec<LocalActivityResolution> {
|
|
240
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
241
|
+
dbg_panic!("Tried to enqueue local activity after workflows were shut down");
|
|
242
|
+
return vec![];
|
|
243
|
+
}
|
|
200
244
|
let mut immediate_resolutions = vec![];
|
|
201
245
|
for req in reqs {
|
|
202
|
-
debug!(local_activity = ?req, "Queuing local activity");
|
|
203
246
|
match req {
|
|
204
247
|
LocalActRequest::New(act) => {
|
|
248
|
+
debug!(local_activity=?act, "Queuing local activity");
|
|
205
249
|
let id = ExecutingLAId {
|
|
206
250
|
run_id: act.workflow_exec_info.run_id.clone(),
|
|
207
251
|
seq_num: act.schedule_cmd.seq,
|
|
208
252
|
};
|
|
209
253
|
let mut dlock = self.dat.lock();
|
|
210
|
-
if dlock.id_to_tt.contains_key(&id) {
|
|
211
|
-
// Do not queue local activities which are in fact already executing.
|
|
212
|
-
// This can happen during evictions.
|
|
213
|
-
debug!("Tried to queue already-executing local activity {:?}", &id);
|
|
214
|
-
continue;
|
|
215
|
-
}
|
|
216
|
-
// Pre-generate and insert the task token now, before we may or may not dispatch
|
|
217
|
-
// the activity, so we can enforce idempotency. Prevents two identical LAs
|
|
218
|
-
// ending up in the queue at once.
|
|
219
254
|
let tt = dlock.gen_next_token();
|
|
220
|
-
dlock.
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
255
|
+
match dlock.la_info.entry(id) {
|
|
256
|
+
Entry::Occupied(o) => {
|
|
257
|
+
// Do not queue local activities which are in fact already executing.
|
|
258
|
+
// This can happen during evictions.
|
|
259
|
+
debug!(
|
|
260
|
+
"Tried to queue already-executing local activity {:?}",
|
|
261
|
+
o.key()
|
|
262
|
+
);
|
|
263
|
+
continue;
|
|
264
|
+
}
|
|
265
|
+
Entry::Vacant(ve) => {
|
|
266
|
+
// Insert the task token now, before we may or may not dispatch the
|
|
267
|
+
// activity, so we can enforce idempotency. Prevents two identical LAs
|
|
268
|
+
// ending up in the queue at once.
|
|
269
|
+
let lai = ve.insert(LocalActivityInfo {
|
|
270
|
+
task_token: tt,
|
|
271
|
+
backing_off_task: None,
|
|
272
|
+
timeout_bag: None,
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
// Set up timeouts for the new activity
|
|
276
|
+
match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
|
|
277
|
+
Ok(tb) => {
|
|
278
|
+
lai.timeout_bag = Some(tb);
|
|
279
|
+
|
|
280
|
+
self.act_req_tx.send(NewOrRetry::New(act)).expect(
|
|
281
|
+
"Receive half of LA request channel cannot be dropped",
|
|
282
|
+
);
|
|
283
|
+
}
|
|
284
|
+
Err(res) => immediate_resolutions.push(res),
|
|
285
|
+
}
|
|
230
286
|
}
|
|
231
|
-
Err(res) => immediate_resolutions.push(res),
|
|
232
287
|
}
|
|
233
288
|
}
|
|
289
|
+
LocalActRequest::StartHeartbeatTimeout {
|
|
290
|
+
send_on_elapse,
|
|
291
|
+
deadline,
|
|
292
|
+
abort_reg,
|
|
293
|
+
} => {
|
|
294
|
+
let chan = self.heartbeat_timeout_tx.clone();
|
|
295
|
+
tokio::spawn(future::Abortable::new(
|
|
296
|
+
async move {
|
|
297
|
+
tokio::time::sleep_until(deadline.into()).await;
|
|
298
|
+
let _ = chan.send(send_on_elapse);
|
|
299
|
+
},
|
|
300
|
+
abort_reg,
|
|
301
|
+
));
|
|
302
|
+
}
|
|
234
303
|
LocalActRequest::Cancel(id) => {
|
|
304
|
+
debug!(id=?id, "Cancelling local activity");
|
|
235
305
|
let mut dlock = self.dat.lock();
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
t.abort();
|
|
241
|
-
immediate_resolutions.push(LocalActivityResolution {
|
|
242
|
-
seq: id.seq_num,
|
|
243
|
-
result: LocalActivityExecutionResult::Cancelled(
|
|
244
|
-
Cancellation::from_details(None),
|
|
245
|
-
),
|
|
246
|
-
runtime: Duration::from_secs(0),
|
|
247
|
-
attempt: 0,
|
|
248
|
-
backoff: None,
|
|
249
|
-
original_schedule_time: None,
|
|
250
|
-
});
|
|
251
|
-
continue;
|
|
306
|
+
if let Some(lai) = dlock.la_info.get_mut(&id) {
|
|
307
|
+
if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
|
|
308
|
+
immediate_resolutions.push(immediate_res);
|
|
309
|
+
}
|
|
252
310
|
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
311
|
+
}
|
|
312
|
+
LocalActRequest::CancelAllInRun(run_id) => {
|
|
313
|
+
debug!(run_id=%run_id, "Cancelling all local activities for run");
|
|
314
|
+
let mut dlock = self.dat.lock();
|
|
315
|
+
// Even if we've got 100k+ LAs this should only take a ms or two. Not worth
|
|
316
|
+
// adding another map to keep in sync.
|
|
317
|
+
let las_for_run = dlock
|
|
318
|
+
.la_info
|
|
319
|
+
.iter_mut()
|
|
320
|
+
.filter(|(id, _)| id.run_id == run_id);
|
|
321
|
+
for (laid, lainf) in las_for_run {
|
|
322
|
+
if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
|
|
323
|
+
immediate_resolutions.push(immediate_res);
|
|
324
|
+
}
|
|
263
325
|
}
|
|
264
326
|
}
|
|
265
327
|
}
|
|
@@ -270,7 +332,7 @@ impl LocalActivityManager {
|
|
|
270
332
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
|
271
333
|
/// and there are no more remaining actions to take.
|
|
272
334
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
|
273
|
-
let (new_or_retry, permit) = match self.rcvs.lock().await.next(
|
|
335
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
|
|
274
336
|
NewOrCancel::Cancel(c) => {
|
|
275
337
|
return match c {
|
|
276
338
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
|
@@ -283,12 +345,13 @@ impl LocalActivityManager {
|
|
|
283
345
|
let tt = self
|
|
284
346
|
.dat
|
|
285
347
|
.lock()
|
|
286
|
-
.
|
|
348
|
+
.la_info
|
|
287
349
|
.get(&ExecutingLAId {
|
|
288
350
|
run_id: run_id.clone(),
|
|
289
351
|
seq_num: resolution.seq,
|
|
290
352
|
})
|
|
291
|
-
.
|
|
353
|
+
.as_ref()
|
|
354
|
+
.map(|lai| lai.task_token.clone());
|
|
292
355
|
if let Some(task_token) = tt {
|
|
293
356
|
self.complete(&task_token, &resolution.result);
|
|
294
357
|
Some(ActivityTask {
|
|
@@ -323,18 +386,21 @@ impl LocalActivityManager {
|
|
|
323
386
|
}
|
|
324
387
|
NewOrRetry::Retry { in_flight, attempt } => (in_flight, attempt),
|
|
325
388
|
};
|
|
326
|
-
let
|
|
389
|
+
let la_info_for_in_flight_map = new_la.clone();
|
|
327
390
|
let id = ExecutingLAId {
|
|
328
391
|
run_id: new_la.workflow_exec_info.run_id.clone(),
|
|
329
392
|
seq_num: new_la.schedule_cmd.seq,
|
|
330
393
|
};
|
|
394
|
+
let orig_sched_time = new_la.schedule_cmd.original_schedule_time;
|
|
331
395
|
let sa = new_la.schedule_cmd;
|
|
332
396
|
|
|
333
397
|
let mut dat = self.dat.lock();
|
|
334
398
|
// If this request originated from a local backoff task, clear the entry for it. We
|
|
335
399
|
// don't await the handle because we know it must already be done, and there's no
|
|
336
400
|
// meaningful value.
|
|
337
|
-
dat.
|
|
401
|
+
dat.la_info
|
|
402
|
+
.get_mut(&id)
|
|
403
|
+
.map(|lai| lai.backing_off_task.take());
|
|
338
404
|
|
|
339
405
|
// If this task sat in the queue for too long, return a timeout for it instead
|
|
340
406
|
if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
|
|
@@ -348,30 +414,27 @@ impl LocalActivityManager {
|
|
|
348
414
|
runtime: sat_for,
|
|
349
415
|
attempt,
|
|
350
416
|
backoff: None,
|
|
351
|
-
original_schedule_time:
|
|
417
|
+
original_schedule_time: orig_sched_time,
|
|
352
418
|
},
|
|
353
419
|
task: None,
|
|
354
420
|
});
|
|
355
421
|
}
|
|
356
422
|
}
|
|
357
423
|
|
|
358
|
-
let
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
.
|
|
362
|
-
|
|
424
|
+
let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
|
|
425
|
+
let tt = la_info.task_token.clone();
|
|
426
|
+
if let Some(to) = la_info.timeout_bag.as_mut() {
|
|
427
|
+
to.mark_started();
|
|
428
|
+
}
|
|
363
429
|
dat.outstanding_activity_tasks.insert(
|
|
364
430
|
tt.clone(),
|
|
365
431
|
LocalInFlightActInfo {
|
|
366
|
-
la_info:
|
|
432
|
+
la_info: la_info_for_in_flight_map,
|
|
367
433
|
dispatch_time: Instant::now(),
|
|
368
434
|
attempt,
|
|
369
435
|
_permit: permit,
|
|
370
436
|
},
|
|
371
437
|
);
|
|
372
|
-
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
|
373
|
-
to.mark_started();
|
|
374
|
-
}
|
|
375
438
|
|
|
376
439
|
let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
|
|
377
440
|
Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
|
|
@@ -406,11 +469,23 @@ impl LocalActivityManager {
|
|
|
406
469
|
) -> LACompleteAction {
|
|
407
470
|
let mut dlock = self.dat.lock();
|
|
408
471
|
if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
|
|
472
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
473
|
+
// If workflows are already shut down, the results of all this don't matter.
|
|
474
|
+
// Just say we're done if there's nothing outstanding any more.
|
|
475
|
+
self.set_shutdown_complete_if_ready(&mut dlock);
|
|
476
|
+
}
|
|
477
|
+
|
|
409
478
|
let exec_id = ExecutingLAId {
|
|
410
479
|
run_id: info.la_info.workflow_exec_info.run_id.clone(),
|
|
411
480
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
412
481
|
};
|
|
413
|
-
dlock.
|
|
482
|
+
let maybe_old_lai = dlock.la_info.remove(&exec_id);
|
|
483
|
+
if let Some(ref oldlai) = maybe_old_lai {
|
|
484
|
+
if let Some(ref bot) = oldlai.backing_off_task {
|
|
485
|
+
dbg_panic!("Just-resolved LA should not have backoff task");
|
|
486
|
+
bot.abort();
|
|
487
|
+
}
|
|
488
|
+
}
|
|
414
489
|
|
|
415
490
|
match status {
|
|
416
491
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -446,8 +521,6 @@ impl LocalActivityManager {
|
|
|
446
521
|
}
|
|
447
522
|
// Immediately create a new task token for the to-be-retried LA
|
|
448
523
|
let tt = dlock.gen_next_token();
|
|
449
|
-
dlock.id_to_tt.insert(exec_id.clone(), tt);
|
|
450
|
-
|
|
451
524
|
// Send the retry request after waiting the backoff duration
|
|
452
525
|
let send_chan = self.act_req_tx.clone();
|
|
453
526
|
let jh = tokio::spawn(async move {
|
|
@@ -460,7 +533,14 @@ impl LocalActivityManager {
|
|
|
460
533
|
})
|
|
461
534
|
.expect("Receive half of LA request channel cannot be dropped");
|
|
462
535
|
});
|
|
463
|
-
dlock.
|
|
536
|
+
dlock.la_info.insert(
|
|
537
|
+
exec_id,
|
|
538
|
+
LocalActivityInfo {
|
|
539
|
+
task_token: tt,
|
|
540
|
+
backing_off_task: Some(jh),
|
|
541
|
+
timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
|
|
542
|
+
},
|
|
543
|
+
);
|
|
464
544
|
|
|
465
545
|
LACompleteAction::WillBeRetried
|
|
466
546
|
} else {
|
|
@@ -473,11 +553,53 @@ impl LocalActivityManager {
|
|
|
473
553
|
}
|
|
474
554
|
}
|
|
475
555
|
|
|
476
|
-
pub(crate)
|
|
477
|
-
|
|
556
|
+
pub(crate) fn workflows_have_shutdown(&self) {
|
|
557
|
+
self.workflows_have_shut_down.cancel();
|
|
558
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
|
|
562
|
+
while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
|
|
478
563
|
self.complete_notify.notified().await;
|
|
479
564
|
}
|
|
480
|
-
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
|
|
568
|
+
let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
|
|
569
|
+
if nothing_outstanding {
|
|
570
|
+
self.shutdown_complete_tok.cancel();
|
|
571
|
+
}
|
|
572
|
+
nothing_outstanding
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
fn cancel_one_la(
|
|
576
|
+
&self,
|
|
577
|
+
seq: u32,
|
|
578
|
+
lai: &mut LocalActivityInfo,
|
|
579
|
+
) -> Option<LocalActivityResolution> {
|
|
580
|
+
// First check if this ID is currently backing off, if so abort the backoff
|
|
581
|
+
// task
|
|
582
|
+
if let Some(t) = lai.backing_off_task.take() {
|
|
583
|
+
t.abort();
|
|
584
|
+
return Some(LocalActivityResolution {
|
|
585
|
+
seq,
|
|
586
|
+
result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
|
|
587
|
+
runtime: Duration::from_secs(0),
|
|
588
|
+
attempt: 0,
|
|
589
|
+
backoff: None,
|
|
590
|
+
original_schedule_time: None,
|
|
591
|
+
});
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
self.cancels_req_tx
|
|
595
|
+
.send(CancelOrTimeout::Cancel(ActivityTask {
|
|
596
|
+
task_token: lai.task_token.0.clone(),
|
|
597
|
+
variant: Some(activity_task::Variant::Cancel(Cancel {
|
|
598
|
+
reason: ActivityCancelReason::Cancelled as i32,
|
|
599
|
+
})),
|
|
600
|
+
}))
|
|
601
|
+
.expect("Receive half of LA cancel channel cannot be dropped");
|
|
602
|
+
None
|
|
481
603
|
}
|
|
482
604
|
}
|
|
483
605
|
|
|
@@ -521,32 +643,45 @@ enum NewOrCancel {
|
|
|
521
643
|
Cancel(CancelOrTimeout),
|
|
522
644
|
}
|
|
523
645
|
|
|
646
|
+
#[pin_project::pin_project]
|
|
524
647
|
struct RcvChans {
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
/// Cancels to send to lang or apply internally
|
|
528
|
-
cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
|
|
529
|
-
shutdown: CancellationToken,
|
|
648
|
+
#[pin]
|
|
649
|
+
inner: BoxStream<'static, NewOrCancel>,
|
|
530
650
|
}
|
|
531
651
|
|
|
532
652
|
impl RcvChans {
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
653
|
+
fn new(
|
|
654
|
+
new_reqs: UnboundedReceiver<NewOrRetry>,
|
|
655
|
+
new_sem: MeteredSemaphore,
|
|
656
|
+
cancels: UnboundedReceiver<CancelOrTimeout>,
|
|
657
|
+
shutdown_completed: CancellationToken,
|
|
658
|
+
) -> Self {
|
|
659
|
+
let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
|
|
660
|
+
let new_stream = UnboundedReceiverStream::new(new_reqs)
|
|
661
|
+
// Get a permit for each new activity request
|
|
662
|
+
.zip(stream::unfold(new_sem, |new_sem| async move {
|
|
663
|
+
let permit = new_sem
|
|
664
|
+
.acquire_owned()
|
|
665
|
+
.await
|
|
666
|
+
.expect("Local activity semaphore is never closed");
|
|
667
|
+
Some((permit, new_sem))
|
|
668
|
+
}))
|
|
669
|
+
.map(|(req, permit)| NewOrCancel::New(req, permit));
|
|
670
|
+
Self {
|
|
671
|
+
inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
|
|
672
|
+
.take_until(async move { shutdown_completed.cancelled().await })
|
|
673
|
+
.boxed(),
|
|
547
674
|
}
|
|
548
675
|
}
|
|
549
676
|
}
|
|
677
|
+
impl Stream for RcvChans {
|
|
678
|
+
type Item = NewOrCancel;
|
|
679
|
+
|
|
680
|
+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
681
|
+
let this = self.project();
|
|
682
|
+
this.inner.poll_next(cx)
|
|
683
|
+
}
|
|
684
|
+
}
|
|
550
685
|
|
|
551
686
|
struct TimeoutBag {
|
|
552
687
|
sched_to_close_handle: JoinHandle<()>,
|
|
@@ -567,17 +702,21 @@ impl TimeoutBag {
|
|
|
567
702
|
let (schedule_to_close, start_to_close) =
|
|
568
703
|
new_la.schedule_cmd.close_timeouts.into_sched_and_start();
|
|
569
704
|
|
|
705
|
+
let sched_time = new_la
|
|
706
|
+
.schedule_cmd
|
|
707
|
+
.original_schedule_time
|
|
708
|
+
.unwrap_or(new_la.schedule_time);
|
|
570
709
|
let resolution = LocalActivityResolution {
|
|
571
710
|
seq: new_la.schedule_cmd.seq,
|
|
572
711
|
result: LocalActivityExecutionResult::timeout(TimeoutType::ScheduleToClose),
|
|
573
712
|
runtime: Default::default(),
|
|
574
713
|
attempt: new_la.schedule_cmd.attempt,
|
|
575
714
|
backoff: None,
|
|
576
|
-
original_schedule_time:
|
|
715
|
+
original_schedule_time: new_la.schedule_cmd.original_schedule_time,
|
|
577
716
|
};
|
|
578
717
|
// Remove any time already elapsed since the scheduling time
|
|
579
718
|
let schedule_to_close = schedule_to_close
|
|
580
|
-
.map(|s2c| s2c.saturating_sub(
|
|
719
|
+
.map(|s2c| s2c.saturating_sub(sched_time.elapsed().unwrap_or_default()));
|
|
581
720
|
if let Some(ref s2c) = schedule_to_close {
|
|
582
721
|
if s2c.is_zero() {
|
|
583
722
|
return Err(resolution);
|
|
@@ -640,18 +779,19 @@ impl Drop for TimeoutBag {
|
|
|
640
779
|
mod tests {
|
|
641
780
|
use super::*;
|
|
642
781
|
use crate::{prost_dur, protosext::LACloseTimeouts};
|
|
782
|
+
use futures_util::FutureExt;
|
|
643
783
|
use temporal_sdk_core_protos::temporal::api::{
|
|
644
784
|
common::v1::RetryPolicy,
|
|
645
785
|
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
|
646
786
|
};
|
|
647
|
-
use tokio::
|
|
787
|
+
use tokio::task::yield_now;
|
|
648
788
|
|
|
649
789
|
impl DispatchOrTimeoutLA {
|
|
650
790
|
fn unwrap(self) -> ActivityTask {
|
|
651
791
|
match self {
|
|
652
792
|
DispatchOrTimeoutLA::Dispatch(t) => t,
|
|
653
|
-
|
|
654
|
-
panic!("
|
|
793
|
+
_ => {
|
|
794
|
+
panic!("Non-dispatched action returned")
|
|
655
795
|
}
|
|
656
796
|
}
|
|
657
797
|
}
|
|
@@ -1026,18 +1166,12 @@ mod tests {
|
|
|
1026
1166
|
lam.next_pending().await.unwrap().unwrap();
|
|
1027
1167
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1028
1168
|
// There should be nothing else in the queue
|
|
1029
|
-
|
|
1030
|
-
lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
|
|
1031
|
-
TryRecvError::Empty
|
|
1032
|
-
);
|
|
1169
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1033
1170
|
|
|
1034
1171
|
// Verify that if we now enqueue the same act again, after the task is outstanding, we still
|
|
1035
1172
|
// don't add it.
|
|
1036
1173
|
lam.enqueue([new_la.into()]);
|
|
1037
1174
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1038
|
-
|
|
1039
|
-
lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
|
|
1040
|
-
TryRecvError::Empty
|
|
1041
|
-
);
|
|
1175
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1042
1176
|
}
|
|
1043
1177
|
}
|
|
@@ -7,11 +7,12 @@ pub(crate) use local_activities::{
|
|
|
7
7
|
LocalInFlightActInfo, NewLocalAct,
|
|
8
8
|
};
|
|
9
9
|
|
|
10
|
-
use crate::telemetry::metrics::eager;
|
|
11
10
|
use crate::{
|
|
12
11
|
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
13
12
|
pollers::BoxedActPoller,
|
|
14
|
-
telemetry::metrics::{
|
|
13
|
+
telemetry::metrics::{
|
|
14
|
+
activity_type, activity_worker_type, eager, workflow_type, MetricsContext,
|
|
15
|
+
},
|
|
15
16
|
worker::{
|
|
16
17
|
activities::activity_heartbeat_manager::ActivityHeartbeatError, client::WorkerClient,
|
|
17
18
|
},
|