@temporalio/core-bridge 0.19.2 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +90 -157
- package/Cargo.toml +1 -0
- package/index.d.ts +11 -27
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.cargo/config.toml +1 -0
- package/sdk-core/CODEOWNERS +1 -1
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +119 -86
- package/sdk-core/bridge-ffi/src/lib.rs +311 -315
- package/sdk-core/bridge-ffi/src/wrappers.rs +108 -113
- package/sdk-core/client/Cargo.toml +13 -9
- package/sdk-core/client/LICENSE.txt +23 -0
- package/sdk-core/client/src/lib.rs +286 -174
- package/sdk-core/client/src/metrics.rs +86 -12
- package/sdk-core/client/src/raw.rs +566 -0
- package/sdk-core/client/src/retry.rs +137 -99
- package/sdk-core/core/Cargo.toml +15 -10
- package/sdk-core/core/LICENSE.txt +23 -0
- package/sdk-core/core/benches/workflow_replay.rs +79 -0
- package/sdk-core/core/src/abstractions.rs +38 -0
- package/sdk-core/core/src/core_tests/activity_tasks.rs +108 -182
- package/sdk-core/core/src/core_tests/child_workflows.rs +16 -11
- package/sdk-core/core/src/core_tests/determinism.rs +24 -12
- package/sdk-core/core/src/core_tests/local_activities.rs +53 -27
- package/sdk-core/core/src/core_tests/mod.rs +30 -43
- package/sdk-core/core/src/core_tests/queries.rs +82 -81
- package/sdk-core/core/src/core_tests/workers.rs +111 -296
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +4 -4
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +213 -241
- package/sdk-core/core/src/lib.rs +73 -318
- package/sdk-core/core/src/pollers/mod.rs +4 -6
- package/sdk-core/core/src/pollers/poll_buffer.rs +20 -14
- package/sdk-core/core/src/protosext/mod.rs +7 -10
- package/sdk-core/core/src/replay/mod.rs +11 -150
- package/sdk-core/core/src/telemetry/metrics.rs +35 -2
- package/sdk-core/core/src/telemetry/mod.rs +49 -16
- package/sdk-core/core/src/telemetry/prometheus_server.rs +14 -35
- package/sdk-core/core/src/test_help/mod.rs +104 -170
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +57 -34
- package/sdk-core/core/src/worker/activities/local_activities.rs +37 -17
- package/sdk-core/core/src/worker/activities.rs +23 -16
- package/sdk-core/core/src/worker/client/mocks.rs +86 -0
- package/sdk-core/core/src/worker/client.rs +209 -0
- package/sdk-core/core/src/worker/mod.rs +193 -105
- package/sdk-core/core/src/workflow/driven_workflow.rs +21 -6
- package/sdk-core/core/src/workflow/history_update.rs +107 -24
- package/sdk-core/core/src/workflow/machines/activity_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/child_workflow_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/mod.rs +20 -17
- package/sdk-core/core/src/workflow/machines/signal_external_state_machine.rs +56 -19
- package/sdk-core/core/src/workflow/machines/transition_coverage.rs +5 -0
- package/sdk-core/core/src/workflow/machines/upsert_search_attributes_state_machine.rs +230 -22
- package/sdk-core/core/src/workflow/machines/workflow_machines.rs +81 -115
- package/sdk-core/core/src/workflow/machines/workflow_task_state_machine.rs +4 -4
- package/sdk-core/core/src/workflow/mod.rs +13 -1
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +43 -33
- package/sdk-core/core-api/Cargo.toml +9 -1
- package/sdk-core/core-api/LICENSE.txt +23 -0
- package/sdk-core/core-api/src/errors.rs +7 -38
- package/sdk-core/core-api/src/lib.rs +44 -52
- package/sdk-core/core-api/src/worker.rs +10 -2
- package/sdk-core/etc/deps.svg +127 -96
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +11 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +10 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +6 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +2 -1
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +12 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -0
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +19 -35
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -6
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +53 -11
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +14 -7
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +3 -5
- package/sdk-core/sdk/Cargo.toml +16 -2
- package/sdk-core/sdk/LICENSE.txt +23 -0
- package/sdk-core/sdk/src/interceptors.rs +11 -0
- package/sdk-core/sdk/src/lib.rs +138 -150
- package/sdk-core/sdk/src/workflow_context/options.rs +86 -1
- package/sdk-core/sdk/src/workflow_context.rs +36 -17
- package/sdk-core/sdk/src/workflow_future.rs +19 -25
- package/sdk-core/sdk-core-protos/Cargo.toml +1 -1
- package/sdk-core/sdk-core-protos/build.rs +1 -0
- package/sdk-core/sdk-core-protos/src/history_info.rs +17 -4
- package/sdk-core/sdk-core-protos/src/lib.rs +251 -47
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +27 -0
- package/sdk-core/test-utils/src/histfetch.rs +3 -3
- package/sdk-core/test-utils/src/lib.rs +223 -68
- package/sdk-core/tests/integ_tests/client_tests.rs +27 -4
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +93 -14
- package/sdk-core/tests/integ_tests/polling_tests.rs +18 -12
- package/sdk-core/tests/integ_tests/queries_tests.rs +50 -53
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +117 -103
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +8 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +10 -5
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +8 -3
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +76 -15
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +19 -3
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +39 -42
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +84 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +30 -8
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +21 -6
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +26 -16
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +66 -0
- package/sdk-core/tests/integ_tests/workflow_tests.rs +78 -74
- package/sdk-core/tests/load_tests.rs +9 -6
- package/sdk-core/tests/main.rs +43 -10
- package/src/conversions.rs +7 -12
- package/src/lib.rs +322 -357
- package/sdk-core/client/src/mocks.rs +0 -167
- package/sdk-core/core/src/worker/dispatcher.rs +0 -171
- package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +0 -61
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mod activities;
|
|
2
|
-
mod
|
|
2
|
+
pub(crate) mod client;
|
|
3
3
|
mod wft_delivery;
|
|
4
4
|
|
|
5
5
|
pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
|
|
@@ -8,20 +8,25 @@ pub(crate) use activities::{
|
|
|
8
8
|
ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
9
9
|
NewLocalAct,
|
|
10
10
|
};
|
|
11
|
-
pub(crate) use dispatcher::WorkerDispatcher;
|
|
12
11
|
|
|
13
12
|
use crate::{
|
|
13
|
+
abstractions::MeteredSemaphore,
|
|
14
14
|
errors::CompleteWfError,
|
|
15
15
|
pollers::{
|
|
16
16
|
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller, Poller,
|
|
17
17
|
WorkflowTaskPoller,
|
|
18
18
|
},
|
|
19
19
|
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
20
|
-
telemetry::
|
|
21
|
-
|
|
20
|
+
telemetry::{
|
|
21
|
+
metrics::{
|
|
22
|
+
activity_poller, local_activity_worker_type, workflow_poller, workflow_sticky_poller,
|
|
23
|
+
workflow_worker_type, MetricsContext,
|
|
24
|
+
},
|
|
25
|
+
VecDisplayer,
|
|
22
26
|
},
|
|
23
27
|
worker::{
|
|
24
28
|
activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
|
|
29
|
+
client::WorkerClientBag,
|
|
25
30
|
wft_delivery::WFTSource,
|
|
26
31
|
},
|
|
27
32
|
workflow::{
|
|
@@ -31,18 +36,19 @@ use crate::{
|
|
|
31
36
|
},
|
|
32
37
|
EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
|
|
33
38
|
},
|
|
34
|
-
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError,
|
|
39
|
+
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError, WorkerTrait,
|
|
35
40
|
};
|
|
36
41
|
use activities::{LocalInFlightActInfo, WorkerActivityTasks};
|
|
37
42
|
use futures::{Future, TryFutureExt};
|
|
38
43
|
use std::{convert::TryInto, sync::Arc};
|
|
39
|
-
use temporal_client::
|
|
44
|
+
use temporal_client::WorkflowTaskCompletion;
|
|
40
45
|
use temporal_sdk_core_protos::{
|
|
41
46
|
coresdk::{
|
|
42
47
|
activity_result::activity_execution_result,
|
|
43
48
|
activity_task::ActivityTask,
|
|
44
49
|
workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
|
|
45
50
|
workflow_completion::{self, workflow_activation_completion, WorkflowActivationCompletion},
|
|
51
|
+
ActivityTaskCompletion,
|
|
46
52
|
},
|
|
47
53
|
temporal::api::{
|
|
48
54
|
enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
|
|
@@ -52,14 +58,18 @@ use temporal_sdk_core_protos::{
|
|
|
52
58
|
},
|
|
53
59
|
TaskToken,
|
|
54
60
|
};
|
|
55
|
-
use tokio::sync::
|
|
61
|
+
use tokio::sync::Notify;
|
|
62
|
+
use tokio_util::sync::CancellationToken;
|
|
56
63
|
use tonic::Code;
|
|
57
64
|
use tracing_futures::Instrument;
|
|
58
65
|
|
|
66
|
+
#[cfg(test)]
|
|
67
|
+
use crate::worker::client::WorkerClient;
|
|
68
|
+
|
|
59
69
|
/// A worker polls on a certain task queue
|
|
60
70
|
pub struct Worker {
|
|
61
71
|
config: WorkerConfig,
|
|
62
|
-
|
|
72
|
+
wf_client: Arc<WorkerClientBag>,
|
|
63
73
|
|
|
64
74
|
/// Will be populated when this worker should poll on a sticky WFT queue
|
|
65
75
|
sticky_name: Option<String>,
|
|
@@ -67,14 +77,14 @@ pub struct Worker {
|
|
|
67
77
|
/// Buffers workflow task polling in the event we need to return a pending activation while
|
|
68
78
|
/// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
|
|
69
79
|
wf_task_source: WFTSource,
|
|
70
|
-
/// Workflow task management
|
|
71
|
-
|
|
80
|
+
/// Workflow task management
|
|
81
|
+
wft_manager: WorkflowTaskManager,
|
|
72
82
|
/// Manages activity tasks for this worker/task queue
|
|
73
83
|
at_task_mgr: Option<WorkerActivityTasks>,
|
|
74
84
|
/// Manages local activities
|
|
75
85
|
local_act_mgr: LocalActivityManager,
|
|
76
86
|
/// Ensures we stay at or below this worker's maximum concurrent workflow limit
|
|
77
|
-
workflows_semaphore:
|
|
87
|
+
workflows_semaphore: MeteredSemaphore,
|
|
78
88
|
/// Used to wake blocked workflow task polling when there is some change to workflow activations
|
|
79
89
|
/// that should cause us to restart the loop
|
|
80
90
|
pending_activations_notify: Arc<Notify>,
|
|
@@ -82,21 +92,106 @@ pub struct Worker {
|
|
|
82
92
|
/// a WFT is completed.
|
|
83
93
|
wfts_drained_notify: Arc<Notify>,
|
|
84
94
|
/// Has shutdown been called?
|
|
85
|
-
|
|
86
|
-
shutdown_sender: watch::Sender<bool>,
|
|
95
|
+
shutdown_token: CancellationToken,
|
|
87
96
|
/// Will be called at the end of each activation completion
|
|
88
97
|
post_activate_hook: Option<Box<dyn Fn(&Self) + Send + Sync>>,
|
|
89
98
|
|
|
90
99
|
metrics: MetricsContext,
|
|
91
100
|
}
|
|
92
101
|
|
|
102
|
+
#[async_trait::async_trait]
|
|
103
|
+
impl WorkerTrait for Worker {
|
|
104
|
+
#[instrument(level = "debug", skip(self), fields(run_id))]
|
|
105
|
+
async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
106
|
+
self.next_workflow_activation().await
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
#[instrument(level = "debug", skip(self))]
|
|
110
|
+
async fn poll_activity_task(&self) -> Result<ActivityTask, PollActivityError> {
|
|
111
|
+
loop {
|
|
112
|
+
match self.activity_poll().await.transpose() {
|
|
113
|
+
Some(r) => break r,
|
|
114
|
+
None => {
|
|
115
|
+
tokio::task::yield_now().await;
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
#[instrument(level = "debug", skip(self, completion),
|
|
123
|
+
fields(completion=%&completion, run_id=%completion.run_id))]
|
|
124
|
+
async fn complete_workflow_activation(
|
|
125
|
+
&self,
|
|
126
|
+
completion: WorkflowActivationCompletion,
|
|
127
|
+
) -> Result<(), CompleteWfError> {
|
|
128
|
+
self.complete_workflow_activation(completion).await
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
#[instrument(level = "debug", skip(self, completion),
|
|
132
|
+
fields(completion=%&completion))]
|
|
133
|
+
async fn complete_activity_task(
|
|
134
|
+
&self,
|
|
135
|
+
completion: ActivityTaskCompletion,
|
|
136
|
+
) -> Result<(), CompleteActivityError> {
|
|
137
|
+
let task_token = TaskToken(completion.task_token);
|
|
138
|
+
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
139
|
+
s
|
|
140
|
+
} else {
|
|
141
|
+
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
142
|
+
reason: "Activity completion had empty result/status field".to_owned(),
|
|
143
|
+
completion: None,
|
|
144
|
+
});
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
self.complete_activity(task_token, status).await
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
151
|
+
self.record_heartbeat(details);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
fn request_workflow_eviction(&self, run_id: &str) {
|
|
155
|
+
self.request_wf_eviction(
|
|
156
|
+
run_id,
|
|
157
|
+
"Eviction explicitly requested by lang",
|
|
158
|
+
EvictionReason::LangRequested,
|
|
159
|
+
);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
fn get_config(&self) -> &WorkerConfig {
|
|
163
|
+
&self.config
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
167
|
+
// TODO: will be in trait after Roey's shutdown refactor
|
|
168
|
+
fn initiate_shutdown(&self) {
|
|
169
|
+
self.shutdown_token.cancel();
|
|
170
|
+
// First, we want to stop polling of both activity and workflow tasks
|
|
171
|
+
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
172
|
+
atm.notify_shutdown();
|
|
173
|
+
}
|
|
174
|
+
self.wf_task_source.stop_pollers();
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
async fn shutdown(&self) {
|
|
178
|
+
self.shutdown().await
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
async fn finalize_shutdown(self) {
|
|
182
|
+
self.shutdown().await;
|
|
183
|
+
self.finalize_shutdown().await
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
|
|
93
187
|
impl Worker {
|
|
94
188
|
pub(crate) fn new(
|
|
95
189
|
config: WorkerConfig,
|
|
96
190
|
sticky_queue_name: Option<String>,
|
|
97
|
-
|
|
191
|
+
client: Arc<WorkerClientBag>,
|
|
98
192
|
metrics: MetricsContext,
|
|
99
193
|
) -> Self {
|
|
194
|
+
info!(task_queue = %config.task_queue, "Initializing worker");
|
|
100
195
|
metrics.worker_registered();
|
|
101
196
|
|
|
102
197
|
let max_nonsticky_polls = if sticky_queue_name.is_some() {
|
|
@@ -107,7 +202,7 @@ impl Worker {
|
|
|
107
202
|
let max_sticky_polls = config.max_sticky_polls();
|
|
108
203
|
let wft_metrics = metrics.with_new_attrs([workflow_poller()]);
|
|
109
204
|
let mut wf_task_poll_buffer = new_workflow_task_buffer(
|
|
110
|
-
|
|
205
|
+
client.clone(),
|
|
111
206
|
config.task_queue.clone(),
|
|
112
207
|
false,
|
|
113
208
|
max_nonsticky_polls,
|
|
@@ -117,7 +212,7 @@ impl Worker {
|
|
|
117
212
|
let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
|
|
118
213
|
let sticky_metrics = metrics.with_new_attrs([workflow_sticky_poller()]);
|
|
119
214
|
let mut sp = new_workflow_task_buffer(
|
|
120
|
-
|
|
215
|
+
client.clone(),
|
|
121
216
|
sqn.clone(),
|
|
122
217
|
true,
|
|
123
218
|
max_sticky_polls,
|
|
@@ -130,10 +225,11 @@ impl Worker {
|
|
|
130
225
|
None
|
|
131
226
|
} else {
|
|
132
227
|
let mut ap = new_activity_task_buffer(
|
|
133
|
-
|
|
228
|
+
client.clone(),
|
|
134
229
|
config.task_queue.clone(),
|
|
135
230
|
config.max_concurrent_at_polls,
|
|
136
231
|
config.max_concurrent_at_polls * 2,
|
|
232
|
+
config.max_task_queue_activities_per_second,
|
|
137
233
|
);
|
|
138
234
|
let act_metrics = metrics.with_new_attrs([activity_poller()]);
|
|
139
235
|
ap.set_num_pollers_handler(move |np| act_metrics.record_num_pollers(np));
|
|
@@ -149,17 +245,22 @@ impl Worker {
|
|
|
149
245
|
Self::new_with_pollers(
|
|
150
246
|
config,
|
|
151
247
|
sticky_queue_name,
|
|
152
|
-
|
|
248
|
+
client,
|
|
153
249
|
wf_task_poll_buffer,
|
|
154
250
|
act_poll_buffer,
|
|
155
251
|
metrics,
|
|
156
252
|
)
|
|
157
253
|
}
|
|
158
254
|
|
|
255
|
+
#[cfg(test)]
|
|
256
|
+
pub(crate) fn new_test(config: WorkerConfig, client: impl WorkerClient + 'static) -> Self {
|
|
257
|
+
Self::new(config, None, Arc::new(client.into()), Default::default())
|
|
258
|
+
}
|
|
259
|
+
|
|
159
260
|
pub(crate) fn new_with_pollers(
|
|
160
261
|
config: WorkerConfig,
|
|
161
262
|
sticky_queue_name: Option<String>,
|
|
162
|
-
|
|
263
|
+
client: Arc<WorkerClientBag>,
|
|
163
264
|
wft_poller: BoxedWFPoller,
|
|
164
265
|
act_poller: Option<BoxedActPoller>,
|
|
165
266
|
metrics: MetricsContext,
|
|
@@ -173,9 +274,8 @@ impl Worker {
|
|
|
173
274
|
};
|
|
174
275
|
let pa_notif = Arc::new(Notify::new());
|
|
175
276
|
let wfts_drained_notify = Arc::new(Notify::new());
|
|
176
|
-
let (shut_tx, shut_rx) = watch::channel(false);
|
|
177
277
|
Self {
|
|
178
|
-
|
|
278
|
+
wf_client: client.clone(),
|
|
179
279
|
sticky_name: sticky_queue_name,
|
|
180
280
|
wf_task_source: WFTSource::new(wft_poller),
|
|
181
281
|
wft_manager: WorkflowTaskManager::new(pa_notif.clone(), cache_policy, metrics.clone()),
|
|
@@ -183,7 +283,7 @@ impl Worker {
|
|
|
183
283
|
WorkerActivityTasks::new(
|
|
184
284
|
config.max_outstanding_activities,
|
|
185
285
|
ap,
|
|
186
|
-
|
|
286
|
+
client.clone(),
|
|
187
287
|
metrics.clone(),
|
|
188
288
|
config.max_heartbeat_throttle_interval,
|
|
189
289
|
config.default_heartbeat_throttle_interval,
|
|
@@ -191,12 +291,16 @@ impl Worker {
|
|
|
191
291
|
}),
|
|
192
292
|
local_act_mgr: LocalActivityManager::new(
|
|
193
293
|
config.max_outstanding_local_activities,
|
|
194
|
-
|
|
294
|
+
config.namespace.clone(),
|
|
295
|
+
metrics.with_new_attrs([local_activity_worker_type()]),
|
|
296
|
+
),
|
|
297
|
+
workflows_semaphore: MeteredSemaphore::new(
|
|
298
|
+
config.max_outstanding_workflow_tasks,
|
|
299
|
+
metrics.with_new_attrs([workflow_worker_type()]),
|
|
300
|
+
MetricsContext::available_task_slots,
|
|
195
301
|
),
|
|
196
|
-
workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
|
|
197
302
|
config,
|
|
198
|
-
|
|
199
|
-
shutdown_sender: shut_tx,
|
|
303
|
+
shutdown_token: CancellationToken::new(),
|
|
200
304
|
post_activate_hook: None,
|
|
201
305
|
pending_activations_notify: pa_notif,
|
|
202
306
|
wfts_drained_notify,
|
|
@@ -204,20 +308,11 @@ impl Worker {
|
|
|
204
308
|
}
|
|
205
309
|
}
|
|
206
310
|
|
|
207
|
-
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
208
|
-
pub(crate) fn initiate_shutdown(&self) {
|
|
209
|
-
let _ = self.shutdown_sender.send(true);
|
|
210
|
-
// First, we want to stop polling of both activity and workflow tasks
|
|
211
|
-
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
212
|
-
atm.notify_shutdown();
|
|
213
|
-
}
|
|
214
|
-
self.wf_task_source.stop_pollers();
|
|
215
|
-
}
|
|
216
|
-
|
|
217
311
|
/// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
|
|
218
312
|
/// completed
|
|
219
313
|
pub(crate) async fn shutdown(&self) {
|
|
220
314
|
self.initiate_shutdown();
|
|
315
|
+
info!("Initiated shutdown");
|
|
221
316
|
// Next we need to wait for all local activities to finish so no more workflow task
|
|
222
317
|
// heartbeats will be generated
|
|
223
318
|
self.local_act_mgr.shutdown_and_wait_all_finished().await;
|
|
@@ -249,7 +344,7 @@ impl Worker {
|
|
|
249
344
|
|
|
250
345
|
#[cfg(test)]
|
|
251
346
|
pub(crate) fn available_wft_permits(&self) -> usize {
|
|
252
|
-
self.workflows_semaphore.available_permits()
|
|
347
|
+
self.workflows_semaphore.sem.available_permits()
|
|
253
348
|
}
|
|
254
349
|
|
|
255
350
|
/// Get new activity tasks (may be local or nonlocal). Local activities are returned first
|
|
@@ -257,12 +352,12 @@ impl Worker {
|
|
|
257
352
|
///
|
|
258
353
|
/// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
|
|
259
354
|
/// be restarted
|
|
260
|
-
|
|
355
|
+
async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
|
|
261
356
|
let act_mgr_poll = async {
|
|
262
357
|
if let Some(ref act_mgr) = self.at_task_mgr {
|
|
263
358
|
act_mgr.poll().await
|
|
264
359
|
} else {
|
|
265
|
-
|
|
360
|
+
self.shutdown_token.cancelled().await;
|
|
266
361
|
Err(PollActivityError::ShutDown)
|
|
267
362
|
}
|
|
268
363
|
};
|
|
@@ -278,7 +373,12 @@ impl Worker {
|
|
|
278
373
|
&run_id, LocalResolution::LocalActivity(resolution)).await;
|
|
279
374
|
Ok(task)
|
|
280
375
|
},
|
|
281
|
-
None =>
|
|
376
|
+
None => {
|
|
377
|
+
if self.shutdown_token.is_cancelled() {
|
|
378
|
+
return Err(PollActivityError::ShutDown);
|
|
379
|
+
}
|
|
380
|
+
Ok(None)
|
|
381
|
+
}
|
|
282
382
|
}
|
|
283
383
|
},
|
|
284
384
|
r = act_mgr_poll => r,
|
|
@@ -326,8 +426,7 @@ impl Worker {
|
|
|
326
426
|
}
|
|
327
427
|
|
|
328
428
|
if let Some(atm) = &self.at_task_mgr {
|
|
329
|
-
atm.complete(task_token, status, self.
|
|
330
|
-
.await
|
|
429
|
+
atm.complete(task_token, status, &**self.wf_client).await
|
|
331
430
|
} else {
|
|
332
431
|
error!(
|
|
333
432
|
"Tried to complete activity {} on a worker that does not have an activity manager",
|
|
@@ -428,7 +527,7 @@ impl Worker {
|
|
|
428
527
|
|
|
429
528
|
/// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
|
|
430
529
|
pub(crate) fn return_workflow_task_permit(&self) {
|
|
431
|
-
self.workflows_semaphore.
|
|
530
|
+
self.workflows_semaphore.add_permit();
|
|
432
531
|
}
|
|
433
532
|
|
|
434
533
|
pub(crate) fn request_wf_eviction(
|
|
@@ -448,11 +547,25 @@ impl Worker {
|
|
|
448
547
|
self.post_activate_hook = Some(Box::new(callback))
|
|
449
548
|
}
|
|
450
549
|
|
|
550
|
+
/// Used for replay workers - causes the worker to shutdown when the given run reaches the
|
|
551
|
+
/// given event number
|
|
552
|
+
pub(crate) fn set_shutdown_on_run_reaches_event(&mut self, run_id: String, last_event: i64) {
|
|
553
|
+
self.set_post_activate_hook(move |worker| {
|
|
554
|
+
if worker
|
|
555
|
+
.wft_manager
|
|
556
|
+
.most_recently_processed_event(&run_id)
|
|
557
|
+
.unwrap_or_default()
|
|
558
|
+
>= last_event
|
|
559
|
+
{
|
|
560
|
+
worker.initiate_shutdown();
|
|
561
|
+
}
|
|
562
|
+
});
|
|
563
|
+
}
|
|
564
|
+
|
|
451
565
|
/// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
|
|
452
566
|
async fn workflow_poll_or_wfts_drained(
|
|
453
567
|
&self,
|
|
454
568
|
) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
455
|
-
let mut shutdown_requested = self.shutdown_requested.clone();
|
|
456
569
|
loop {
|
|
457
570
|
tokio::select! {
|
|
458
571
|
biased;
|
|
@@ -466,7 +579,7 @@ impl Worker {
|
|
|
466
579
|
}
|
|
467
580
|
return r
|
|
468
581
|
},
|
|
469
|
-
_ =
|
|
582
|
+
_ = self.shutdown_token.cancelled() => {},
|
|
470
583
|
}
|
|
471
584
|
}
|
|
472
585
|
}
|
|
@@ -481,7 +594,7 @@ impl Worker {
|
|
|
481
594
|
// heartbeating which is a "new" workflow task that we need to accept and process as long as
|
|
482
595
|
// the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
|
|
483
596
|
// then we must fetch them from the source before we can say workflow polling is shutdown.
|
|
484
|
-
if
|
|
597
|
+
if self.shutdown_token.is_cancelled()
|
|
485
598
|
&& !self.wf_task_source.has_tasks_from_complete()
|
|
486
599
|
&& self.local_act_mgr.num_outstanding() == 0
|
|
487
600
|
{
|
|
@@ -535,10 +648,9 @@ impl Worker {
|
|
|
535
648
|
work: ValidPollWFTQResponse,
|
|
536
649
|
) -> Result<Option<WorkflowActivation>, PollWfError> {
|
|
537
650
|
let we = work.workflow_execution.clone();
|
|
538
|
-
let tt = work.task_token.clone();
|
|
539
651
|
let res = self
|
|
540
652
|
.wft_manager
|
|
541
|
-
.apply_new_poll_resp(work, self.
|
|
653
|
+
.apply_new_poll_resp(work, self.wf_client.clone())
|
|
542
654
|
.await;
|
|
543
655
|
Ok(match res {
|
|
544
656
|
NewWfTaskOutcome::IssueActivation(a) => {
|
|
@@ -555,31 +667,12 @@ impl Worker {
|
|
|
555
667
|
debug!(workflow_execution=?we,
|
|
556
668
|
"No new work for lang to perform after polling server");
|
|
557
669
|
self.complete_workflow_activation(WorkflowActivationCompletion {
|
|
558
|
-
task_queue: self.config.task_queue.clone(),
|
|
559
670
|
run_id: we.run_id,
|
|
560
671
|
status: Some(workflow_completion::Success::from_variants(vec![]).into()),
|
|
561
672
|
})
|
|
562
673
|
.await?;
|
|
563
674
|
None
|
|
564
675
|
}
|
|
565
|
-
NewWfTaskOutcome::CacheMiss => {
|
|
566
|
-
debug!(workflow_execution=?we, "Unable to process workflow task with partial \
|
|
567
|
-
history because workflow cache does not contain workflow anymore.");
|
|
568
|
-
self.server_gateway
|
|
569
|
-
.fail_workflow_task(
|
|
570
|
-
tt,
|
|
571
|
-
WorkflowTaskFailedCause::ResetStickyTaskQueue,
|
|
572
|
-
Some(Failure {
|
|
573
|
-
message: "Unable to process workflow task with partial history \
|
|
574
|
-
because workflow cache does not contain workflow anymore."
|
|
575
|
-
.to_string(),
|
|
576
|
-
..Default::default()
|
|
577
|
-
}),
|
|
578
|
-
)
|
|
579
|
-
.await?;
|
|
580
|
-
self.return_workflow_task_permit();
|
|
581
|
-
None
|
|
582
|
-
}
|
|
583
676
|
NewWfTaskOutcome::Evict(e) => {
|
|
584
677
|
warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
|
|
585
678
|
self.request_wf_eviction(
|
|
@@ -627,9 +720,12 @@ impl Worker {
|
|
|
627
720
|
force_new_wft,
|
|
628
721
|
},
|
|
629
722
|
})) => {
|
|
630
|
-
debug!("Sending commands to server: {
|
|
723
|
+
debug!("Sending commands to server: {}", commands.display());
|
|
631
724
|
if !query_responses.is_empty() {
|
|
632
|
-
debug!(
|
|
725
|
+
debug!(
|
|
726
|
+
"Sending query responses to server: {}",
|
|
727
|
+
query_responses.display()
|
|
728
|
+
);
|
|
633
729
|
}
|
|
634
730
|
let mut completion = WorkflowTaskCompletion {
|
|
635
731
|
task_token,
|
|
@@ -649,7 +745,7 @@ impl Worker {
|
|
|
649
745
|
|
|
650
746
|
self.handle_wft_reporting_errs(run_id, || async {
|
|
651
747
|
let maybe_wft = self
|
|
652
|
-
.
|
|
748
|
+
.wf_client
|
|
653
749
|
.complete_workflow_task(completion)
|
|
654
750
|
.instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
|
|
655
751
|
.await?;
|
|
@@ -669,7 +765,7 @@ impl Worker {
|
|
|
669
765
|
action: ActivationAction::RespondLegacyQuery { result },
|
|
670
766
|
..
|
|
671
767
|
})) => {
|
|
672
|
-
self.
|
|
768
|
+
self.wf_client
|
|
673
769
|
.respond_legacy_query(task_token, result)
|
|
674
770
|
.await?;
|
|
675
771
|
Ok(WFTReportOutcome {
|
|
@@ -720,7 +816,7 @@ impl Worker {
|
|
|
720
816
|
FailedActivationOutcome::Report(tt) => {
|
|
721
817
|
warn!(run_id, failure=?failure, "Failing workflow activation");
|
|
722
818
|
self.handle_wft_reporting_errs(run_id, || async {
|
|
723
|
-
self.
|
|
819
|
+
self.wf_client
|
|
724
820
|
.fail_workflow_task(tt, cause, failure.failure.map(Into::into))
|
|
725
821
|
.await
|
|
726
822
|
})
|
|
@@ -732,7 +828,7 @@ impl Worker {
|
|
|
732
828
|
}
|
|
733
829
|
FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
|
|
734
830
|
warn!(run_id, failure=?failure, "Failing legacy query request");
|
|
735
|
-
self.
|
|
831
|
+
self.wf_client
|
|
736
832
|
.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
737
833
|
.await?;
|
|
738
834
|
WFTReportOutcome {
|
|
@@ -765,7 +861,7 @@ impl Worker {
|
|
|
765
861
|
// Silence unhandled command errors since the lang SDK cannot do anything about
|
|
766
862
|
// them besides poll again, which it will do anyway.
|
|
767
863
|
tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
|
|
768
|
-
|
|
864
|
+
debug!(error = %err, run_id, "Unhandled command response when completing");
|
|
769
865
|
should_evict = Some(EvictionReason::UnhandledCommand);
|
|
770
866
|
Ok(())
|
|
771
867
|
}
|
|
@@ -851,91 +947,83 @@ struct WFTReportOutcome {
|
|
|
851
947
|
#[cfg(test)]
|
|
852
948
|
mod tests {
|
|
853
949
|
use super::*;
|
|
854
|
-
use
|
|
950
|
+
use crate::{test_help::test_worker_cfg, worker::client::mocks::mock_workflow_client};
|
|
855
951
|
use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
|
|
856
952
|
|
|
857
953
|
#[tokio::test]
|
|
858
954
|
async fn activity_timeouts_dont_eat_permits() {
|
|
859
|
-
let mut
|
|
860
|
-
|
|
955
|
+
let mut mock_client = mock_workflow_client();
|
|
956
|
+
mock_client
|
|
861
957
|
.expect_poll_activity_task()
|
|
862
|
-
.returning(|_| Ok(PollActivityTaskQueueResponse::default()));
|
|
958
|
+
.returning(|_, _| Ok(PollActivityTaskQueueResponse::default()));
|
|
863
959
|
|
|
864
|
-
let cfg =
|
|
865
|
-
.task_queue("whatever")
|
|
960
|
+
let cfg = test_worker_cfg()
|
|
866
961
|
.max_outstanding_activities(5_usize)
|
|
867
962
|
.build()
|
|
868
963
|
.unwrap();
|
|
869
|
-
let worker = Worker::
|
|
964
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
870
965
|
assert_eq!(worker.activity_poll().await.unwrap(), None);
|
|
871
966
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
872
967
|
}
|
|
873
968
|
|
|
874
969
|
#[tokio::test]
|
|
875
970
|
async fn workflow_timeouts_dont_eat_permits() {
|
|
876
|
-
let mut
|
|
877
|
-
|
|
971
|
+
let mut mock_client = mock_workflow_client();
|
|
972
|
+
mock_client
|
|
878
973
|
.expect_poll_workflow_task()
|
|
879
974
|
.returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
|
|
880
975
|
|
|
881
|
-
let cfg =
|
|
882
|
-
.task_queue("whatever")
|
|
976
|
+
let cfg = test_worker_cfg()
|
|
883
977
|
.max_outstanding_workflow_tasks(5_usize)
|
|
884
978
|
.build()
|
|
885
979
|
.unwrap();
|
|
886
|
-
let worker = Worker::
|
|
980
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
887
981
|
assert_eq!(worker.workflow_poll().await.unwrap(), None);
|
|
888
|
-
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
982
|
+
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
889
983
|
}
|
|
890
984
|
|
|
891
985
|
#[tokio::test]
|
|
892
986
|
async fn activity_errs_dont_eat_permits() {
|
|
893
|
-
let mut
|
|
894
|
-
|
|
987
|
+
let mut mock_client = mock_workflow_client();
|
|
988
|
+
mock_client
|
|
895
989
|
.expect_poll_activity_task()
|
|
896
|
-
.returning(|_| Err(tonic::Status::internal("ahhh")));
|
|
990
|
+
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
897
991
|
|
|
898
|
-
let cfg =
|
|
899
|
-
.task_queue("whatever")
|
|
992
|
+
let cfg = test_worker_cfg()
|
|
900
993
|
.max_outstanding_activities(5_usize)
|
|
901
994
|
.build()
|
|
902
995
|
.unwrap();
|
|
903
|
-
let worker = Worker::
|
|
996
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
904
997
|
assert!(worker.activity_poll().await.is_err());
|
|
905
998
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
906
999
|
}
|
|
907
1000
|
|
|
908
1001
|
#[tokio::test]
|
|
909
1002
|
async fn workflow_errs_dont_eat_permits() {
|
|
910
|
-
let mut
|
|
911
|
-
|
|
1003
|
+
let mut mock_client = mock_workflow_client();
|
|
1004
|
+
mock_client
|
|
912
1005
|
.expect_poll_workflow_task()
|
|
913
1006
|
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
914
1007
|
|
|
915
|
-
let cfg =
|
|
916
|
-
.task_queue("whatever")
|
|
1008
|
+
let cfg = test_worker_cfg()
|
|
917
1009
|
.max_outstanding_workflow_tasks(5_usize)
|
|
918
1010
|
.build()
|
|
919
1011
|
.unwrap();
|
|
920
|
-
let worker = Worker::
|
|
1012
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
921
1013
|
assert!(worker.workflow_poll().await.is_err());
|
|
922
|
-
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
1014
|
+
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
923
1015
|
}
|
|
924
1016
|
|
|
925
1017
|
#[test]
|
|
926
1018
|
fn max_polls_calculated_properly() {
|
|
927
|
-
let cfg =
|
|
928
|
-
.task_queue("whatever")
|
|
929
|
-
.build()
|
|
930
|
-
.unwrap();
|
|
1019
|
+
let cfg = test_worker_cfg().build().unwrap();
|
|
931
1020
|
assert_eq!(cfg.max_nonsticky_polls(), 1);
|
|
932
1021
|
assert_eq!(cfg.max_sticky_polls(), 4);
|
|
933
1022
|
}
|
|
934
1023
|
|
|
935
1024
|
#[test]
|
|
936
1025
|
fn max_polls_zero_is_err() {
|
|
937
|
-
assert!(
|
|
938
|
-
.task_queue("whatever")
|
|
1026
|
+
assert!(test_worker_cfg()
|
|
939
1027
|
.max_concurrent_wft_polls(0_usize)
|
|
940
1028
|
.build()
|
|
941
1029
|
.is_err());
|
|
@@ -1,16 +1,18 @@
|
|
|
1
|
-
use crate::workflow::WFCommand;
|
|
1
|
+
use crate::workflow::{WFCommand, WorkflowStartedInfo};
|
|
2
2
|
use std::collections::VecDeque;
|
|
3
3
|
use temporal_sdk_core_protos::{
|
|
4
4
|
coresdk::workflow_activation::{
|
|
5
|
-
workflow_activation_job, CancelWorkflow, SignalWorkflow,
|
|
5
|
+
start_workflow_from_attribs, workflow_activation_job, CancelWorkflow, SignalWorkflow,
|
|
6
|
+
WorkflowActivationJob,
|
|
6
7
|
},
|
|
7
8
|
temporal::api::history::v1::WorkflowExecutionStartedEventAttributes,
|
|
9
|
+
utilities::TryIntoOrNone,
|
|
8
10
|
};
|
|
9
11
|
|
|
10
12
|
/// Abstracts away the concept of an actual workflow implementation, handling sending it new
|
|
11
13
|
/// jobs and fetching output from it.
|
|
12
14
|
pub struct DrivenWorkflow {
|
|
13
|
-
started_attrs: Option<
|
|
15
|
+
started_attrs: Option<WorkflowStartedInfo>,
|
|
14
16
|
fetcher: Box<dyn WorkflowFetcher>,
|
|
15
17
|
/// Outgoing activation jobs that need to be sent to the lang sdk
|
|
16
18
|
outgoing_wf_activation_jobs: VecDeque<workflow_activation_job::Variant>,
|
|
@@ -31,13 +33,26 @@ where
|
|
|
31
33
|
|
|
32
34
|
impl DrivenWorkflow {
|
|
33
35
|
/// Start the workflow
|
|
34
|
-
pub fn start(
|
|
36
|
+
pub fn start(
|
|
37
|
+
&mut self,
|
|
38
|
+
workflow_id: String,
|
|
39
|
+
randomness_seed: u64,
|
|
40
|
+
attribs: WorkflowExecutionStartedEventAttributes,
|
|
41
|
+
) {
|
|
35
42
|
debug!(run_id = %attribs.original_execution_run_id, "Driven WF start");
|
|
36
|
-
|
|
43
|
+
let started_info = WorkflowStartedInfo {
|
|
44
|
+
workflow_task_timeout: attribs.workflow_task_timeout.clone().try_into_or_none(),
|
|
45
|
+
workflow_execution_timeout: attribs
|
|
46
|
+
.workflow_execution_timeout
|
|
47
|
+
.clone()
|
|
48
|
+
.try_into_or_none(),
|
|
49
|
+
};
|
|
50
|
+
self.send_job(start_workflow_from_attribs(attribs, workflow_id, randomness_seed).into());
|
|
51
|
+
self.started_attrs = Some(started_info);
|
|
37
52
|
}
|
|
38
53
|
|
|
39
54
|
/// Return the attributes from the workflow execution started event if this workflow has started
|
|
40
|
-
pub fn
|
|
55
|
+
pub fn get_started_info(&self) -> Option<&WorkflowStartedInfo> {
|
|
41
56
|
self.started_attrs.as_ref()
|
|
42
57
|
}
|
|
43
58
|
|