@temporalio/core-bridge 0.19.2 → 0.20.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +90 -157
- package/Cargo.toml +1 -0
- package/index.d.ts +11 -27
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.cargo/config.toml +1 -0
- package/sdk-core/CODEOWNERS +1 -1
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +119 -86
- package/sdk-core/bridge-ffi/src/lib.rs +311 -315
- package/sdk-core/bridge-ffi/src/wrappers.rs +108 -113
- package/sdk-core/client/Cargo.toml +13 -9
- package/sdk-core/client/LICENSE.txt +23 -0
- package/sdk-core/client/src/lib.rs +286 -174
- package/sdk-core/client/src/metrics.rs +86 -12
- package/sdk-core/client/src/raw.rs +566 -0
- package/sdk-core/client/src/retry.rs +137 -99
- package/sdk-core/core/Cargo.toml +15 -10
- package/sdk-core/core/LICENSE.txt +23 -0
- package/sdk-core/core/benches/workflow_replay.rs +79 -0
- package/sdk-core/core/src/abstractions.rs +38 -0
- package/sdk-core/core/src/core_tests/activity_tasks.rs +108 -182
- package/sdk-core/core/src/core_tests/child_workflows.rs +16 -11
- package/sdk-core/core/src/core_tests/determinism.rs +24 -12
- package/sdk-core/core/src/core_tests/local_activities.rs +53 -27
- package/sdk-core/core/src/core_tests/mod.rs +30 -43
- package/sdk-core/core/src/core_tests/queries.rs +82 -81
- package/sdk-core/core/src/core_tests/workers.rs +111 -296
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +4 -4
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +257 -242
- package/sdk-core/core/src/lib.rs +73 -318
- package/sdk-core/core/src/pollers/mod.rs +4 -6
- package/sdk-core/core/src/pollers/poll_buffer.rs +20 -14
- package/sdk-core/core/src/protosext/mod.rs +7 -10
- package/sdk-core/core/src/replay/mod.rs +11 -150
- package/sdk-core/core/src/telemetry/metrics.rs +35 -2
- package/sdk-core/core/src/telemetry/mod.rs +49 -16
- package/sdk-core/core/src/telemetry/prometheus_server.rs +14 -35
- package/sdk-core/core/src/test_help/mod.rs +104 -170
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +57 -34
- package/sdk-core/core/src/worker/activities/local_activities.rs +95 -23
- package/sdk-core/core/src/worker/activities.rs +23 -16
- package/sdk-core/core/src/worker/client/mocks.rs +86 -0
- package/sdk-core/core/src/worker/client.rs +209 -0
- package/sdk-core/core/src/worker/mod.rs +207 -108
- package/sdk-core/core/src/workflow/driven_workflow.rs +21 -6
- package/sdk-core/core/src/workflow/history_update.rs +107 -24
- package/sdk-core/core/src/workflow/machines/activity_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/child_workflow_state_machine.rs +2 -3
- package/sdk-core/core/src/workflow/machines/mod.rs +20 -17
- package/sdk-core/core/src/workflow/machines/signal_external_state_machine.rs +56 -19
- package/sdk-core/core/src/workflow/machines/transition_coverage.rs +5 -0
- package/sdk-core/core/src/workflow/machines/upsert_search_attributes_state_machine.rs +230 -22
- package/sdk-core/core/src/workflow/machines/workflow_machines.rs +81 -115
- package/sdk-core/core/src/workflow/machines/workflow_task_state_machine.rs +4 -4
- package/sdk-core/core/src/workflow/mod.rs +13 -1
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +70 -11
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +65 -41
- package/sdk-core/core-api/Cargo.toml +9 -1
- package/sdk-core/core-api/LICENSE.txt +23 -0
- package/sdk-core/core-api/src/errors.rs +7 -38
- package/sdk-core/core-api/src/lib.rs +44 -52
- package/sdk-core/core-api/src/worker.rs +10 -2
- package/sdk-core/etc/deps.svg +127 -96
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +11 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +10 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +6 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +2 -1
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +12 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -0
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +19 -35
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -6
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +53 -11
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +14 -7
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +3 -5
- package/sdk-core/sdk/Cargo.toml +16 -2
- package/sdk-core/sdk/LICENSE.txt +23 -0
- package/sdk-core/sdk/src/interceptors.rs +11 -0
- package/sdk-core/sdk/src/lib.rs +139 -151
- package/sdk-core/sdk/src/workflow_context/options.rs +86 -1
- package/sdk-core/sdk/src/workflow_context.rs +36 -17
- package/sdk-core/sdk/src/workflow_future.rs +19 -25
- package/sdk-core/sdk-core-protos/Cargo.toml +1 -1
- package/sdk-core/sdk-core-protos/build.rs +1 -0
- package/sdk-core/sdk-core-protos/src/history_info.rs +17 -4
- package/sdk-core/sdk-core-protos/src/lib.rs +251 -47
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +27 -0
- package/sdk-core/test-utils/src/histfetch.rs +3 -3
- package/sdk-core/test-utils/src/lib.rs +223 -68
- package/sdk-core/tests/integ_tests/client_tests.rs +27 -4
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +93 -14
- package/sdk-core/tests/integ_tests/polling_tests.rs +18 -12
- package/sdk-core/tests/integ_tests/queries_tests.rs +50 -53
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +117 -103
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +8 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +10 -5
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +32 -9
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +7 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +76 -15
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +19 -3
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +39 -42
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +84 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +30 -8
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +21 -6
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +26 -16
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +66 -0
- package/sdk-core/tests/integ_tests/workflow_tests.rs +78 -74
- package/sdk-core/tests/load_tests.rs +9 -6
- package/sdk-core/tests/main.rs +43 -10
- package/src/conversions.rs +7 -12
- package/src/lib.rs +322 -357
- package/sdk-core/client/src/mocks.rs +0 -167
- package/sdk-core/core/src/worker/dispatcher.rs +0 -171
- package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +0 -61
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mod activities;
|
|
2
|
-
mod
|
|
2
|
+
pub(crate) mod client;
|
|
3
3
|
mod wft_delivery;
|
|
4
4
|
|
|
5
5
|
pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
|
|
@@ -8,20 +8,25 @@ pub(crate) use activities::{
|
|
|
8
8
|
ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
9
9
|
NewLocalAct,
|
|
10
10
|
};
|
|
11
|
-
pub(crate) use dispatcher::WorkerDispatcher;
|
|
12
11
|
|
|
13
12
|
use crate::{
|
|
13
|
+
abstractions::MeteredSemaphore,
|
|
14
14
|
errors::CompleteWfError,
|
|
15
15
|
pollers::{
|
|
16
16
|
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller, Poller,
|
|
17
17
|
WorkflowTaskPoller,
|
|
18
18
|
},
|
|
19
19
|
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
20
|
-
telemetry::
|
|
21
|
-
|
|
20
|
+
telemetry::{
|
|
21
|
+
metrics::{
|
|
22
|
+
activity_poller, local_activity_worker_type, workflow_poller, workflow_sticky_poller,
|
|
23
|
+
workflow_worker_type, MetricsContext,
|
|
24
|
+
},
|
|
25
|
+
VecDisplayer,
|
|
22
26
|
},
|
|
23
27
|
worker::{
|
|
24
28
|
activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
|
|
29
|
+
client::WorkerClientBag,
|
|
25
30
|
wft_delivery::WFTSource,
|
|
26
31
|
},
|
|
27
32
|
workflow::{
|
|
@@ -31,18 +36,19 @@ use crate::{
|
|
|
31
36
|
},
|
|
32
37
|
EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
|
|
33
38
|
},
|
|
34
|
-
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError,
|
|
39
|
+
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError, WorkerTrait,
|
|
35
40
|
};
|
|
36
41
|
use activities::{LocalInFlightActInfo, WorkerActivityTasks};
|
|
37
42
|
use futures::{Future, TryFutureExt};
|
|
38
43
|
use std::{convert::TryInto, sync::Arc};
|
|
39
|
-
use temporal_client::
|
|
44
|
+
use temporal_client::WorkflowTaskCompletion;
|
|
40
45
|
use temporal_sdk_core_protos::{
|
|
41
46
|
coresdk::{
|
|
42
47
|
activity_result::activity_execution_result,
|
|
43
48
|
activity_task::ActivityTask,
|
|
44
49
|
workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
|
|
45
50
|
workflow_completion::{self, workflow_activation_completion, WorkflowActivationCompletion},
|
|
51
|
+
ActivityTaskCompletion,
|
|
46
52
|
},
|
|
47
53
|
temporal::api::{
|
|
48
54
|
enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
|
|
@@ -52,14 +58,19 @@ use temporal_sdk_core_protos::{
|
|
|
52
58
|
},
|
|
53
59
|
TaskToken,
|
|
54
60
|
};
|
|
55
|
-
use tokio::sync::
|
|
61
|
+
use tokio::sync::Notify;
|
|
62
|
+
use tokio_util::sync::CancellationToken;
|
|
56
63
|
use tonic::Code;
|
|
57
64
|
use tracing_futures::Instrument;
|
|
58
65
|
|
|
66
|
+
#[cfg(test)]
|
|
67
|
+
use crate::worker::client::WorkerClient;
|
|
68
|
+
use crate::workflow::workflow_tasks::EvictionRequestResult;
|
|
69
|
+
|
|
59
70
|
/// A worker polls on a certain task queue
|
|
60
71
|
pub struct Worker {
|
|
61
72
|
config: WorkerConfig,
|
|
62
|
-
|
|
73
|
+
wf_client: Arc<WorkerClientBag>,
|
|
63
74
|
|
|
64
75
|
/// Will be populated when this worker should poll on a sticky WFT queue
|
|
65
76
|
sticky_name: Option<String>,
|
|
@@ -67,14 +78,14 @@ pub struct Worker {
|
|
|
67
78
|
/// Buffers workflow task polling in the event we need to return a pending activation while
|
|
68
79
|
/// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
|
|
69
80
|
wf_task_source: WFTSource,
|
|
70
|
-
/// Workflow task management
|
|
71
|
-
|
|
81
|
+
/// Workflow task management
|
|
82
|
+
wft_manager: WorkflowTaskManager,
|
|
72
83
|
/// Manages activity tasks for this worker/task queue
|
|
73
84
|
at_task_mgr: Option<WorkerActivityTasks>,
|
|
74
85
|
/// Manages local activities
|
|
75
86
|
local_act_mgr: LocalActivityManager,
|
|
76
87
|
/// Ensures we stay at or below this worker's maximum concurrent workflow limit
|
|
77
|
-
workflows_semaphore:
|
|
88
|
+
workflows_semaphore: MeteredSemaphore,
|
|
78
89
|
/// Used to wake blocked workflow task polling when there is some change to workflow activations
|
|
79
90
|
/// that should cause us to restart the loop
|
|
80
91
|
pending_activations_notify: Arc<Notify>,
|
|
@@ -82,21 +93,106 @@ pub struct Worker {
|
|
|
82
93
|
/// a WFT is completed.
|
|
83
94
|
wfts_drained_notify: Arc<Notify>,
|
|
84
95
|
/// Has shutdown been called?
|
|
85
|
-
|
|
86
|
-
shutdown_sender: watch::Sender<bool>,
|
|
96
|
+
shutdown_token: CancellationToken,
|
|
87
97
|
/// Will be called at the end of each activation completion
|
|
88
98
|
post_activate_hook: Option<Box<dyn Fn(&Self) + Send + Sync>>,
|
|
89
99
|
|
|
90
100
|
metrics: MetricsContext,
|
|
91
101
|
}
|
|
92
102
|
|
|
103
|
+
#[async_trait::async_trait]
|
|
104
|
+
impl WorkerTrait for Worker {
|
|
105
|
+
#[instrument(level = "debug", skip(self), fields(run_id))]
|
|
106
|
+
async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
107
|
+
self.next_workflow_activation().await
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
#[instrument(level = "debug", skip(self))]
|
|
111
|
+
async fn poll_activity_task(&self) -> Result<ActivityTask, PollActivityError> {
|
|
112
|
+
loop {
|
|
113
|
+
match self.activity_poll().await.transpose() {
|
|
114
|
+
Some(r) => break r,
|
|
115
|
+
None => {
|
|
116
|
+
tokio::task::yield_now().await;
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
#[instrument(level = "debug", skip(self, completion),
|
|
124
|
+
fields(completion=%&completion, run_id=%completion.run_id))]
|
|
125
|
+
async fn complete_workflow_activation(
|
|
126
|
+
&self,
|
|
127
|
+
completion: WorkflowActivationCompletion,
|
|
128
|
+
) -> Result<(), CompleteWfError> {
|
|
129
|
+
self.complete_workflow_activation(completion).await
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
#[instrument(level = "debug", skip(self, completion),
|
|
133
|
+
fields(completion=%&completion))]
|
|
134
|
+
async fn complete_activity_task(
|
|
135
|
+
&self,
|
|
136
|
+
completion: ActivityTaskCompletion,
|
|
137
|
+
) -> Result<(), CompleteActivityError> {
|
|
138
|
+
let task_token = TaskToken(completion.task_token);
|
|
139
|
+
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
140
|
+
s
|
|
141
|
+
} else {
|
|
142
|
+
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
143
|
+
reason: "Activity completion had empty result/status field".to_owned(),
|
|
144
|
+
completion: None,
|
|
145
|
+
});
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
self.complete_activity(task_token, status).await
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
152
|
+
self.record_heartbeat(details);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
fn request_workflow_eviction(&self, run_id: &str) {
|
|
156
|
+
self.request_wf_eviction(
|
|
157
|
+
run_id,
|
|
158
|
+
"Eviction explicitly requested by lang",
|
|
159
|
+
EvictionReason::LangRequested,
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
fn get_config(&self) -> &WorkerConfig {
|
|
164
|
+
&self.config
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
168
|
+
// TODO: will be in trait after Roey's shutdown refactor
|
|
169
|
+
fn initiate_shutdown(&self) {
|
|
170
|
+
self.shutdown_token.cancel();
|
|
171
|
+
// First, we want to stop polling of both activity and workflow tasks
|
|
172
|
+
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
173
|
+
atm.notify_shutdown();
|
|
174
|
+
}
|
|
175
|
+
self.wf_task_source.stop_pollers();
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
async fn shutdown(&self) {
|
|
179
|
+
self.shutdown().await
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
async fn finalize_shutdown(self) {
|
|
183
|
+
self.shutdown().await;
|
|
184
|
+
self.finalize_shutdown().await
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
93
188
|
impl Worker {
|
|
94
189
|
pub(crate) fn new(
|
|
95
190
|
config: WorkerConfig,
|
|
96
191
|
sticky_queue_name: Option<String>,
|
|
97
|
-
|
|
192
|
+
client: Arc<WorkerClientBag>,
|
|
98
193
|
metrics: MetricsContext,
|
|
99
194
|
) -> Self {
|
|
195
|
+
info!(task_queue = %config.task_queue, "Initializing worker");
|
|
100
196
|
metrics.worker_registered();
|
|
101
197
|
|
|
102
198
|
let max_nonsticky_polls = if sticky_queue_name.is_some() {
|
|
@@ -107,7 +203,7 @@ impl Worker {
|
|
|
107
203
|
let max_sticky_polls = config.max_sticky_polls();
|
|
108
204
|
let wft_metrics = metrics.with_new_attrs([workflow_poller()]);
|
|
109
205
|
let mut wf_task_poll_buffer = new_workflow_task_buffer(
|
|
110
|
-
|
|
206
|
+
client.clone(),
|
|
111
207
|
config.task_queue.clone(),
|
|
112
208
|
false,
|
|
113
209
|
max_nonsticky_polls,
|
|
@@ -117,7 +213,7 @@ impl Worker {
|
|
|
117
213
|
let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
|
|
118
214
|
let sticky_metrics = metrics.with_new_attrs([workflow_sticky_poller()]);
|
|
119
215
|
let mut sp = new_workflow_task_buffer(
|
|
120
|
-
|
|
216
|
+
client.clone(),
|
|
121
217
|
sqn.clone(),
|
|
122
218
|
true,
|
|
123
219
|
max_sticky_polls,
|
|
@@ -130,10 +226,11 @@ impl Worker {
|
|
|
130
226
|
None
|
|
131
227
|
} else {
|
|
132
228
|
let mut ap = new_activity_task_buffer(
|
|
133
|
-
|
|
229
|
+
client.clone(),
|
|
134
230
|
config.task_queue.clone(),
|
|
135
231
|
config.max_concurrent_at_polls,
|
|
136
232
|
config.max_concurrent_at_polls * 2,
|
|
233
|
+
config.max_task_queue_activities_per_second,
|
|
137
234
|
);
|
|
138
235
|
let act_metrics = metrics.with_new_attrs([activity_poller()]);
|
|
139
236
|
ap.set_num_pollers_handler(move |np| act_metrics.record_num_pollers(np));
|
|
@@ -149,17 +246,22 @@ impl Worker {
|
|
|
149
246
|
Self::new_with_pollers(
|
|
150
247
|
config,
|
|
151
248
|
sticky_queue_name,
|
|
152
|
-
|
|
249
|
+
client,
|
|
153
250
|
wf_task_poll_buffer,
|
|
154
251
|
act_poll_buffer,
|
|
155
252
|
metrics,
|
|
156
253
|
)
|
|
157
254
|
}
|
|
158
255
|
|
|
256
|
+
#[cfg(test)]
|
|
257
|
+
pub(crate) fn new_test(config: WorkerConfig, client: impl WorkerClient + 'static) -> Self {
|
|
258
|
+
Self::new(config, None, Arc::new(client.into()), Default::default())
|
|
259
|
+
}
|
|
260
|
+
|
|
159
261
|
pub(crate) fn new_with_pollers(
|
|
160
262
|
config: WorkerConfig,
|
|
161
263
|
sticky_queue_name: Option<String>,
|
|
162
|
-
|
|
264
|
+
client: Arc<WorkerClientBag>,
|
|
163
265
|
wft_poller: BoxedWFPoller,
|
|
164
266
|
act_poller: Option<BoxedActPoller>,
|
|
165
267
|
metrics: MetricsContext,
|
|
@@ -173,9 +275,8 @@ impl Worker {
|
|
|
173
275
|
};
|
|
174
276
|
let pa_notif = Arc::new(Notify::new());
|
|
175
277
|
let wfts_drained_notify = Arc::new(Notify::new());
|
|
176
|
-
let (shut_tx, shut_rx) = watch::channel(false);
|
|
177
278
|
Self {
|
|
178
|
-
|
|
279
|
+
wf_client: client.clone(),
|
|
179
280
|
sticky_name: sticky_queue_name,
|
|
180
281
|
wf_task_source: WFTSource::new(wft_poller),
|
|
181
282
|
wft_manager: WorkflowTaskManager::new(pa_notif.clone(), cache_policy, metrics.clone()),
|
|
@@ -183,7 +284,7 @@ impl Worker {
|
|
|
183
284
|
WorkerActivityTasks::new(
|
|
184
285
|
config.max_outstanding_activities,
|
|
185
286
|
ap,
|
|
186
|
-
|
|
287
|
+
client.clone(),
|
|
187
288
|
metrics.clone(),
|
|
188
289
|
config.max_heartbeat_throttle_interval,
|
|
189
290
|
config.default_heartbeat_throttle_interval,
|
|
@@ -191,12 +292,16 @@ impl Worker {
|
|
|
191
292
|
}),
|
|
192
293
|
local_act_mgr: LocalActivityManager::new(
|
|
193
294
|
config.max_outstanding_local_activities,
|
|
194
|
-
|
|
295
|
+
config.namespace.clone(),
|
|
296
|
+
metrics.with_new_attrs([local_activity_worker_type()]),
|
|
297
|
+
),
|
|
298
|
+
workflows_semaphore: MeteredSemaphore::new(
|
|
299
|
+
config.max_outstanding_workflow_tasks,
|
|
300
|
+
metrics.with_new_attrs([workflow_worker_type()]),
|
|
301
|
+
MetricsContext::available_task_slots,
|
|
195
302
|
),
|
|
196
|
-
workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
|
|
197
303
|
config,
|
|
198
|
-
|
|
199
|
-
shutdown_sender: shut_tx,
|
|
304
|
+
shutdown_token: CancellationToken::new(),
|
|
200
305
|
post_activate_hook: None,
|
|
201
306
|
pending_activations_notify: pa_notif,
|
|
202
307
|
wfts_drained_notify,
|
|
@@ -204,20 +309,11 @@ impl Worker {
|
|
|
204
309
|
}
|
|
205
310
|
}
|
|
206
311
|
|
|
207
|
-
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
208
|
-
pub(crate) fn initiate_shutdown(&self) {
|
|
209
|
-
let _ = self.shutdown_sender.send(true);
|
|
210
|
-
// First, we want to stop polling of both activity and workflow tasks
|
|
211
|
-
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
212
|
-
atm.notify_shutdown();
|
|
213
|
-
}
|
|
214
|
-
self.wf_task_source.stop_pollers();
|
|
215
|
-
}
|
|
216
|
-
|
|
217
312
|
/// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
|
|
218
313
|
/// completed
|
|
219
314
|
pub(crate) async fn shutdown(&self) {
|
|
220
315
|
self.initiate_shutdown();
|
|
316
|
+
info!("Initiated shutdown");
|
|
221
317
|
// Next we need to wait for all local activities to finish so no more workflow task
|
|
222
318
|
// heartbeats will be generated
|
|
223
319
|
self.local_act_mgr.shutdown_and_wait_all_finished().await;
|
|
@@ -249,7 +345,7 @@ impl Worker {
|
|
|
249
345
|
|
|
250
346
|
#[cfg(test)]
|
|
251
347
|
pub(crate) fn available_wft_permits(&self) -> usize {
|
|
252
|
-
self.workflows_semaphore.available_permits()
|
|
348
|
+
self.workflows_semaphore.sem.available_permits()
|
|
253
349
|
}
|
|
254
350
|
|
|
255
351
|
/// Get new activity tasks (may be local or nonlocal). Local activities are returned first
|
|
@@ -257,12 +353,12 @@ impl Worker {
|
|
|
257
353
|
///
|
|
258
354
|
/// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
|
|
259
355
|
/// be restarted
|
|
260
|
-
|
|
356
|
+
async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
|
|
261
357
|
let act_mgr_poll = async {
|
|
262
358
|
if let Some(ref act_mgr) = self.at_task_mgr {
|
|
263
359
|
act_mgr.poll().await
|
|
264
360
|
} else {
|
|
265
|
-
|
|
361
|
+
self.shutdown_token.cancelled().await;
|
|
266
362
|
Err(PollActivityError::ShutDown)
|
|
267
363
|
}
|
|
268
364
|
};
|
|
@@ -278,7 +374,12 @@ impl Worker {
|
|
|
278
374
|
&run_id, LocalResolution::LocalActivity(resolution)).await;
|
|
279
375
|
Ok(task)
|
|
280
376
|
},
|
|
281
|
-
None =>
|
|
377
|
+
None => {
|
|
378
|
+
if self.shutdown_token.is_cancelled() {
|
|
379
|
+
return Err(PollActivityError::ShutDown);
|
|
380
|
+
}
|
|
381
|
+
Ok(None)
|
|
382
|
+
}
|
|
282
383
|
}
|
|
283
384
|
},
|
|
284
385
|
r = act_mgr_poll => r,
|
|
@@ -326,8 +427,7 @@ impl Worker {
|
|
|
326
427
|
}
|
|
327
428
|
|
|
328
429
|
if let Some(atm) = &self.at_task_mgr {
|
|
329
|
-
atm.complete(task_token, status, self.
|
|
330
|
-
.await
|
|
430
|
+
atm.complete(task_token, status, &**self.wf_client).await
|
|
331
431
|
} else {
|
|
332
432
|
error!(
|
|
333
433
|
"Tried to complete activity {} on a worker that does not have an activity manager",
|
|
@@ -428,16 +528,21 @@ impl Worker {
|
|
|
428
528
|
|
|
429
529
|
/// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
|
|
430
530
|
pub(crate) fn return_workflow_task_permit(&self) {
|
|
431
|
-
self.workflows_semaphore.
|
|
531
|
+
self.workflows_semaphore.add_permit();
|
|
432
532
|
}
|
|
433
533
|
|
|
534
|
+
/// Request a workflow eviction. Returns true if we actually queued up a new eviction request.
|
|
434
535
|
pub(crate) fn request_wf_eviction(
|
|
435
536
|
&self,
|
|
436
537
|
run_id: &str,
|
|
437
538
|
message: impl Into<String>,
|
|
438
539
|
reason: EvictionReason,
|
|
439
|
-
) {
|
|
440
|
-
self.wft_manager.request_eviction(run_id, message, reason)
|
|
540
|
+
) -> bool {
|
|
541
|
+
match self.wft_manager.request_eviction(run_id, message, reason) {
|
|
542
|
+
EvictionRequestResult::EvictionIssued(_) => true,
|
|
543
|
+
EvictionRequestResult::NotFound => false,
|
|
544
|
+
EvictionRequestResult::EvictionAlreadyOutstanding => false,
|
|
545
|
+
}
|
|
441
546
|
}
|
|
442
547
|
|
|
443
548
|
/// Sets a function to be called at the end of each activation completion
|
|
@@ -448,11 +553,25 @@ impl Worker {
|
|
|
448
553
|
self.post_activate_hook = Some(Box::new(callback))
|
|
449
554
|
}
|
|
450
555
|
|
|
556
|
+
/// Used for replay workers - causes the worker to shutdown when the given run reaches the
|
|
557
|
+
/// given event number
|
|
558
|
+
pub(crate) fn set_shutdown_on_run_reaches_event(&mut self, run_id: String, last_event: i64) {
|
|
559
|
+
self.set_post_activate_hook(move |worker| {
|
|
560
|
+
if worker
|
|
561
|
+
.wft_manager
|
|
562
|
+
.most_recently_processed_event(&run_id)
|
|
563
|
+
.unwrap_or_default()
|
|
564
|
+
>= last_event
|
|
565
|
+
{
|
|
566
|
+
worker.initiate_shutdown();
|
|
567
|
+
}
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
|
|
451
571
|
/// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
|
|
452
572
|
async fn workflow_poll_or_wfts_drained(
|
|
453
573
|
&self,
|
|
454
574
|
) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
455
|
-
let mut shutdown_requested = self.shutdown_requested.clone();
|
|
456
575
|
loop {
|
|
457
576
|
tokio::select! {
|
|
458
577
|
biased;
|
|
@@ -466,7 +585,7 @@ impl Worker {
|
|
|
466
585
|
}
|
|
467
586
|
return r
|
|
468
587
|
},
|
|
469
|
-
_ =
|
|
588
|
+
_ = self.shutdown_token.cancelled() => {},
|
|
470
589
|
}
|
|
471
590
|
}
|
|
472
591
|
}
|
|
@@ -481,7 +600,7 @@ impl Worker {
|
|
|
481
600
|
// heartbeating which is a "new" workflow task that we need to accept and process as long as
|
|
482
601
|
// the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
|
|
483
602
|
// then we must fetch them from the source before we can say workflow polling is shutdown.
|
|
484
|
-
if
|
|
603
|
+
if self.shutdown_token.is_cancelled()
|
|
485
604
|
&& !self.wf_task_source.has_tasks_from_complete()
|
|
486
605
|
&& self.local_act_mgr.num_outstanding() == 0
|
|
487
606
|
{
|
|
@@ -535,10 +654,9 @@ impl Worker {
|
|
|
535
654
|
work: ValidPollWFTQResponse,
|
|
536
655
|
) -> Result<Option<WorkflowActivation>, PollWfError> {
|
|
537
656
|
let we = work.workflow_execution.clone();
|
|
538
|
-
let tt = work.task_token.clone();
|
|
539
657
|
let res = self
|
|
540
658
|
.wft_manager
|
|
541
|
-
.apply_new_poll_resp(work, self.
|
|
659
|
+
.apply_new_poll_resp(work, self.wf_client.clone())
|
|
542
660
|
.await;
|
|
543
661
|
Ok(match res {
|
|
544
662
|
NewWfTaskOutcome::IssueActivation(a) => {
|
|
@@ -555,38 +673,24 @@ impl Worker {
|
|
|
555
673
|
debug!(workflow_execution=?we,
|
|
556
674
|
"No new work for lang to perform after polling server");
|
|
557
675
|
self.complete_workflow_activation(WorkflowActivationCompletion {
|
|
558
|
-
task_queue: self.config.task_queue.clone(),
|
|
559
676
|
run_id: we.run_id,
|
|
560
677
|
status: Some(workflow_completion::Success::from_variants(vec![]).into()),
|
|
561
678
|
})
|
|
562
679
|
.await?;
|
|
563
680
|
None
|
|
564
681
|
}
|
|
565
|
-
NewWfTaskOutcome::CacheMiss => {
|
|
566
|
-
debug!(workflow_execution=?we, "Unable to process workflow task with partial \
|
|
567
|
-
history because workflow cache does not contain workflow anymore.");
|
|
568
|
-
self.server_gateway
|
|
569
|
-
.fail_workflow_task(
|
|
570
|
-
tt,
|
|
571
|
-
WorkflowTaskFailedCause::ResetStickyTaskQueue,
|
|
572
|
-
Some(Failure {
|
|
573
|
-
message: "Unable to process workflow task with partial history \
|
|
574
|
-
because workflow cache does not contain workflow anymore."
|
|
575
|
-
.to_string(),
|
|
576
|
-
..Default::default()
|
|
577
|
-
}),
|
|
578
|
-
)
|
|
579
|
-
.await?;
|
|
580
|
-
self.return_workflow_task_permit();
|
|
581
|
-
None
|
|
582
|
-
}
|
|
583
682
|
NewWfTaskOutcome::Evict(e) => {
|
|
584
683
|
warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
|
|
585
|
-
self.request_wf_eviction(
|
|
684
|
+
let did_issue_eviction = self.request_wf_eviction(
|
|
586
685
|
&we.run_id,
|
|
587
686
|
format!("Error while applying poll response to workflow: {:?}", e),
|
|
588
687
|
e.evict_reason(),
|
|
589
688
|
);
|
|
689
|
+
// If we didn't actually need to issue an eviction, then return the WFT permit.
|
|
690
|
+
// EX: The workflow we tried to evict wasn't in the cache.
|
|
691
|
+
if !did_issue_eviction {
|
|
692
|
+
self.return_workflow_task_permit();
|
|
693
|
+
}
|
|
590
694
|
None
|
|
591
695
|
}
|
|
592
696
|
})
|
|
@@ -627,9 +731,12 @@ impl Worker {
|
|
|
627
731
|
force_new_wft,
|
|
628
732
|
},
|
|
629
733
|
})) => {
|
|
630
|
-
debug!("Sending commands to server: {
|
|
734
|
+
debug!("Sending commands to server: {}", commands.display());
|
|
631
735
|
if !query_responses.is_empty() {
|
|
632
|
-
debug!(
|
|
736
|
+
debug!(
|
|
737
|
+
"Sending query responses to server: {}",
|
|
738
|
+
query_responses.display()
|
|
739
|
+
);
|
|
633
740
|
}
|
|
634
741
|
let mut completion = WorkflowTaskCompletion {
|
|
635
742
|
task_token,
|
|
@@ -649,7 +756,7 @@ impl Worker {
|
|
|
649
756
|
|
|
650
757
|
self.handle_wft_reporting_errs(run_id, || async {
|
|
651
758
|
let maybe_wft = self
|
|
652
|
-
.
|
|
759
|
+
.wf_client
|
|
653
760
|
.complete_workflow_task(completion)
|
|
654
761
|
.instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
|
|
655
762
|
.await?;
|
|
@@ -669,7 +776,7 @@ impl Worker {
|
|
|
669
776
|
action: ActivationAction::RespondLegacyQuery { result },
|
|
670
777
|
..
|
|
671
778
|
})) => {
|
|
672
|
-
self.
|
|
779
|
+
self.wf_client
|
|
673
780
|
.respond_legacy_query(task_token, result)
|
|
674
781
|
.await?;
|
|
675
782
|
Ok(WFTReportOutcome {
|
|
@@ -720,7 +827,7 @@ impl Worker {
|
|
|
720
827
|
FailedActivationOutcome::Report(tt) => {
|
|
721
828
|
warn!(run_id, failure=?failure, "Failing workflow activation");
|
|
722
829
|
self.handle_wft_reporting_errs(run_id, || async {
|
|
723
|
-
self.
|
|
830
|
+
self.wf_client
|
|
724
831
|
.fail_workflow_task(tt, cause, failure.failure.map(Into::into))
|
|
725
832
|
.await
|
|
726
833
|
})
|
|
@@ -732,7 +839,7 @@ impl Worker {
|
|
|
732
839
|
}
|
|
733
840
|
FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
|
|
734
841
|
warn!(run_id, failure=?failure, "Failing legacy query request");
|
|
735
|
-
self.
|
|
842
|
+
self.wf_client
|
|
736
843
|
.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
737
844
|
.await?;
|
|
738
845
|
WFTReportOutcome {
|
|
@@ -765,7 +872,7 @@ impl Worker {
|
|
|
765
872
|
// Silence unhandled command errors since the lang SDK cannot do anything about
|
|
766
873
|
// them besides poll again, which it will do anyway.
|
|
767
874
|
tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
|
|
768
|
-
|
|
875
|
+
debug!(error = %err, run_id, "Unhandled command response when completing");
|
|
769
876
|
should_evict = Some(EvictionReason::UnhandledCommand);
|
|
770
877
|
Ok(())
|
|
771
878
|
}
|
|
@@ -851,91 +958,83 @@ struct WFTReportOutcome {
|
|
|
851
958
|
#[cfg(test)]
|
|
852
959
|
mod tests {
|
|
853
960
|
use super::*;
|
|
854
|
-
use
|
|
961
|
+
use crate::{test_help::test_worker_cfg, worker::client::mocks::mock_workflow_client};
|
|
855
962
|
use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
|
|
856
963
|
|
|
857
964
|
#[tokio::test]
|
|
858
965
|
async fn activity_timeouts_dont_eat_permits() {
|
|
859
|
-
let mut
|
|
860
|
-
|
|
966
|
+
let mut mock_client = mock_workflow_client();
|
|
967
|
+
mock_client
|
|
861
968
|
.expect_poll_activity_task()
|
|
862
|
-
.returning(|_| Ok(PollActivityTaskQueueResponse::default()));
|
|
969
|
+
.returning(|_, _| Ok(PollActivityTaskQueueResponse::default()));
|
|
863
970
|
|
|
864
|
-
let cfg =
|
|
865
|
-
.task_queue("whatever")
|
|
971
|
+
let cfg = test_worker_cfg()
|
|
866
972
|
.max_outstanding_activities(5_usize)
|
|
867
973
|
.build()
|
|
868
974
|
.unwrap();
|
|
869
|
-
let worker = Worker::
|
|
975
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
870
976
|
assert_eq!(worker.activity_poll().await.unwrap(), None);
|
|
871
977
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
872
978
|
}
|
|
873
979
|
|
|
874
980
|
#[tokio::test]
|
|
875
981
|
async fn workflow_timeouts_dont_eat_permits() {
|
|
876
|
-
let mut
|
|
877
|
-
|
|
982
|
+
let mut mock_client = mock_workflow_client();
|
|
983
|
+
mock_client
|
|
878
984
|
.expect_poll_workflow_task()
|
|
879
985
|
.returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
|
|
880
986
|
|
|
881
|
-
let cfg =
|
|
882
|
-
.task_queue("whatever")
|
|
987
|
+
let cfg = test_worker_cfg()
|
|
883
988
|
.max_outstanding_workflow_tasks(5_usize)
|
|
884
989
|
.build()
|
|
885
990
|
.unwrap();
|
|
886
|
-
let worker = Worker::
|
|
991
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
887
992
|
assert_eq!(worker.workflow_poll().await.unwrap(), None);
|
|
888
|
-
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
993
|
+
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
889
994
|
}
|
|
890
995
|
|
|
891
996
|
#[tokio::test]
|
|
892
997
|
async fn activity_errs_dont_eat_permits() {
|
|
893
|
-
let mut
|
|
894
|
-
|
|
998
|
+
let mut mock_client = mock_workflow_client();
|
|
999
|
+
mock_client
|
|
895
1000
|
.expect_poll_activity_task()
|
|
896
|
-
.returning(|_| Err(tonic::Status::internal("ahhh")));
|
|
1001
|
+
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
897
1002
|
|
|
898
|
-
let cfg =
|
|
899
|
-
.task_queue("whatever")
|
|
1003
|
+
let cfg = test_worker_cfg()
|
|
900
1004
|
.max_outstanding_activities(5_usize)
|
|
901
1005
|
.build()
|
|
902
1006
|
.unwrap();
|
|
903
|
-
let worker = Worker::
|
|
1007
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
904
1008
|
assert!(worker.activity_poll().await.is_err());
|
|
905
1009
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
906
1010
|
}
|
|
907
1011
|
|
|
908
1012
|
#[tokio::test]
|
|
909
1013
|
async fn workflow_errs_dont_eat_permits() {
|
|
910
|
-
let mut
|
|
911
|
-
|
|
1014
|
+
let mut mock_client = mock_workflow_client();
|
|
1015
|
+
mock_client
|
|
912
1016
|
.expect_poll_workflow_task()
|
|
913
1017
|
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
914
1018
|
|
|
915
|
-
let cfg =
|
|
916
|
-
.task_queue("whatever")
|
|
1019
|
+
let cfg = test_worker_cfg()
|
|
917
1020
|
.max_outstanding_workflow_tasks(5_usize)
|
|
918
1021
|
.build()
|
|
919
1022
|
.unwrap();
|
|
920
|
-
let worker = Worker::
|
|
1023
|
+
let worker = Worker::new_test(cfg, mock_client);
|
|
921
1024
|
assert!(worker.workflow_poll().await.is_err());
|
|
922
|
-
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
1025
|
+
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
923
1026
|
}
|
|
924
1027
|
|
|
925
1028
|
#[test]
|
|
926
1029
|
fn max_polls_calculated_properly() {
|
|
927
|
-
let cfg =
|
|
928
|
-
.task_queue("whatever")
|
|
929
|
-
.build()
|
|
930
|
-
.unwrap();
|
|
1030
|
+
let cfg = test_worker_cfg().build().unwrap();
|
|
931
1031
|
assert_eq!(cfg.max_nonsticky_polls(), 1);
|
|
932
1032
|
assert_eq!(cfg.max_sticky_polls(), 4);
|
|
933
1033
|
}
|
|
934
1034
|
|
|
935
1035
|
#[test]
|
|
936
1036
|
fn max_polls_zero_is_err() {
|
|
937
|
-
assert!(
|
|
938
|
-
.task_queue("whatever")
|
|
1037
|
+
assert!(test_worker_cfg()
|
|
939
1038
|
.max_concurrent_wft_polls(0_usize)
|
|
940
1039
|
.build()
|
|
941
1040
|
.is_err());
|