@temporalio/core-bridge 0.23.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +118 -15
- package/Cargo.toml +2 -1
- package/LICENSE.md +1 -1
- package/README.md +1 -1
- package/index.d.ts +47 -18
- package/package.json +7 -7
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/docker-compose.yaml +4 -2
- package/sdk-core/ARCHITECTURE.md +9 -7
- package/sdk-core/README.md +5 -1
- package/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -3
- package/sdk-core/client/src/lib.rs +26 -8
- package/sdk-core/client/src/raw.rs +166 -54
- package/sdk-core/client/src/retry.rs +9 -4
- package/sdk-core/client/src/workflow_handle/mod.rs +4 -2
- package/sdk-core/core/Cargo.toml +2 -0
- package/sdk-core/core/src/abstractions.rs +137 -16
- package/sdk-core/core/src/core_tests/activity_tasks.rs +258 -63
- package/sdk-core/core/src/core_tests/child_workflows.rs +1 -2
- package/sdk-core/core/src/core_tests/determinism.rs +2 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +8 -7
- package/sdk-core/core/src/core_tests/queries.rs +146 -60
- package/sdk-core/core/src/core_tests/replay_flag.rs +1 -1
- package/sdk-core/core/src/core_tests/workers.rs +39 -23
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +387 -280
- package/sdk-core/core/src/lib.rs +6 -4
- package/sdk-core/core/src/pollers/poll_buffer.rs +16 -10
- package/sdk-core/core/src/protosext/mod.rs +6 -6
- package/sdk-core/core/src/retry_logic.rs +1 -1
- package/sdk-core/core/src/telemetry/metrics.rs +21 -7
- package/sdk-core/core/src/telemetry/mod.rs +18 -4
- package/sdk-core/core/src/test_help/mod.rs +341 -109
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +18 -9
- package/sdk-core/core/src/worker/activities/local_activities.rs +19 -16
- package/sdk-core/core/src/worker/activities.rs +156 -29
- package/sdk-core/core/src/worker/client.rs +1 -0
- package/sdk-core/core/src/worker/mod.rs +132 -659
- package/sdk-core/core/src/{workflow → worker/workflow}/bridge.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/driven_workflow.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/history_update.rs +16 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/activity_state_machine.rs +39 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_external_state_machine.rs +5 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/cancel_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/child_workflow_state_machine.rs +2 -4
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/complete_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/continue_as_new_workflow_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/fail_workflow_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/local_activity_state_machine.rs +2 -5
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mod.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/patch_state_machine.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/side_effect_state_machine.rs +0 -0
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/signal_external_state_machine.rs +4 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/timer_state_machine.rs +1 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/transition_coverage.rs +1 -1
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/upsert_search_attributes_state_machine.rs +5 -7
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines/local_acts.rs +2 -2
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_machines.rs +40 -16
- package/sdk-core/core/src/{workflow → worker/workflow}/machines/workflow_task_state_machine.rs +0 -0
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
- package/sdk-core/core/src/worker/workflow/managed_run.rs +627 -0
- package/sdk-core/core/src/worker/workflow/mod.rs +1115 -0
- package/sdk-core/core/src/worker/workflow/run_cache.rs +143 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +936 -0
- package/sdk-core/core-api/src/errors.rs +3 -10
- package/sdk-core/core-api/src/lib.rs +2 -1
- package/sdk-core/core-api/src/worker.rs +26 -2
- package/sdk-core/etc/dynamic-config.yaml +2 -0
- package/sdk-core/integ-with-otel.sh +1 -1
- package/sdk-core/protos/api_upstream/Makefile +4 -4
- package/sdk-core/protos/api_upstream/api-linter.yaml +2 -0
- package/sdk-core/protos/api_upstream/buf.yaml +8 -9
- package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +3 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +32 -4
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +69 -19
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +163 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +97 -0
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +25 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +180 -3
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +53 -3
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +2 -2
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +6 -5
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -1
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +0 -64
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +2 -1
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +11 -8
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +30 -25
- package/sdk-core/sdk/src/activity_context.rs +12 -5
- package/sdk-core/sdk/src/app_data.rs +37 -0
- package/sdk-core/sdk/src/lib.rs +76 -43
- package/sdk-core/sdk/src/workflow_context/options.rs +8 -6
- package/sdk-core/sdk/src/workflow_context.rs +14 -19
- package/sdk-core/sdk/src/workflow_future.rs +11 -6
- package/sdk-core/sdk-core-protos/src/history_builder.rs +19 -5
- package/sdk-core/sdk-core-protos/src/history_info.rs +11 -6
- package/sdk-core/sdk-core-protos/src/lib.rs +74 -176
- package/sdk-core/test-utils/src/lib.rs +85 -72
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -9
- package/sdk-core/tests/integ_tests/polling_tests.rs +12 -0
- package/sdk-core/tests/integ_tests/queries_tests.rs +39 -22
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +49 -4
- package/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +74 -13
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +19 -0
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -3
- package/sdk-core/tests/integ_tests/workflow_tests.rs +10 -23
- package/sdk-core/tests/load_tests.rs +8 -3
- package/sdk-core/tests/main.rs +2 -1
- package/src/conversions.rs +47 -39
- package/src/errors.rs +10 -21
- package/src/lib.rs +342 -325
- package/sdk-core/core/src/pending_activations.rs +0 -173
- package/sdk-core/core/src/worker/wft_delivery.rs +0 -81
- package/sdk-core/core/src/workflow/mod.rs +0 -478
- package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +0 -194
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +0 -418
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +0 -989
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
mod activities;
|
|
2
2
|
pub(crate) mod client;
|
|
3
|
-
mod
|
|
3
|
+
mod workflow;
|
|
4
4
|
|
|
5
5
|
pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
|
|
6
6
|
|
|
@@ -8,96 +8,68 @@ pub(crate) use activities::{
|
|
|
8
8
|
ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
9
9
|
NewLocalAct,
|
|
10
10
|
};
|
|
11
|
+
#[cfg(test)]
|
|
12
|
+
pub(crate) use workflow::ManagedWFFunc;
|
|
13
|
+
pub(crate) use workflow::{wft_poller::new_wft_poller, LEGACY_QUERY_ID};
|
|
11
14
|
|
|
12
15
|
use crate::{
|
|
13
|
-
abstractions::MeteredSemaphore,
|
|
14
16
|
errors::CompleteWfError,
|
|
15
17
|
pollers::{
|
|
16
|
-
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller,
|
|
18
|
+
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, Poller,
|
|
17
19
|
WorkflowTaskPoller,
|
|
18
20
|
},
|
|
19
|
-
protosext::
|
|
20
|
-
telemetry::{
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
workflow_worker_type, MetricsContext,
|
|
24
|
-
},
|
|
25
|
-
VecDisplayer,
|
|
21
|
+
protosext::ValidPollWFTQResponse,
|
|
22
|
+
telemetry::metrics::{
|
|
23
|
+
activity_poller, local_activity_worker_type, workflow_poller, workflow_sticky_poller,
|
|
24
|
+
MetricsContext,
|
|
26
25
|
},
|
|
27
26
|
worker::{
|
|
28
27
|
activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
|
|
29
28
|
client::WorkerClientBag,
|
|
30
|
-
|
|
31
|
-
},
|
|
32
|
-
workflow::{
|
|
33
|
-
workflow_tasks::{
|
|
34
|
-
ActivationAction, FailedActivationOutcome, NewWfTaskOutcome,
|
|
35
|
-
ServerCommandsWithWorkflowInfo, WorkflowTaskManager,
|
|
36
|
-
},
|
|
37
|
-
EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
|
|
29
|
+
workflow::{LocalResolution, Workflows},
|
|
38
30
|
},
|
|
39
31
|
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError, WorkerTrait,
|
|
40
32
|
};
|
|
41
33
|
use activities::{LocalInFlightActInfo, WorkerActivityTasks};
|
|
42
|
-
use futures::
|
|
43
|
-
use std::{convert::TryInto,
|
|
44
|
-
use temporal_client::WorkflowTaskCompletion;
|
|
34
|
+
use futures::Stream;
|
|
35
|
+
use std::{convert::TryInto, sync::Arc};
|
|
45
36
|
use temporal_sdk_core_protos::{
|
|
46
37
|
coresdk::{
|
|
47
38
|
activity_result::activity_execution_result,
|
|
48
39
|
activity_task::ActivityTask,
|
|
49
40
|
workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
|
|
50
|
-
workflow_completion::
|
|
41
|
+
workflow_completion::WorkflowActivationCompletion,
|
|
51
42
|
ActivityTaskCompletion,
|
|
52
43
|
},
|
|
53
44
|
temporal::api::{
|
|
54
|
-
enums::v1::
|
|
55
|
-
failure::v1::Failure,
|
|
45
|
+
enums::v1::TaskQueueKind,
|
|
56
46
|
taskqueue::v1::{StickyExecutionAttributes, TaskQueue},
|
|
57
|
-
workflowservice::v1::
|
|
47
|
+
workflowservice::v1::PollActivityTaskQueueResponse,
|
|
58
48
|
},
|
|
59
49
|
TaskToken,
|
|
60
50
|
};
|
|
61
|
-
use tokio::sync::Notify;
|
|
62
51
|
use tokio_util::sync::CancellationToken;
|
|
63
|
-
use tonic::Code;
|
|
64
|
-
use tracing_futures::Instrument;
|
|
65
52
|
|
|
66
53
|
#[cfg(test)]
|
|
67
54
|
use crate::worker::client::WorkerClient;
|
|
68
|
-
use crate::workflow::
|
|
55
|
+
use crate::worker::workflow::WorkflowBasics;
|
|
69
56
|
|
|
70
57
|
/// A worker polls on a certain task queue
|
|
71
58
|
pub struct Worker {
|
|
72
59
|
config: WorkerConfig,
|
|
73
60
|
wf_client: Arc<WorkerClientBag>,
|
|
74
61
|
|
|
75
|
-
///
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
/// Buffers workflow task polling in the event we need to return a pending activation while
|
|
79
|
-
/// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
|
|
80
|
-
wf_task_source: WFTSource,
|
|
81
|
-
/// Workflow task management
|
|
82
|
-
wft_manager: WorkflowTaskManager,
|
|
62
|
+
/// Manages all workflows and WFT processing
|
|
63
|
+
workflows: Workflows,
|
|
83
64
|
/// Manages activity tasks for this worker/task queue
|
|
84
65
|
at_task_mgr: Option<WorkerActivityTasks>,
|
|
85
66
|
/// Manages local activities
|
|
86
|
-
local_act_mgr: LocalActivityManager
|
|
87
|
-
/// Ensures we stay at or below this worker's maximum concurrent workflow limit
|
|
88
|
-
workflows_semaphore: MeteredSemaphore,
|
|
89
|
-
/// Used to wake blocked workflow task polling when there is some change to workflow activations
|
|
90
|
-
/// that should cause us to restart the loop
|
|
91
|
-
pending_activations_notify: Arc<Notify>,
|
|
92
|
-
/// Watched during shutdown to wait for all WFTs to complete. Should be notified any time
|
|
93
|
-
/// a WFT is completed.
|
|
94
|
-
wfts_drained_notify: Arc<Notify>,
|
|
67
|
+
local_act_mgr: Arc<LocalActivityManager>,
|
|
95
68
|
/// Has shutdown been called?
|
|
96
69
|
shutdown_token: CancellationToken,
|
|
97
70
|
/// Will be called at the end of each activation completion
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
metrics: MetricsContext,
|
|
71
|
+
#[allow(clippy::type_complexity)] // Sorry clippy, there's no simple way to re-use here.
|
|
72
|
+
post_activate_hook: Option<Box<dyn Fn(&Self, &str, usize) + Send + Sync>>,
|
|
101
73
|
}
|
|
102
74
|
|
|
103
75
|
#[async_trait::async_trait]
|
|
@@ -126,8 +98,7 @@ impl WorkerTrait for Worker {
|
|
|
126
98
|
self.complete_workflow_activation(completion).await
|
|
127
99
|
}
|
|
128
100
|
|
|
129
|
-
#[instrument(level = "debug", skip(self, completion),
|
|
130
|
-
fields(completion=%&completion))]
|
|
101
|
+
#[instrument(level = "debug", skip(self, completion), fields(completion=%&completion))]
|
|
131
102
|
async fn complete_activity_task(
|
|
132
103
|
&self,
|
|
133
104
|
completion: ActivityTaskCompletion,
|
|
@@ -168,7 +139,6 @@ impl WorkerTrait for Worker {
|
|
|
168
139
|
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
169
140
|
atm.notify_shutdown();
|
|
170
141
|
}
|
|
171
|
-
self.wf_task_source.stop_pollers();
|
|
172
142
|
info!("Initiated shutdown");
|
|
173
143
|
}
|
|
174
144
|
|
|
@@ -192,6 +162,7 @@ impl Worker {
|
|
|
192
162
|
info!(task_queue = %config.task_queue, "Initializing worker");
|
|
193
163
|
metrics.worker_registered();
|
|
194
164
|
|
|
165
|
+
let shutdown_token = CancellationToken::new();
|
|
195
166
|
let max_nonsticky_polls = if sticky_queue_name.is_some() {
|
|
196
167
|
config.max_nonsticky_polls()
|
|
197
168
|
} else {
|
|
@@ -205,6 +176,7 @@ impl Worker {
|
|
|
205
176
|
false,
|
|
206
177
|
max_nonsticky_polls,
|
|
207
178
|
max_nonsticky_polls * 2,
|
|
179
|
+
shutdown_token.child_token(),
|
|
208
180
|
);
|
|
209
181
|
wf_task_poll_buffer.set_num_pollers_handler(move |np| wft_metrics.record_num_pollers(np));
|
|
210
182
|
let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
|
|
@@ -215,6 +187,7 @@ impl Worker {
|
|
|
215
187
|
true,
|
|
216
188
|
max_sticky_polls,
|
|
217
189
|
max_sticky_polls * 2,
|
|
190
|
+
shutdown_token.child_token(),
|
|
218
191
|
);
|
|
219
192
|
sp.set_num_pollers_handler(move |np| sticky_metrics.record_num_pollers(np));
|
|
220
193
|
sp
|
|
@@ -228,6 +201,7 @@ impl Worker {
|
|
|
228
201
|
config.max_concurrent_at_polls,
|
|
229
202
|
config.max_concurrent_at_polls * 2,
|
|
230
203
|
config.max_task_queue_activities_per_second,
|
|
204
|
+
shutdown_token.child_token(),
|
|
231
205
|
);
|
|
232
206
|
let act_metrics = metrics.with_new_attrs([activity_poller()]);
|
|
233
207
|
ap.set_num_pollers_handler(move |np| act_metrics.record_num_pollers(np));
|
|
@@ -240,13 +214,15 @@ impl Worker {
|
|
|
240
214
|
wf_task_poll_buffer,
|
|
241
215
|
sticky_queue_poller,
|
|
242
216
|
));
|
|
217
|
+
let wft_stream = new_wft_poller(wf_task_poll_buffer, metrics.clone());
|
|
243
218
|
Self::new_with_pollers(
|
|
244
219
|
config,
|
|
245
220
|
sticky_queue_name,
|
|
246
221
|
client,
|
|
247
|
-
|
|
222
|
+
wft_stream,
|
|
248
223
|
act_poll_buffer,
|
|
249
224
|
metrics,
|
|
225
|
+
shutdown_token,
|
|
250
226
|
)
|
|
251
227
|
}
|
|
252
228
|
|
|
@@ -255,76 +231,78 @@ impl Worker {
|
|
|
255
231
|
Self::new(config, None, Arc::new(client.into()), Default::default())
|
|
256
232
|
}
|
|
257
233
|
|
|
258
|
-
/// Returns number of currently cached workflows
|
|
259
|
-
pub fn cached_workflows(&self) -> usize {
|
|
260
|
-
self.wft_manager.cached_workflows()
|
|
261
|
-
}
|
|
262
|
-
|
|
263
234
|
pub(crate) fn new_with_pollers(
|
|
264
235
|
config: WorkerConfig,
|
|
265
236
|
sticky_queue_name: Option<String>,
|
|
266
237
|
client: Arc<WorkerClientBag>,
|
|
267
|
-
|
|
238
|
+
wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
|
|
268
239
|
act_poller: Option<BoxedActPoller>,
|
|
269
240
|
metrics: MetricsContext,
|
|
241
|
+
shutdown_token: CancellationToken,
|
|
270
242
|
) -> Self {
|
|
271
|
-
let
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
let
|
|
279
|
-
|
|
243
|
+
let local_act_mgr = Arc::new(LocalActivityManager::new(
|
|
244
|
+
config.max_outstanding_local_activities,
|
|
245
|
+
config.namespace.clone(),
|
|
246
|
+
metrics.with_new_attrs([local_activity_worker_type()]),
|
|
247
|
+
));
|
|
248
|
+
let lam_clone = local_act_mgr.clone();
|
|
249
|
+
let local_act_req_sink = move |requests| lam_clone.enqueue(requests);
|
|
250
|
+
let at_task_mgr = act_poller.map(|ap| {
|
|
251
|
+
WorkerActivityTasks::new(
|
|
252
|
+
config.max_outstanding_activities,
|
|
253
|
+
config.max_worker_activities_per_second,
|
|
254
|
+
ap,
|
|
255
|
+
client.clone(),
|
|
256
|
+
metrics.clone(),
|
|
257
|
+
config.max_heartbeat_throttle_interval,
|
|
258
|
+
config.default_heartbeat_throttle_interval,
|
|
259
|
+
)
|
|
260
|
+
});
|
|
280
261
|
Self {
|
|
281
262
|
wf_client: client.clone(),
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
263
|
+
workflows: Workflows::new(
|
|
264
|
+
WorkflowBasics {
|
|
265
|
+
max_cached_workflows: config.max_cached_workflows,
|
|
266
|
+
max_outstanding_wfts: config.max_outstanding_workflow_tasks,
|
|
267
|
+
shutdown_token: shutdown_token.child_token(),
|
|
268
|
+
metrics,
|
|
269
|
+
},
|
|
270
|
+
sticky_queue_name.map(|sq| StickyExecutionAttributes {
|
|
271
|
+
worker_task_queue: Some(TaskQueue {
|
|
272
|
+
name: sq,
|
|
273
|
+
kind: TaskQueueKind::Sticky as i32,
|
|
274
|
+
}),
|
|
275
|
+
schedule_to_start_timeout: Some(
|
|
276
|
+
config.sticky_queue_schedule_to_start_timeout.into(),
|
|
277
|
+
),
|
|
278
|
+
}),
|
|
279
|
+
client,
|
|
280
|
+
wft_stream,
|
|
281
|
+
local_act_req_sink,
|
|
282
|
+
at_task_mgr
|
|
283
|
+
.as_ref()
|
|
284
|
+
.map(|mgr| mgr.get_handle_for_workflows()),
|
|
304
285
|
),
|
|
286
|
+
at_task_mgr,
|
|
287
|
+
local_act_mgr,
|
|
305
288
|
config,
|
|
306
|
-
shutdown_token
|
|
289
|
+
shutdown_token,
|
|
307
290
|
post_activate_hook: None,
|
|
308
|
-
pending_activations_notify: pa_notif,
|
|
309
|
-
wfts_drained_notify,
|
|
310
|
-
metrics,
|
|
311
291
|
}
|
|
312
292
|
}
|
|
313
293
|
|
|
314
294
|
/// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
|
|
315
295
|
/// completed
|
|
316
|
-
|
|
296
|
+
async fn shutdown(&self) {
|
|
317
297
|
self.initiate_shutdown();
|
|
318
298
|
// Next we need to wait for all local activities to finish so no more workflow task
|
|
319
299
|
// heartbeats will be generated
|
|
320
300
|
self.local_act_mgr.shutdown_and_wait_all_finished().await;
|
|
321
|
-
//
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
.
|
|
325
|
-
.
|
|
326
|
-
// wait until all outstanding workflow tasks have been completed
|
|
327
|
-
self.all_wfts_drained().await;
|
|
301
|
+
// Wait for workflows to finish
|
|
302
|
+
self.workflows
|
|
303
|
+
.shutdown()
|
|
304
|
+
.await
|
|
305
|
+
.expect("Workflow processing terminates cleanly");
|
|
328
306
|
// Wait for activities to finish
|
|
329
307
|
if let Some(acts) = self.at_task_mgr.as_ref() {
|
|
330
308
|
acts.wait_all_finished().await;
|
|
@@ -332,21 +310,38 @@ impl Worker {
|
|
|
332
310
|
}
|
|
333
311
|
|
|
334
312
|
/// Finish shutting down by consuming the background pollers and freeing all resources
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
}
|
|
340
|
-
});
|
|
313
|
+
async fn finalize_shutdown(self) {
|
|
314
|
+
if let Some(b) = self.at_task_mgr {
|
|
315
|
+
b.shutdown().await;
|
|
316
|
+
}
|
|
341
317
|
}
|
|
342
318
|
|
|
343
|
-
|
|
344
|
-
|
|
319
|
+
/// Returns number of currently cached workflows
|
|
320
|
+
pub async fn cached_workflows(&self) -> usize {
|
|
321
|
+
self.workflows
|
|
322
|
+
.get_state_info()
|
|
323
|
+
.await
|
|
324
|
+
.map(|r| r.cached_workflows)
|
|
325
|
+
.unwrap_or_default()
|
|
345
326
|
}
|
|
346
327
|
|
|
328
|
+
/// Returns number of currently outstanding workflow tasks
|
|
347
329
|
#[cfg(test)]
|
|
348
|
-
pub(crate) fn
|
|
349
|
-
self.
|
|
330
|
+
pub(crate) async fn outstanding_workflow_tasks(&self) -> usize {
|
|
331
|
+
self.workflows
|
|
332
|
+
.get_state_info()
|
|
333
|
+
.await
|
|
334
|
+
.map(|r| r.outstanding_wft)
|
|
335
|
+
.unwrap_or_default()
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
#[cfg(test)]
|
|
339
|
+
pub(crate) async fn available_wft_permits(&self) -> usize {
|
|
340
|
+
self.workflows
|
|
341
|
+
.get_state_info()
|
|
342
|
+
.await
|
|
343
|
+
.expect("You can only check for available permits before shutdown")
|
|
344
|
+
.available_wft_permits
|
|
350
345
|
}
|
|
351
346
|
|
|
352
347
|
/// Get new activity tasks (may be local or nonlocal). Local activities are returned first
|
|
@@ -359,6 +354,7 @@ impl Worker {
|
|
|
359
354
|
if let Some(ref act_mgr) = self.at_task_mgr {
|
|
360
355
|
act_mgr.poll().await
|
|
361
356
|
} else {
|
|
357
|
+
info!("Activity polling is disabled for this worker");
|
|
362
358
|
self.shutdown_token.cancelled().await;
|
|
363
359
|
Err(PollActivityError::ShutDown)
|
|
364
360
|
}
|
|
@@ -372,7 +368,7 @@ impl Worker {
|
|
|
372
368
|
Some(DispatchOrTimeoutLA::Dispatch(r)) => Ok(Some(r)),
|
|
373
369
|
Some(DispatchOrTimeoutLA::Timeout { run_id, resolution, task }) => {
|
|
374
370
|
self.notify_local_result(
|
|
375
|
-
&run_id, LocalResolution::LocalActivity(resolution))
|
|
371
|
+
&run_id, LocalResolution::LocalActivity(resolution));
|
|
376
372
|
Ok(task)
|
|
377
373
|
},
|
|
378
374
|
None => {
|
|
@@ -405,9 +401,7 @@ impl Worker {
|
|
|
405
401
|
if task_token.is_local_activity_task() {
|
|
406
402
|
let as_la_res: LocalActivityExecutionResult = status.try_into()?;
|
|
407
403
|
match self.local_act_mgr.complete(&task_token, &as_la_res) {
|
|
408
|
-
LACompleteAction::Report(info) =>
|
|
409
|
-
self.complete_local_act(as_la_res, info, None).await
|
|
410
|
-
}
|
|
404
|
+
LACompleteAction::Report(info) => self.complete_local_act(as_la_res, info, None),
|
|
411
405
|
LACompleteAction::LangDoesTimerBackoff(backoff, info) => {
|
|
412
406
|
// This la needs to write a failure marker, and then we will tell lang how
|
|
413
407
|
// long of a timer to schedule to back off for. We do this because there are
|
|
@@ -415,7 +409,6 @@ impl Worker {
|
|
|
415
409
|
// simpler for lang to reply with the timer / next LA command than to do it
|
|
416
410
|
// internally. Plus, this backoff hack we'd like to eliminate eventually.
|
|
417
411
|
self.complete_local_act(as_la_res, info, Some(backoff))
|
|
418
|
-
.await
|
|
419
412
|
}
|
|
420
413
|
LACompleteAction::WillBeRetried => {
|
|
421
414
|
// Nothing to do here
|
|
@@ -440,135 +433,37 @@ impl Worker {
|
|
|
440
433
|
|
|
441
434
|
#[instrument(level = "debug", skip(self), fields(run_id))]
|
|
442
435
|
pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
443
|
-
|
|
444
|
-
// (simply) and we really, really need that for long-poll retries.
|
|
445
|
-
loop {
|
|
446
|
-
// We must first check if there are pending workflow activations for workflows that are
|
|
447
|
-
// currently replaying or otherwise need immediate jobs, and issue those before
|
|
448
|
-
// bothering the server.
|
|
449
|
-
if let Some(pa) = self.wft_manager.next_pending_activation() {
|
|
450
|
-
debug!(activation=%pa, "Sending pending activation to lang");
|
|
451
|
-
return Ok(pa);
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
if self.config.max_cached_workflows > 0 {
|
|
455
|
-
if let Some(cache_cap_fut) = self.wft_manager.wait_for_cache_capacity() {
|
|
456
|
-
tokio::select! {
|
|
457
|
-
biased;
|
|
458
|
-
// We must loop up if there's a new pending activation, since those are for
|
|
459
|
-
// already-cached workflows and may include evictions which will change if
|
|
460
|
-
// we are still waiting or not.
|
|
461
|
-
_ = self.pending_activations_notify.notified() => {
|
|
462
|
-
continue
|
|
463
|
-
},
|
|
464
|
-
_ = cache_cap_fut => {}
|
|
465
|
-
};
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
// Apply any buffered poll responses from the server. Must come after pending
|
|
470
|
-
// activations, since there may be an eviction etc for whatever run is popped here.
|
|
471
|
-
if let Some(buff_wft) = self.wft_manager.next_buffered_poll() {
|
|
472
|
-
match self.apply_server_work(buff_wft).await? {
|
|
473
|
-
Some(a) => return Ok(a),
|
|
474
|
-
_ => continue,
|
|
475
|
-
}
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
let selected_f = tokio::select! {
|
|
479
|
-
biased;
|
|
480
|
-
|
|
481
|
-
// If an activation is completed while we are waiting on polling, we need to restart
|
|
482
|
-
// the loop right away to provide any potential new pending activation.
|
|
483
|
-
// Continue here means that we unnecessarily add another permit to the poll buffer,
|
|
484
|
-
// this will go away when polling is done in the background.
|
|
485
|
-
_ = self.pending_activations_notify.notified() => continue,
|
|
486
|
-
r = self.workflow_poll_or_wfts_drained() => r,
|
|
487
|
-
}?;
|
|
488
|
-
|
|
489
|
-
if let Some(work) = selected_f {
|
|
490
|
-
self.metrics.wf_tq_poll_ok();
|
|
491
|
-
if let Some(a) = self.apply_server_work(work).await? {
|
|
492
|
-
return Ok(a);
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
// Make sure that polling looping doesn't hog up the whole scheduler. Realistically
|
|
497
|
-
// this probably only happens when mock responses return at full speed.
|
|
498
|
-
tokio::task::yield_now().await;
|
|
499
|
-
}
|
|
436
|
+
self.workflows.next_workflow_activation().await
|
|
500
437
|
}
|
|
501
438
|
|
|
502
439
|
#[instrument(level = "debug", skip(self, completion),
|
|
503
|
-
|
|
440
|
+
fields(completion=%&completion, run_id=%completion.run_id))]
|
|
504
441
|
pub(crate) async fn complete_workflow_activation(
|
|
505
442
|
&self,
|
|
506
443
|
completion: WorkflowActivationCompletion,
|
|
507
444
|
) -> Result<(), CompleteWfError> {
|
|
508
|
-
let
|
|
509
|
-
let
|
|
510
|
-
Some(workflow_activation_completion::Status::Successful(success)) => {
|
|
511
|
-
self.wf_activation_success(&completion.run_id, success)
|
|
512
|
-
.await
|
|
513
|
-
}
|
|
514
|
-
|
|
515
|
-
Some(workflow_activation_completion::Status::Failed(failure)) => {
|
|
516
|
-
self.wf_activation_failed(
|
|
517
|
-
&completion.run_id,
|
|
518
|
-
WorkflowTaskFailedCause::Unspecified,
|
|
519
|
-
EvictionReason::LangFail,
|
|
520
|
-
failure,
|
|
521
|
-
)
|
|
522
|
-
.await
|
|
523
|
-
}
|
|
524
|
-
None => {
|
|
525
|
-
return Err(CompleteWfError::MalformedWorkflowCompletion {
|
|
526
|
-
reason: "Workflow completion had empty status field".to_owned(),
|
|
527
|
-
completion: None,
|
|
528
|
-
})
|
|
529
|
-
}
|
|
530
|
-
}?;
|
|
531
|
-
|
|
532
|
-
self.wft_manager
|
|
533
|
-
.after_wft_report(&completion.run_id, report_outcome.reported_to_server);
|
|
534
|
-
if report_outcome.reported_to_server || report_outcome.failed {
|
|
535
|
-
// If we failed the WFT but didn't report anything, we still want to release the WFT
|
|
536
|
-
// permit since the server will eventually time out the task and we've already evicted
|
|
537
|
-
// the run.
|
|
538
|
-
self.return_workflow_task_permit();
|
|
539
|
-
}
|
|
540
|
-
self.wfts_drained_notify.notify_waiters();
|
|
541
|
-
|
|
445
|
+
let run_id = completion.run_id.clone();
|
|
446
|
+
let most_recent_event = self.workflows.activation_completed(completion).await?;
|
|
542
447
|
if let Some(h) = &self.post_activate_hook {
|
|
543
|
-
h(self);
|
|
448
|
+
h(self, &run_id, most_recent_event);
|
|
544
449
|
}
|
|
545
|
-
|
|
546
450
|
Ok(())
|
|
547
451
|
}
|
|
548
452
|
|
|
549
|
-
///
|
|
550
|
-
pub(crate) fn return_workflow_task_permit(&self) {
|
|
551
|
-
self.workflows_semaphore.add_permit();
|
|
552
|
-
}
|
|
553
|
-
|
|
554
|
-
/// Request a workflow eviction. Returns true if we actually queued up a new eviction request.
|
|
453
|
+
/// Request a workflow eviction
|
|
555
454
|
pub(crate) fn request_wf_eviction(
|
|
556
455
|
&self,
|
|
557
456
|
run_id: &str,
|
|
558
457
|
message: impl Into<String>,
|
|
559
458
|
reason: EvictionReason,
|
|
560
|
-
)
|
|
561
|
-
|
|
562
|
-
EvictionRequestResult::EvictionRequested(_) => true,
|
|
563
|
-
EvictionRequestResult::NotFound => false,
|
|
564
|
-
EvictionRequestResult::EvictionAlreadyRequested(_) => false,
|
|
565
|
-
}
|
|
459
|
+
) {
|
|
460
|
+
self.workflows.request_eviction(run_id, message, reason);
|
|
566
461
|
}
|
|
567
462
|
|
|
568
463
|
/// Sets a function to be called at the end of each activation completion
|
|
569
464
|
pub(crate) fn set_post_activate_hook(
|
|
570
465
|
&mut self,
|
|
571
|
-
callback: impl Fn(&Self) + Send + Sync + 'static,
|
|
466
|
+
callback: impl Fn(&Self, &str, usize) + Send + Sync + 'static,
|
|
572
467
|
) {
|
|
573
468
|
self.post_activate_hook = Some(Box::new(callback))
|
|
574
469
|
}
|
|
@@ -576,369 +471,14 @@ impl Worker {
|
|
|
576
471
|
/// Used for replay workers - causes the worker to shutdown when the given run reaches the
|
|
577
472
|
/// given event number
|
|
578
473
|
pub(crate) fn set_shutdown_on_run_reaches_event(&mut self, run_id: String, last_event: i64) {
|
|
579
|
-
self.set_post_activate_hook(move |worker| {
|
|
580
|
-
if
|
|
581
|
-
.wft_manager
|
|
582
|
-
.most_recently_processed_event(&run_id)
|
|
583
|
-
.unwrap_or_default()
|
|
584
|
-
>= last_event
|
|
585
|
-
{
|
|
474
|
+
self.set_post_activate_hook(move |worker, activated_run_id, last_processed_event| {
|
|
475
|
+
if activated_run_id == run_id && last_processed_event >= last_event as usize {
|
|
586
476
|
worker.initiate_shutdown();
|
|
587
477
|
}
|
|
588
478
|
});
|
|
589
479
|
}
|
|
590
480
|
|
|
591
|
-
|
|
592
|
-
async fn workflow_poll_or_wfts_drained(
|
|
593
|
-
&self,
|
|
594
|
-
) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
595
|
-
let mut shutdown_seen = false;
|
|
596
|
-
loop {
|
|
597
|
-
// If we've already seen shutdown once it's important we don't freak out and
|
|
598
|
-
// restart the loop constantly while waiting for poll to finish shutting down.
|
|
599
|
-
let shutdown_restarter = async {
|
|
600
|
-
if shutdown_seen {
|
|
601
|
-
future::pending::<()>().await;
|
|
602
|
-
} else {
|
|
603
|
-
self.shutdown_token.cancelled().await;
|
|
604
|
-
};
|
|
605
|
-
};
|
|
606
|
-
tokio::select! {
|
|
607
|
-
biased;
|
|
608
|
-
|
|
609
|
-
r = self.workflow_poll().map_err(Into::into) => {
|
|
610
|
-
if matches!(r, Err(PollWfError::ShutDown)) {
|
|
611
|
-
// Don't actually return shutdown until workflow tasks are drained.
|
|
612
|
-
// Outstanding tasks being completed will generate new pending activations
|
|
613
|
-
// which will cause us to abort this function.
|
|
614
|
-
self.all_wfts_drained().await;
|
|
615
|
-
}
|
|
616
|
-
return r
|
|
617
|
-
},
|
|
618
|
-
_ = shutdown_restarter => {
|
|
619
|
-
shutdown_seen = true;
|
|
620
|
-
},
|
|
621
|
-
}
|
|
622
|
-
}
|
|
623
|
-
}
|
|
624
|
-
|
|
625
|
-
/// Wait until not at the outstanding workflow task limit, and then poll this worker's task
|
|
626
|
-
/// queue for new workflow tasks.
|
|
627
|
-
///
|
|
628
|
-
/// Returns `Ok(None)` in the event of a poll timeout, if there was some gRPC error that
|
|
629
|
-
/// callers can't do anything about, or any other reason to restart the poll loop.
|
|
630
|
-
async fn workflow_poll(&self) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
631
|
-
// We can't say we're shut down if there are outstanding LAs, as they could end up WFT
|
|
632
|
-
// heartbeating which is a "new" workflow task that we need to accept and process as long as
|
|
633
|
-
// the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
|
|
634
|
-
// then we must fetch them from the source before we can say workflow polling is shutdown.
|
|
635
|
-
if self.shutdown_token.is_cancelled()
|
|
636
|
-
&& !self.wf_task_source.has_tasks_from_complete()
|
|
637
|
-
&& self.local_act_mgr.num_outstanding() == 0
|
|
638
|
-
{
|
|
639
|
-
return Err(PollWfError::ShutDown);
|
|
640
|
-
}
|
|
641
|
-
|
|
642
|
-
let sem = self
|
|
643
|
-
.workflows_semaphore
|
|
644
|
-
.acquire()
|
|
645
|
-
.await
|
|
646
|
-
.expect("outstanding workflow tasks semaphore not dropped");
|
|
647
|
-
|
|
648
|
-
let res = self
|
|
649
|
-
.wf_task_source
|
|
650
|
-
.next_wft()
|
|
651
|
-
.await
|
|
652
|
-
.ok_or(PollWfError::ShutDown)??;
|
|
653
|
-
|
|
654
|
-
if res == PollWorkflowTaskQueueResponse::default() {
|
|
655
|
-
// We get the default proto in the event that the long poll times out.
|
|
656
|
-
debug!("Poll wft timeout");
|
|
657
|
-
self.metrics.wf_tq_poll_empty();
|
|
658
|
-
return Ok(None);
|
|
659
|
-
}
|
|
660
|
-
|
|
661
|
-
if let Some(dur) = res.sched_to_start() {
|
|
662
|
-
self.metrics.wf_task_sched_to_start_latency(dur);
|
|
663
|
-
}
|
|
664
|
-
|
|
665
|
-
let work: ValidPollWFTQResponse = res.try_into().map_err(|resp| {
|
|
666
|
-
PollWfError::TonicError(tonic::Status::new(
|
|
667
|
-
Code::DataLoss,
|
|
668
|
-
format!(
|
|
669
|
-
"Server returned a poll WFT response we couldn't interpret: {:?}",
|
|
670
|
-
resp
|
|
671
|
-
),
|
|
672
|
-
))
|
|
673
|
-
})?;
|
|
674
|
-
|
|
675
|
-
// Only permanently take a permit in the event the poll finished completely
|
|
676
|
-
sem.forget();
|
|
677
|
-
|
|
678
|
-
let work = if self.config.max_cached_workflows > 0 {
|
|
679
|
-
// Add the workflow to cache management. We do not even attempt insert if cache
|
|
680
|
-
// size is zero because we do not want to generate eviction requests for
|
|
681
|
-
// workflows which may immediately generate pending activations.
|
|
682
|
-
if let Some(ready_to_work) = self.wft_manager.add_new_run_to_cache(work).await {
|
|
683
|
-
ready_to_work
|
|
684
|
-
} else {
|
|
685
|
-
return Ok(None);
|
|
686
|
-
}
|
|
687
|
-
} else {
|
|
688
|
-
work
|
|
689
|
-
};
|
|
690
|
-
|
|
691
|
-
Ok(Some(work))
|
|
692
|
-
}
|
|
693
|
-
|
|
694
|
-
/// Apply validated poll responses from the server. Returns an activation if one should be
|
|
695
|
-
/// issued to lang, or returns `None` in which case the polling loop should be restarted
|
|
696
|
-
/// (ex: Got a new workflow task for a run but lang is already handling an activation for that
|
|
697
|
-
/// same run)
|
|
698
|
-
async fn apply_server_work(
|
|
699
|
-
&self,
|
|
700
|
-
work: ValidPollWFTQResponse,
|
|
701
|
-
) -> Result<Option<WorkflowActivation>, PollWfError> {
|
|
702
|
-
let we = work.workflow_execution.clone();
|
|
703
|
-
let res = self
|
|
704
|
-
.wft_manager
|
|
705
|
-
.apply_new_poll_resp(work, self.wf_client.clone())
|
|
706
|
-
.await;
|
|
707
|
-
Ok(match res {
|
|
708
|
-
NewWfTaskOutcome::IssueActivation(a) => {
|
|
709
|
-
debug!(activation=%a, "Sending activation to lang");
|
|
710
|
-
Some(a)
|
|
711
|
-
}
|
|
712
|
-
NewWfTaskOutcome::TaskBuffered => {
|
|
713
|
-
// Though the task is not outstanding in the lang sense, it is outstanding from the
|
|
714
|
-
// server perspective. We used to return a permit here, but that doesn't actually
|
|
715
|
-
// make much sense.
|
|
716
|
-
None
|
|
717
|
-
}
|
|
718
|
-
NewWfTaskOutcome::Autocomplete | NewWfTaskOutcome::LocalActsOutstanding => {
|
|
719
|
-
debug!(workflow_execution=?we,
|
|
720
|
-
"No new work for lang to perform after polling server");
|
|
721
|
-
self.complete_workflow_activation(WorkflowActivationCompletion {
|
|
722
|
-
run_id: we.run_id,
|
|
723
|
-
status: Some(workflow_completion::Success::from_variants(vec![]).into()),
|
|
724
|
-
})
|
|
725
|
-
.await?;
|
|
726
|
-
None
|
|
727
|
-
}
|
|
728
|
-
NewWfTaskOutcome::Evict(e) => {
|
|
729
|
-
warn!(error=?e, run_id=%we.run_id, "Error while applying poll response to workflow");
|
|
730
|
-
let did_issue_eviction = self.request_wf_eviction(
|
|
731
|
-
&we.run_id,
|
|
732
|
-
format!("Error while applying poll response to workflow: {:?}", e),
|
|
733
|
-
e.evict_reason(),
|
|
734
|
-
);
|
|
735
|
-
// If we didn't actually need to issue an eviction, then return the WFT permit.
|
|
736
|
-
// EX: The workflow we tried to evict wasn't in the cache.
|
|
737
|
-
if !did_issue_eviction {
|
|
738
|
-
self.return_workflow_task_permit();
|
|
739
|
-
}
|
|
740
|
-
None
|
|
741
|
-
}
|
|
742
|
-
})
|
|
743
|
-
}
|
|
744
|
-
|
|
745
|
-
/// Handle a successful workflow activation
|
|
746
|
-
///
|
|
747
|
-
/// Returns true if we actually reported WFT completion to server (success or failure)
|
|
748
|
-
async fn wf_activation_success(
|
|
749
|
-
&self,
|
|
750
|
-
run_id: &str,
|
|
751
|
-
success: workflow_completion::Success,
|
|
752
|
-
) -> Result<WFTReportOutcome, CompleteWfError> {
|
|
753
|
-
// Convert to wf commands
|
|
754
|
-
let cmds = success
|
|
755
|
-
.commands
|
|
756
|
-
.into_iter()
|
|
757
|
-
.map(|c| c.try_into())
|
|
758
|
-
.collect::<Result<Vec<_>, EmptyWorkflowCommandErr>>()
|
|
759
|
-
.map_err(|_| CompleteWfError::MalformedWorkflowCompletion {
|
|
760
|
-
reason:
|
|
761
|
-
"At least one workflow command in the completion contained an empty variant"
|
|
762
|
-
.to_owned(),
|
|
763
|
-
completion: None,
|
|
764
|
-
})?;
|
|
765
|
-
|
|
766
|
-
match self
|
|
767
|
-
.wft_manager
|
|
768
|
-
.successful_activation(run_id, cmds, |acts| self.local_act_mgr.enqueue(acts))
|
|
769
|
-
.await
|
|
770
|
-
{
|
|
771
|
-
Ok(Some(ServerCommandsWithWorkflowInfo {
|
|
772
|
-
task_token,
|
|
773
|
-
action:
|
|
774
|
-
ActivationAction::WftComplete {
|
|
775
|
-
commands,
|
|
776
|
-
query_responses,
|
|
777
|
-
force_new_wft,
|
|
778
|
-
},
|
|
779
|
-
})) => {
|
|
780
|
-
debug!("Sending commands to server: {}", commands.display());
|
|
781
|
-
if !query_responses.is_empty() {
|
|
782
|
-
debug!(
|
|
783
|
-
"Sending query responses to server: {}",
|
|
784
|
-
query_responses.display()
|
|
785
|
-
);
|
|
786
|
-
}
|
|
787
|
-
let mut completion = WorkflowTaskCompletion {
|
|
788
|
-
task_token,
|
|
789
|
-
commands,
|
|
790
|
-
query_responses,
|
|
791
|
-
sticky_attributes: None,
|
|
792
|
-
return_new_workflow_task: true,
|
|
793
|
-
force_create_new_workflow_task: force_new_wft,
|
|
794
|
-
};
|
|
795
|
-
let sticky_attrs = self.get_sticky_attrs();
|
|
796
|
-
// Do not return new WFT if we would not cache, because returned new WFTs are always
|
|
797
|
-
// partial.
|
|
798
|
-
if sticky_attrs.is_none() {
|
|
799
|
-
completion.return_new_workflow_task = false;
|
|
800
|
-
}
|
|
801
|
-
completion.sticky_attributes = sticky_attrs;
|
|
802
|
-
|
|
803
|
-
self.handle_wft_reporting_errs(run_id, || async {
|
|
804
|
-
let maybe_wft = self
|
|
805
|
-
.wf_client
|
|
806
|
-
.complete_workflow_task(completion)
|
|
807
|
-
.instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
|
|
808
|
-
.await?;
|
|
809
|
-
if let Some(wft) = maybe_wft.workflow_task {
|
|
810
|
-
self.wf_task_source.add_wft_from_completion(wft);
|
|
811
|
-
}
|
|
812
|
-
Ok(())
|
|
813
|
-
})
|
|
814
|
-
.await?;
|
|
815
|
-
Ok(WFTReportOutcome {
|
|
816
|
-
reported_to_server: true,
|
|
817
|
-
failed: false,
|
|
818
|
-
})
|
|
819
|
-
}
|
|
820
|
-
Ok(Some(ServerCommandsWithWorkflowInfo {
|
|
821
|
-
task_token,
|
|
822
|
-
action: ActivationAction::RespondLegacyQuery { result },
|
|
823
|
-
..
|
|
824
|
-
})) => {
|
|
825
|
-
self.wf_client
|
|
826
|
-
.respond_legacy_query(task_token, result)
|
|
827
|
-
.await?;
|
|
828
|
-
Ok(WFTReportOutcome {
|
|
829
|
-
reported_to_server: true,
|
|
830
|
-
failed: false,
|
|
831
|
-
})
|
|
832
|
-
}
|
|
833
|
-
Ok(None) => Ok(WFTReportOutcome {
|
|
834
|
-
reported_to_server: false,
|
|
835
|
-
failed: false,
|
|
836
|
-
}),
|
|
837
|
-
Err(update_err) => {
|
|
838
|
-
// Automatically fail the workflow task in the event we couldn't update machines
|
|
839
|
-
let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
|
|
840
|
-
{
|
|
841
|
-
WorkflowTaskFailedCause::NonDeterministicError
|
|
842
|
-
} else {
|
|
843
|
-
WorkflowTaskFailedCause::Unspecified
|
|
844
|
-
};
|
|
845
|
-
let wft_fail_str = format!("{:?}", update_err);
|
|
846
|
-
self.wf_activation_failed(
|
|
847
|
-
run_id,
|
|
848
|
-
fail_cause,
|
|
849
|
-
update_err.evict_reason(),
|
|
850
|
-
Failure::application_failure(wft_fail_str.clone(), false).into(),
|
|
851
|
-
)
|
|
852
|
-
.await
|
|
853
|
-
}
|
|
854
|
-
}
|
|
855
|
-
}
|
|
856
|
-
|
|
857
|
-
/// Handle a failed workflow completion
|
|
858
|
-
///
|
|
859
|
-
/// Returns true if we actually reported WFT completion to server
|
|
860
|
-
async fn wf_activation_failed(
|
|
861
|
-
&self,
|
|
862
|
-
run_id: &str,
|
|
863
|
-
cause: WorkflowTaskFailedCause,
|
|
864
|
-
reason: EvictionReason,
|
|
865
|
-
failure: workflow_completion::Failure,
|
|
866
|
-
) -> Result<WFTReportOutcome, CompleteWfError> {
|
|
867
|
-
Ok(
|
|
868
|
-
match self.wft_manager.failed_activation(
|
|
869
|
-
run_id,
|
|
870
|
-
reason,
|
|
871
|
-
format!("Workflow activation completion failed: {:?}", failure),
|
|
872
|
-
) {
|
|
873
|
-
FailedActivationOutcome::Report(tt) => {
|
|
874
|
-
warn!(run_id, failure=?failure, "Failing workflow activation");
|
|
875
|
-
self.handle_wft_reporting_errs(run_id, || async {
|
|
876
|
-
self.wf_client
|
|
877
|
-
.fail_workflow_task(tt, cause, failure.failure.map(Into::into))
|
|
878
|
-
.await
|
|
879
|
-
})
|
|
880
|
-
.await?;
|
|
881
|
-
WFTReportOutcome {
|
|
882
|
-
reported_to_server: true,
|
|
883
|
-
failed: true,
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
|
|
887
|
-
warn!(run_id, failure=?failure, "Failing legacy query request");
|
|
888
|
-
self.wf_client
|
|
889
|
-
.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
890
|
-
.await?;
|
|
891
|
-
WFTReportOutcome {
|
|
892
|
-
reported_to_server: true,
|
|
893
|
-
failed: true,
|
|
894
|
-
}
|
|
895
|
-
}
|
|
896
|
-
FailedActivationOutcome::NoReport => WFTReportOutcome {
|
|
897
|
-
reported_to_server: false,
|
|
898
|
-
failed: true,
|
|
899
|
-
},
|
|
900
|
-
},
|
|
901
|
-
)
|
|
902
|
-
}
|
|
903
|
-
|
|
904
|
-
/// Handle server errors from either completing or failing a workflow task. Returns any errors
|
|
905
|
-
/// that can't be automatically handled.
|
|
906
|
-
async fn handle_wft_reporting_errs<T, Fut>(
|
|
907
|
-
&self,
|
|
908
|
-
run_id: &str,
|
|
909
|
-
completer: impl FnOnce() -> Fut,
|
|
910
|
-
) -> Result<(), CompleteWfError>
|
|
911
|
-
where
|
|
912
|
-
Fut: Future<Output = Result<T, tonic::Status>>,
|
|
913
|
-
{
|
|
914
|
-
let mut should_evict = None;
|
|
915
|
-
let res = match completer().await {
|
|
916
|
-
Err(err) => {
|
|
917
|
-
match err.code() {
|
|
918
|
-
// Silence unhandled command errors since the lang SDK cannot do anything about
|
|
919
|
-
// them besides poll again, which it will do anyway.
|
|
920
|
-
tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
|
|
921
|
-
debug!(error = %err, run_id, "Unhandled command response when completing");
|
|
922
|
-
should_evict = Some(EvictionReason::UnhandledCommand);
|
|
923
|
-
Ok(())
|
|
924
|
-
}
|
|
925
|
-
tonic::Code::NotFound => {
|
|
926
|
-
warn!(error = %err, run_id, "Task not found when completing");
|
|
927
|
-
should_evict = Some(EvictionReason::TaskNotFound);
|
|
928
|
-
Ok(())
|
|
929
|
-
}
|
|
930
|
-
_ => Err(err),
|
|
931
|
-
}
|
|
932
|
-
}
|
|
933
|
-
_ => Ok(()),
|
|
934
|
-
};
|
|
935
|
-
if let Some(reason) = should_evict {
|
|
936
|
-
self.request_wf_eviction(run_id, "Error reporting WFT to server", reason);
|
|
937
|
-
}
|
|
938
|
-
res.map_err(Into::into)
|
|
939
|
-
}
|
|
940
|
-
|
|
941
|
-
async fn complete_local_act(
|
|
481
|
+
fn complete_local_act(
|
|
942
482
|
&self,
|
|
943
483
|
la_res: LocalActivityExecutionResult,
|
|
944
484
|
info: LocalInFlightActInfo,
|
|
@@ -955,51 +495,11 @@ impl Worker {
|
|
|
955
495
|
original_schedule_time: Some(info.la_info.schedule_time),
|
|
956
496
|
}),
|
|
957
497
|
)
|
|
958
|
-
.await
|
|
959
|
-
}
|
|
960
|
-
|
|
961
|
-
async fn notify_local_result(&self, run_id: &str, res: LocalResolution) {
|
|
962
|
-
if let Err(e) = self.wft_manager.notify_of_local_result(run_id, res).await {
|
|
963
|
-
error!(
|
|
964
|
-
"Problem with local resolution on run {}: {:?} -- will evict the workflow",
|
|
965
|
-
run_id, e
|
|
966
|
-
);
|
|
967
|
-
self.request_wf_eviction(
|
|
968
|
-
run_id,
|
|
969
|
-
"Issue while processing local resolution",
|
|
970
|
-
e.evict_reason(),
|
|
971
|
-
);
|
|
972
|
-
}
|
|
973
498
|
}
|
|
974
499
|
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
fn get_sticky_attrs(&self) -> Option<StickyExecutionAttributes> {
|
|
978
|
-
self.sticky_name
|
|
979
|
-
.as_ref()
|
|
980
|
-
.map(|sq| StickyExecutionAttributes {
|
|
981
|
-
worker_task_queue: Some(TaskQueue {
|
|
982
|
-
name: sq.clone(),
|
|
983
|
-
kind: TaskQueueKind::Sticky as i32,
|
|
984
|
-
}),
|
|
985
|
-
schedule_to_start_timeout: Some(
|
|
986
|
-
self.config.sticky_queue_schedule_to_start_timeout.into(),
|
|
987
|
-
),
|
|
988
|
-
})
|
|
500
|
+
fn notify_local_result(&self, run_id: &str, res: LocalResolution) {
|
|
501
|
+
self.workflows.notify_of_local_result(run_id, res);
|
|
989
502
|
}
|
|
990
|
-
|
|
991
|
-
/// Resolves when there are no more outstanding WFTs
|
|
992
|
-
async fn all_wfts_drained(&self) {
|
|
993
|
-
while self.outstanding_workflow_tasks() != 0 {
|
|
994
|
-
self.wfts_drained_notify.notified().await;
|
|
995
|
-
}
|
|
996
|
-
}
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
#[derive(Debug, Copy, Clone)]
|
|
1000
|
-
struct WFTReportOutcome {
|
|
1001
|
-
reported_to_server: bool,
|
|
1002
|
-
failed: bool,
|
|
1003
503
|
}
|
|
1004
504
|
|
|
1005
505
|
#[cfg(test)]
|
|
@@ -1024,23 +524,6 @@ mod tests {
|
|
|
1024
524
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
1025
525
|
}
|
|
1026
526
|
|
|
1027
|
-
#[tokio::test]
|
|
1028
|
-
async fn workflow_timeouts_dont_eat_permits() {
|
|
1029
|
-
let mut mock_client = mock_workflow_client();
|
|
1030
|
-
mock_client
|
|
1031
|
-
.expect_poll_workflow_task()
|
|
1032
|
-
.returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
|
|
1033
|
-
|
|
1034
|
-
let cfg = test_worker_cfg()
|
|
1035
|
-
.max_outstanding_workflow_tasks(5_usize)
|
|
1036
|
-
.max_cached_workflows(5_usize)
|
|
1037
|
-
.build()
|
|
1038
|
-
.unwrap();
|
|
1039
|
-
let worker = Worker::new_test(cfg, mock_client);
|
|
1040
|
-
assert_eq!(worker.workflow_poll().await.unwrap(), None);
|
|
1041
|
-
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
1042
|
-
}
|
|
1043
|
-
|
|
1044
527
|
#[tokio::test]
|
|
1045
528
|
async fn activity_errs_dont_eat_permits() {
|
|
1046
529
|
let mut mock_client = mock_workflow_client();
|
|
@@ -1057,26 +540,16 @@ mod tests {
|
|
|
1057
540
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
1058
541
|
}
|
|
1059
542
|
|
|
1060
|
-
#[tokio::test]
|
|
1061
|
-
async fn workflow_errs_dont_eat_permits() {
|
|
1062
|
-
let mut mock_client = mock_workflow_client();
|
|
1063
|
-
mock_client
|
|
1064
|
-
.expect_poll_workflow_task()
|
|
1065
|
-
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
1066
|
-
|
|
1067
|
-
let cfg = test_worker_cfg()
|
|
1068
|
-
.max_outstanding_workflow_tasks(5_usize)
|
|
1069
|
-
.max_cached_workflows(5_usize)
|
|
1070
|
-
.build()
|
|
1071
|
-
.unwrap();
|
|
1072
|
-
let worker = Worker::new_test(cfg, mock_client);
|
|
1073
|
-
assert!(worker.workflow_poll().await.is_err());
|
|
1074
|
-
assert_eq!(worker.workflows_semaphore.sem.available_permits(), 5);
|
|
1075
|
-
}
|
|
1076
|
-
|
|
1077
543
|
#[test]
|
|
1078
544
|
fn max_polls_calculated_properly() {
|
|
1079
|
-
let
|
|
545
|
+
let mut wcb = WorkerConfigBuilder::default();
|
|
546
|
+
let cfg = wcb
|
|
547
|
+
.namespace("default")
|
|
548
|
+
.task_queue("whatever")
|
|
549
|
+
.worker_build_id("test_bin_id")
|
|
550
|
+
.max_concurrent_wft_polls(5_usize)
|
|
551
|
+
.build()
|
|
552
|
+
.unwrap();
|
|
1080
553
|
assert_eq!(cfg.max_nonsticky_polls(), 1);
|
|
1081
554
|
assert_eq!(cfg.max_sticky_polls(), 4);
|
|
1082
555
|
}
|