@temporalio/core-bridge 0.16.4 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +339 -226
- package/Cargo.toml +7 -3
- package/common.js +50 -0
- package/index.d.ts +7 -0
- package/index.js +12 -0
- package/package.json +7 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/{index.node → releases/index.node} +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/scripts/build.js +10 -50
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
- package/sdk-core/.buildkite/pipeline.yml +2 -0
- package/sdk-core/Cargo.toml +1 -88
- package/sdk-core/README.md +30 -6
- package/sdk-core/bridge-ffi/Cargo.toml +24 -0
- package/sdk-core/bridge-ffi/LICENSE.txt +23 -0
- package/sdk-core/bridge-ffi/build.rs +25 -0
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +216 -0
- package/sdk-core/bridge-ffi/src/lib.rs +829 -0
- package/sdk-core/bridge-ffi/src/wrappers.rs +193 -0
- package/sdk-core/client/Cargo.toml +32 -0
- package/sdk-core/{src/pollers/gateway.rs → client/src/lib.rs} +101 -195
- package/sdk-core/client/src/metrics.rs +89 -0
- package/sdk-core/client/src/mocks.rs +167 -0
- package/sdk-core/{src/pollers → client/src}/retry.rs +172 -14
- package/sdk-core/core/Cargo.toml +96 -0
- package/sdk-core/{src → core/src}/core_tests/activity_tasks.rs +193 -37
- package/sdk-core/{src → core/src}/core_tests/child_workflows.rs +14 -14
- package/sdk-core/{src → core/src}/core_tests/determinism.rs +8 -8
- package/sdk-core/core/src/core_tests/local_activities.rs +328 -0
- package/sdk-core/{src → core/src}/core_tests/mod.rs +6 -9
- package/sdk-core/{src → core/src}/core_tests/queries.rs +54 -54
- package/sdk-core/{src → core/src}/core_tests/replay_flag.rs +8 -12
- package/sdk-core/{src → core/src}/core_tests/workers.rs +120 -33
- package/sdk-core/{src → core/src}/core_tests/workflow_cancels.rs +16 -26
- package/sdk-core/{src → core/src}/core_tests/workflow_tasks.rs +280 -292
- package/sdk-core/core/src/lib.rs +374 -0
- package/sdk-core/{src → core/src}/log_export.rs +3 -27
- package/sdk-core/core/src/pending_activations.rs +162 -0
- package/sdk-core/{src → core/src}/pollers/mod.rs +4 -22
- package/sdk-core/{src → core/src}/pollers/poll_buffer.rs +1 -1
- package/sdk-core/core/src/protosext/mod.rs +396 -0
- package/sdk-core/core/src/replay/mod.rs +210 -0
- package/sdk-core/core/src/retry_logic.rs +144 -0
- package/sdk-core/{src → core/src}/telemetry/metrics.rs +3 -58
- package/sdk-core/{src → core/src}/telemetry/mod.rs +8 -8
- package/sdk-core/{src → core/src}/telemetry/prometheus_server.rs +0 -0
- package/sdk-core/{src → core/src}/test_help/mod.rs +35 -83
- package/sdk-core/{src → core/src}/worker/activities/activity_heartbeat_manager.rs +95 -42
- package/sdk-core/core/src/worker/activities/local_activities.rs +973 -0
- package/sdk-core/{src → core/src}/worker/activities.rs +52 -33
- package/sdk-core/{src → core/src}/worker/dispatcher.rs +8 -6
- package/sdk-core/{src → core/src}/worker/mod.rs +347 -221
- package/sdk-core/core/src/worker/wft_delivery.rs +81 -0
- package/sdk-core/{src → core/src}/workflow/bridge.rs +5 -2
- package/sdk-core/{src → core/src}/workflow/driven_workflow.rs +17 -7
- package/sdk-core/{src → core/src}/workflow/history_update.rs +33 -7
- package/sdk-core/{src → core/src/workflow}/machines/activity_state_machine.rs +26 -26
- package/sdk-core/{src → core/src/workflow}/machines/cancel_external_state_machine.rs +8 -11
- package/sdk-core/{src → core/src/workflow}/machines/cancel_workflow_state_machine.rs +19 -21
- package/sdk-core/{src → core/src/workflow}/machines/child_workflow_state_machine.rs +20 -31
- package/sdk-core/{src → core/src/workflow}/machines/complete_workflow_state_machine.rs +3 -5
- package/sdk-core/{src → core/src/workflow}/machines/continue_as_new_workflow_state_machine.rs +18 -18
- package/sdk-core/{src → core/src/workflow}/machines/fail_workflow_state_machine.rs +5 -6
- package/sdk-core/core/src/workflow/machines/local_activity_state_machine.rs +1451 -0
- package/sdk-core/{src → core/src/workflow}/machines/mod.rs +54 -107
- package/sdk-core/{src → core/src/workflow}/machines/mutable_side_effect_state_machine.rs +0 -0
- package/sdk-core/{src → core/src/workflow}/machines/patch_state_machine.rs +29 -30
- package/sdk-core/{src → core/src/workflow}/machines/side_effect_state_machine.rs +0 -0
- package/sdk-core/{src → core/src/workflow}/machines/signal_external_state_machine.rs +17 -19
- package/sdk-core/{src → core/src/workflow}/machines/timer_state_machine.rs +20 -21
- package/sdk-core/{src → core/src/workflow}/machines/transition_coverage.rs +5 -2
- package/sdk-core/{src → core/src/workflow}/machines/upsert_search_attributes_state_machine.rs +0 -0
- package/sdk-core/core/src/workflow/machines/workflow_machines/local_acts.rs +96 -0
- package/sdk-core/{src → core/src/workflow}/machines/workflow_machines.rs +357 -171
- package/sdk-core/{src → core/src/workflow}/machines/workflow_task_state_machine.rs +1 -1
- package/sdk-core/{src → core/src}/workflow/mod.rs +200 -39
- package/sdk-core/{src → core/src}/workflow/workflow_tasks/cache_manager.rs +0 -0
- package/sdk-core/{src → core/src}/workflow/workflow_tasks/concurrency_manager.rs +38 -5
- package/sdk-core/{src → core/src}/workflow/workflow_tasks/mod.rs +317 -103
- package/sdk-core/{test_utils → core-api}/Cargo.toml +10 -7
- package/sdk-core/{src → core-api/src}/errors.rs +42 -92
- package/sdk-core/core-api/src/lib.rs +158 -0
- package/sdk-core/{src/worker/config.rs → core-api/src/worker.rs} +18 -23
- package/sdk-core/etc/deps.svg +156 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +5 -5
- package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +3 -5
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -1
- package/sdk-core/histories/fail_wf_task.bin +0 -0
- package/sdk-core/histories/timer_workflow_history.bin +0 -0
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +44 -13
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +19 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +1 -1
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +9 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +1 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +1 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +14 -7
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +176 -18
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +11 -0
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +156 -7
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +135 -104
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +78 -0
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +205 -0
- package/sdk-core/protos/local/temporal/sdk/core/bridge/service.proto +61 -0
- package/sdk-core/protos/local/{child_workflow.proto → temporal/sdk/core/child_workflow/child_workflow.proto} +1 -1
- package/sdk-core/protos/local/{common.proto → temporal/sdk/core/common/common.proto} +5 -3
- package/sdk-core/protos/local/{core_interface.proto → temporal/sdk/core/core_interface.proto} +10 -10
- package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
- package/sdk-core/protos/local/{workflow_activation.proto → temporal/sdk/core/workflow_activation/workflow_activation.proto} +35 -11
- package/sdk-core/protos/local/{workflow_commands.proto → temporal/sdk/core/workflow_commands/workflow_commands.proto} +55 -4
- package/sdk-core/protos/local/{workflow_completion.proto → temporal/sdk/core/workflow_completion/workflow_completion.proto} +3 -3
- package/sdk-core/sdk/Cargo.toml +32 -0
- package/sdk-core/{src/prototype_rust_sdk → sdk/src}/conversions.rs +0 -0
- package/sdk-core/sdk/src/lib.rs +699 -0
- package/sdk-core/sdk/src/payload_converter.rs +11 -0
- package/sdk-core/sdk/src/workflow_context/options.rs +180 -0
- package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_context.rs +201 -124
- package/sdk-core/{src/prototype_rust_sdk → sdk/src}/workflow_future.rs +63 -30
- package/sdk-core/sdk-core-protos/Cargo.toml +10 -0
- package/sdk-core/sdk-core-protos/build.rs +28 -6
- package/sdk-core/sdk-core-protos/src/constants.rs +7 -0
- package/sdk-core/{src/test_help → sdk-core-protos/src}/history_builder.rs +134 -49
- package/sdk-core/sdk-core-protos/src/history_info.rs +216 -0
- package/sdk-core/sdk-core-protos/src/lib.rs +601 -168
- package/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
- package/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
- package/sdk-core/test-utils/Cargo.toml +32 -0
- package/sdk-core/{src/test_help → test-utils/src}/canned_histories.rs +59 -78
- package/sdk-core/test-utils/src/histfetch.rs +28 -0
- package/sdk-core/{test_utils → test-utils}/src/lib.rs +131 -68
- package/sdk-core/tests/integ_tests/client_tests.rs +1 -1
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +11 -7
- package/sdk-core/tests/integ_tests/polling_tests.rs +12 -11
- package/sdk-core/tests/integ_tests/queries_tests.rs +82 -78
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +91 -71
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +3 -4
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +2 -4
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +4 -6
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +4 -6
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -4
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +496 -0
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +5 -8
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +125 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +7 -13
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +33 -5
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +12 -16
- package/sdk-core/tests/integ_tests/workflow_tests.rs +85 -82
- package/sdk-core/tests/load_tests.rs +6 -6
- package/sdk-core/tests/main.rs +2 -2
- package/src/conversions.rs +24 -21
- package/src/errors.rs +8 -0
- package/src/lib.rs +323 -211
- package/sdk-core/protos/local/activity_result.proto +0 -46
- package/sdk-core/protos/local/activity_task.proto +0 -66
- package/sdk-core/src/core_tests/retry.rs +0 -147
- package/sdk-core/src/lib.rs +0 -403
- package/sdk-core/src/machines/local_activity_state_machine.rs +0 -117
- package/sdk-core/src/pending_activations.rs +0 -249
- package/sdk-core/src/protosext/mod.rs +0 -160
- package/sdk-core/src/prototype_rust_sdk.rs +0 -412
- package/sdk-core/src/task_token.rs +0 -20
- package/sdk-core/src/test_help/history_info.rs +0 -157
|
@@ -1,40 +1,48 @@
|
|
|
1
1
|
mod activities;
|
|
2
|
-
mod config;
|
|
3
2
|
mod dispatcher;
|
|
3
|
+
mod wft_delivery;
|
|
4
4
|
|
|
5
|
-
pub use
|
|
5
|
+
pub use temporal_sdk_core_api::worker::{WorkerConfig, WorkerConfigBuilder};
|
|
6
|
+
|
|
7
|
+
pub(crate) use activities::{
|
|
8
|
+
ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
9
|
+
NewLocalAct,
|
|
10
|
+
};
|
|
6
11
|
pub(crate) use dispatcher::WorkerDispatcher;
|
|
7
12
|
|
|
8
13
|
use crate::{
|
|
9
14
|
errors::CompleteWfError,
|
|
10
|
-
machines::{EmptyWorkflowCommandErr, WFMachinesError},
|
|
11
15
|
pollers::{
|
|
12
|
-
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller,
|
|
13
|
-
|
|
16
|
+
new_activity_task_buffer, new_workflow_task_buffer, BoxedActPoller, BoxedWFPoller, Poller,
|
|
17
|
+
WorkflowTaskPoller,
|
|
14
18
|
},
|
|
15
|
-
protosext::{legacy_query_failure, ValidPollWFTQResponse
|
|
16
|
-
task_token::TaskToken,
|
|
19
|
+
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
17
20
|
telemetry::metrics::{
|
|
18
21
|
activity_poller, workflow_poller, workflow_sticky_poller, MetricsContext,
|
|
19
22
|
},
|
|
23
|
+
worker::{
|
|
24
|
+
activities::{DispatchOrTimeoutLA, LACompleteAction, LocalActivityManager},
|
|
25
|
+
wft_delivery::WFTSource,
|
|
26
|
+
},
|
|
20
27
|
workflow::{
|
|
21
28
|
workflow_tasks::{
|
|
22
29
|
ActivationAction, FailedActivationOutcome, NewWfTaskOutcome,
|
|
23
30
|
ServerCommandsWithWorkflowInfo, WorkflowTaskManager,
|
|
24
31
|
},
|
|
25
|
-
WorkflowCachingPolicy,
|
|
32
|
+
EmptyWorkflowCommandErr, LocalResolution, WFMachinesError, WorkflowCachingPolicy,
|
|
26
33
|
},
|
|
27
34
|
ActivityHeartbeat, CompleteActivityError, PollActivityError, PollWfError,
|
|
28
35
|
};
|
|
29
|
-
use activities::WorkerActivityTasks;
|
|
36
|
+
use activities::{LocalInFlightActInfo, WorkerActivityTasks};
|
|
30
37
|
use futures::{Future, TryFutureExt};
|
|
31
38
|
use std::{convert::TryInto, sync::Arc};
|
|
39
|
+
use temporal_client::{ServerGatewayApis, WorkflowTaskCompletion};
|
|
32
40
|
use temporal_sdk_core_protos::{
|
|
33
41
|
coresdk::{
|
|
34
|
-
activity_result::
|
|
42
|
+
activity_result::activity_execution_result,
|
|
35
43
|
activity_task::ActivityTask,
|
|
36
|
-
workflow_activation::
|
|
37
|
-
workflow_completion::{self,
|
|
44
|
+
workflow_activation::{remove_from_cache::EvictionReason, WorkflowActivation},
|
|
45
|
+
workflow_completion::{self, workflow_activation_completion, WorkflowActivationCompletion},
|
|
38
46
|
},
|
|
39
47
|
temporal::api::{
|
|
40
48
|
enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
|
|
@@ -42,39 +50,42 @@ use temporal_sdk_core_protos::{
|
|
|
42
50
|
taskqueue::v1::{StickyExecutionAttributes, TaskQueue},
|
|
43
51
|
workflowservice::v1::{PollActivityTaskQueueResponse, PollWorkflowTaskQueueResponse},
|
|
44
52
|
},
|
|
53
|
+
TaskToken,
|
|
45
54
|
};
|
|
46
|
-
use tokio::sync::{watch,
|
|
55
|
+
use tokio::sync::{watch, Notify, Semaphore};
|
|
47
56
|
use tonic::Code;
|
|
48
57
|
use tracing_futures::Instrument;
|
|
49
58
|
|
|
50
59
|
/// A worker polls on a certain task queue
|
|
51
60
|
pub struct Worker {
|
|
52
61
|
config: WorkerConfig,
|
|
53
|
-
server_gateway: Arc<
|
|
62
|
+
server_gateway: Arc<dyn ServerGatewayApis + Send + Sync>,
|
|
54
63
|
|
|
55
64
|
/// Will be populated when this worker should poll on a sticky WFT queue
|
|
56
65
|
sticky_name: Option<String>,
|
|
57
66
|
|
|
58
|
-
// TODO: Worth moving inside wf task mgr too?
|
|
59
67
|
/// Buffers workflow task polling in the event we need to return a pending activation while
|
|
60
68
|
/// a poll is ongoing. Sticky and nonsticky polling happens inside of it.
|
|
61
|
-
|
|
62
|
-
/// Workflow task management
|
|
63
|
-
wft_manager: WorkflowTaskManager,
|
|
69
|
+
wf_task_source: WFTSource,
|
|
70
|
+
/// Workflow task management TODO: No pub
|
|
71
|
+
pub(crate) wft_manager: WorkflowTaskManager,
|
|
64
72
|
/// Manages activity tasks for this worker/task queue
|
|
65
73
|
at_task_mgr: Option<WorkerActivityTasks>,
|
|
74
|
+
/// Manages local activities
|
|
75
|
+
local_act_mgr: LocalActivityManager,
|
|
66
76
|
/// Ensures we stay at or below this worker's maximum concurrent workflow limit
|
|
67
77
|
workflows_semaphore: Semaphore,
|
|
68
78
|
/// Used to wake blocked workflow task polling when there is some change to workflow activations
|
|
69
79
|
/// that should cause us to restart the loop
|
|
70
|
-
|
|
71
|
-
/// Watched during shutdown to wait for all WFTs to complete
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
wfts_drained_sender: watch::Sender<bool>,
|
|
80
|
+
pending_activations_notify: Arc<Notify>,
|
|
81
|
+
/// Watched during shutdown to wait for all WFTs to complete. Should be notified any time
|
|
82
|
+
/// a WFT is completed.
|
|
83
|
+
wfts_drained_notify: Arc<Notify>,
|
|
75
84
|
/// Has shutdown been called?
|
|
76
85
|
shutdown_requested: watch::Receiver<bool>,
|
|
77
86
|
shutdown_sender: watch::Sender<bool>,
|
|
87
|
+
/// Will be called at the end of each activation completion
|
|
88
|
+
post_activate_hook: Option<Box<dyn Fn(&Self) + Send + Sync>>,
|
|
78
89
|
|
|
79
90
|
metrics: MetricsContext,
|
|
80
91
|
}
|
|
@@ -83,7 +94,7 @@ impl Worker {
|
|
|
83
94
|
pub(crate) fn new(
|
|
84
95
|
config: WorkerConfig,
|
|
85
96
|
sticky_queue_name: Option<String>,
|
|
86
|
-
sg: Arc<
|
|
97
|
+
sg: Arc<dyn ServerGatewayApis + Send + Sync>,
|
|
87
98
|
metrics: MetricsContext,
|
|
88
99
|
) -> Self {
|
|
89
100
|
metrics.worker_registered();
|
|
@@ -96,7 +107,7 @@ impl Worker {
|
|
|
96
107
|
let max_sticky_polls = config.max_sticky_polls();
|
|
97
108
|
let wft_metrics = metrics.with_new_attrs([workflow_poller()]);
|
|
98
109
|
let mut wf_task_poll_buffer = new_workflow_task_buffer(
|
|
99
|
-
sg.
|
|
110
|
+
sg.clone(),
|
|
100
111
|
config.task_queue.clone(),
|
|
101
112
|
false,
|
|
102
113
|
max_nonsticky_polls,
|
|
@@ -106,7 +117,7 @@ impl Worker {
|
|
|
106
117
|
let sticky_queue_poller = sticky_queue_name.as_ref().map(|sqn| {
|
|
107
118
|
let sticky_metrics = metrics.with_new_attrs([workflow_sticky_poller()]);
|
|
108
119
|
let mut sp = new_workflow_task_buffer(
|
|
109
|
-
sg.
|
|
120
|
+
sg.clone(),
|
|
110
121
|
sqn.clone(),
|
|
111
122
|
true,
|
|
112
123
|
max_sticky_polls,
|
|
@@ -119,7 +130,7 @@ impl Worker {
|
|
|
119
130
|
None
|
|
120
131
|
} else {
|
|
121
132
|
let mut ap = new_activity_task_buffer(
|
|
122
|
-
sg.
|
|
133
|
+
sg.clone(),
|
|
123
134
|
config.task_queue.clone(),
|
|
124
135
|
config.max_concurrent_at_polls,
|
|
125
136
|
config.max_concurrent_at_polls * 2,
|
|
@@ -148,7 +159,7 @@ impl Worker {
|
|
|
148
159
|
pub(crate) fn new_with_pollers(
|
|
149
160
|
config: WorkerConfig,
|
|
150
161
|
sticky_queue_name: Option<String>,
|
|
151
|
-
sg: Arc<
|
|
162
|
+
sg: Arc<dyn ServerGatewayApis + Send + Sync>,
|
|
152
163
|
wft_poller: BoxedWFPoller,
|
|
153
164
|
act_poller: Option<BoxedActPoller>,
|
|
154
165
|
metrics: MetricsContext,
|
|
@@ -160,63 +171,76 @@ impl Worker {
|
|
|
160
171
|
max_cached_workflows: config.max_cached_workflows,
|
|
161
172
|
}
|
|
162
173
|
};
|
|
163
|
-
let
|
|
164
|
-
let
|
|
174
|
+
let pa_notif = Arc::new(Notify::new());
|
|
175
|
+
let wfts_drained_notify = Arc::new(Notify::new());
|
|
165
176
|
let (shut_tx, shut_rx) = watch::channel(false);
|
|
166
177
|
Self {
|
|
167
178
|
server_gateway: sg.clone(),
|
|
168
179
|
sticky_name: sticky_queue_name,
|
|
169
|
-
|
|
170
|
-
wft_manager: WorkflowTaskManager::new(
|
|
180
|
+
wf_task_source: WFTSource::new(wft_poller),
|
|
181
|
+
wft_manager: WorkflowTaskManager::new(pa_notif.clone(), cache_policy, metrics.clone()),
|
|
171
182
|
at_task_mgr: act_poller.map(|ap| {
|
|
172
183
|
WorkerActivityTasks::new(
|
|
173
184
|
config.max_outstanding_activities,
|
|
174
185
|
ap,
|
|
175
|
-
sg.
|
|
186
|
+
sg.clone(),
|
|
176
187
|
metrics.clone(),
|
|
177
188
|
config.max_heartbeat_throttle_interval,
|
|
178
189
|
config.default_heartbeat_throttle_interval,
|
|
179
190
|
)
|
|
180
191
|
}),
|
|
192
|
+
local_act_mgr: LocalActivityManager::new(
|
|
193
|
+
config.max_outstanding_local_activities,
|
|
194
|
+
sg.get_options().namespace.clone(),
|
|
195
|
+
),
|
|
181
196
|
workflows_semaphore: Semaphore::new(config.max_outstanding_workflow_tasks),
|
|
182
197
|
config,
|
|
183
198
|
shutdown_requested: shut_rx,
|
|
184
199
|
shutdown_sender: shut_tx,
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
200
|
+
post_activate_hook: None,
|
|
201
|
+
pending_activations_notify: pa_notif,
|
|
202
|
+
wfts_drained_notify,
|
|
188
203
|
metrics,
|
|
189
204
|
}
|
|
190
205
|
}
|
|
191
206
|
|
|
192
|
-
///
|
|
193
|
-
|
|
194
|
-
pub(crate) async fn shutdown(&self) {
|
|
207
|
+
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
208
|
+
pub(crate) fn initiate_shutdown(&self) {
|
|
195
209
|
let _ = self.shutdown_sender.send(true);
|
|
210
|
+
// First, we want to stop polling of both activity and workflow tasks
|
|
196
211
|
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
197
212
|
atm.notify_shutdown();
|
|
198
213
|
}
|
|
199
|
-
self.
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
214
|
+
self.wf_task_source.stop_pollers();
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/// Will shutdown the worker. Does not resolve until all outstanding workflow tasks have been
|
|
218
|
+
/// completed
|
|
219
|
+
pub(crate) async fn shutdown(&self) {
|
|
220
|
+
self.initiate_shutdown();
|
|
221
|
+
// Next we need to wait for all local activities to finish so no more workflow task
|
|
222
|
+
// heartbeats will be generated
|
|
223
|
+
self.local_act_mgr.shutdown_and_wait_all_finished().await;
|
|
224
|
+
// Then we need to wait for any tasks generated as a result of completing WFTs, which
|
|
225
|
+
// heartbeating generates
|
|
226
|
+
self.wf_task_source
|
|
227
|
+
.wait_for_tasks_from_complete_to_drain()
|
|
228
|
+
.await;
|
|
229
|
+
// wait until all outstanding workflow tasks have been completed
|
|
230
|
+
self.all_wfts_drained().await;
|
|
231
|
+
// Wait for activities to finish
|
|
232
|
+
if let Some(acts) = self.at_task_mgr.as_ref() {
|
|
233
|
+
acts.wait_all_finished().await;
|
|
211
234
|
}
|
|
212
235
|
}
|
|
213
236
|
|
|
214
237
|
/// Finish shutting down by consuming the background pollers and freeing all resources
|
|
215
238
|
pub(crate) async fn finalize_shutdown(self) {
|
|
216
|
-
self.
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
239
|
+
tokio::join!(self.wf_task_source.shutdown(), async {
|
|
240
|
+
if let Some(b) = self.at_task_mgr {
|
|
241
|
+
b.shutdown().await;
|
|
242
|
+
}
|
|
243
|
+
});
|
|
220
244
|
}
|
|
221
245
|
|
|
222
246
|
pub(crate) fn outstanding_workflow_tasks(&self) -> usize {
|
|
@@ -228,25 +252,36 @@ impl Worker {
|
|
|
228
252
|
self.workflows_semaphore.available_permits()
|
|
229
253
|
}
|
|
230
254
|
|
|
231
|
-
///
|
|
232
|
-
///
|
|
255
|
+
/// Get new activity tasks (may be local or nonlocal). Local activities are returned first
|
|
256
|
+
/// before polling the server if there are any.
|
|
233
257
|
///
|
|
234
258
|
/// Returns `Ok(None)` in the event of a poll timeout or if the polling loop should otherwise
|
|
235
259
|
/// be restarted
|
|
236
260
|
pub(crate) async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollActivityError> {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
261
|
+
let act_mgr_poll = async {
|
|
262
|
+
if let Some(ref act_mgr) = self.at_task_mgr {
|
|
263
|
+
act_mgr.poll().await
|
|
264
|
+
} else {
|
|
265
|
+
let _ = self.shutdown_requested.clone().changed().await;
|
|
266
|
+
Err(PollActivityError::ShutDown)
|
|
267
|
+
}
|
|
268
|
+
};
|
|
242
269
|
|
|
243
270
|
tokio::select! {
|
|
244
271
|
biased;
|
|
245
272
|
|
|
246
|
-
r =
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
273
|
+
r = self.local_act_mgr.next_pending() => {
|
|
274
|
+
match r {
|
|
275
|
+
Some(DispatchOrTimeoutLA::Dispatch(r)) => Ok(Some(r)),
|
|
276
|
+
Some(DispatchOrTimeoutLA::Timeout { run_id, resolution, task }) => {
|
|
277
|
+
self.notify_local_result(
|
|
278
|
+
&run_id, LocalResolution::LocalActivity(resolution)).await;
|
|
279
|
+
Ok(task)
|
|
280
|
+
},
|
|
281
|
+
None => Ok(None)
|
|
282
|
+
}
|
|
283
|
+
},
|
|
284
|
+
r = act_mgr_poll => r,
|
|
250
285
|
}
|
|
251
286
|
}
|
|
252
287
|
|
|
@@ -263,10 +298,35 @@ impl Worker {
|
|
|
263
298
|
pub(crate) async fn complete_activity(
|
|
264
299
|
&self,
|
|
265
300
|
task_token: TaskToken,
|
|
266
|
-
status:
|
|
301
|
+
status: activity_execution_result::Status,
|
|
267
302
|
) -> Result<(), CompleteActivityError> {
|
|
303
|
+
if task_token.is_local_activity_task() {
|
|
304
|
+
let as_la_res: LocalActivityExecutionResult = status.try_into()?;
|
|
305
|
+
match self.local_act_mgr.complete(&task_token, &as_la_res) {
|
|
306
|
+
LACompleteAction::Report(info) => {
|
|
307
|
+
self.complete_local_act(as_la_res, info, None).await
|
|
308
|
+
}
|
|
309
|
+
LACompleteAction::LangDoesTimerBackoff(backoff, info) => {
|
|
310
|
+
// This la needs to write a failure marker, and then we will tell lang how
|
|
311
|
+
// long of a timer to schedule to back off for. We do this because there are
|
|
312
|
+
// no other situations where core generates "internal" commands so it is much
|
|
313
|
+
// simpler for lang to reply with the timer / next LA command than to do it
|
|
314
|
+
// internally. Plus, this backoff hack we'd like to eliminate eventually.
|
|
315
|
+
self.complete_local_act(as_la_res, info, Some(backoff))
|
|
316
|
+
.await
|
|
317
|
+
}
|
|
318
|
+
LACompleteAction::WillBeRetried => {
|
|
319
|
+
// Nothing to do here
|
|
320
|
+
}
|
|
321
|
+
LACompleteAction::Untracked => {
|
|
322
|
+
warn!("Tried to complete untracked local activity {}", task_token);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
return Ok(());
|
|
326
|
+
}
|
|
327
|
+
|
|
268
328
|
if let Some(atm) = &self.at_task_mgr {
|
|
269
|
-
atm.complete(task_token, status, self.server_gateway.
|
|
329
|
+
atm.complete(task_token, status, self.server_gateway.as_ref())
|
|
270
330
|
.await
|
|
271
331
|
} else {
|
|
272
332
|
error!(
|
|
@@ -276,8 +336,7 @@ impl Worker {
|
|
|
276
336
|
Ok(())
|
|
277
337
|
}
|
|
278
338
|
}
|
|
279
|
-
|
|
280
|
-
pub(crate) async fn next_workflow_activation(&self) -> Result<WfActivation, PollWfError> {
|
|
339
|
+
pub(crate) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
281
340
|
// The poll needs to be in a loop because we can't guarantee tail call optimization in Rust
|
|
282
341
|
// (simply) and we really, really need that for long-poll retries.
|
|
283
342
|
loop {
|
|
@@ -297,8 +356,6 @@ impl Worker {
|
|
|
297
356
|
_ => continue,
|
|
298
357
|
}
|
|
299
358
|
}
|
|
300
|
-
let mut pending_activations_notification =
|
|
301
|
-
self.pending_activations_notification_receiver.lock().await;
|
|
302
359
|
|
|
303
360
|
let selected_f = tokio::select! {
|
|
304
361
|
biased;
|
|
@@ -307,7 +364,7 @@ impl Worker {
|
|
|
307
364
|
// the loop right away to provide any potential new pending activation.
|
|
308
365
|
// Continue here means that we unnecessarily add another permit to the poll buffer,
|
|
309
366
|
// this will go away when polling is done in the background.
|
|
310
|
-
_ =
|
|
367
|
+
_ = self.pending_activations_notify.notified() => continue,
|
|
311
368
|
r = self.workflow_poll_or_wfts_drained() => r,
|
|
312
369
|
}?;
|
|
313
370
|
|
|
@@ -326,32 +383,47 @@ impl Worker {
|
|
|
326
383
|
|
|
327
384
|
pub(crate) async fn complete_workflow_activation(
|
|
328
385
|
&self,
|
|
329
|
-
completion:
|
|
386
|
+
completion: WorkflowActivationCompletion,
|
|
330
387
|
) -> Result<(), CompleteWfError> {
|
|
331
388
|
let wfstatus = completion.status;
|
|
332
|
-
let
|
|
333
|
-
Some(
|
|
389
|
+
let report_outcome = match wfstatus {
|
|
390
|
+
Some(workflow_activation_completion::Status::Successful(success)) => {
|
|
334
391
|
self.wf_activation_success(&completion.run_id, success)
|
|
335
392
|
.await
|
|
336
393
|
}
|
|
337
|
-
|
|
338
|
-
|
|
394
|
+
|
|
395
|
+
Some(workflow_activation_completion::Status::Failed(failure)) => {
|
|
396
|
+
self.wf_activation_failed(
|
|
397
|
+
&completion.run_id,
|
|
398
|
+
WorkflowTaskFailedCause::Unspecified,
|
|
399
|
+
EvictionReason::LangFail,
|
|
400
|
+
failure,
|
|
401
|
+
)
|
|
402
|
+
.await
|
|
403
|
+
}
|
|
404
|
+
None => {
|
|
405
|
+
return Err(CompleteWfError::MalformedWorkflowCompletion {
|
|
406
|
+
reason: "Workflow completion had empty status field".to_owned(),
|
|
407
|
+
completion: None,
|
|
408
|
+
})
|
|
339
409
|
}
|
|
340
|
-
None => Err(CompleteWfError::MalformedWorkflowCompletion {
|
|
341
|
-
reason: "Workflow completion had empty status field".to_owned(),
|
|
342
|
-
completion: None,
|
|
343
|
-
}),
|
|
344
410
|
}?;
|
|
345
|
-
self.after_workflow_activation(&completion.run_id, did_complete_wft);
|
|
346
|
-
Ok(())
|
|
347
|
-
}
|
|
348
411
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
412
|
+
self.wft_manager
|
|
413
|
+
.after_wft_report(&completion.run_id, report_outcome.reported_to_server);
|
|
414
|
+
if report_outcome.reported_to_server || report_outcome.failed {
|
|
415
|
+
// If we failed the WFT but didn't report anything, we still want to release the WFT
|
|
416
|
+
// permit since the server will eventually time out the task and we've already evicted
|
|
417
|
+
// the run.
|
|
418
|
+
self.return_workflow_task_permit();
|
|
419
|
+
}
|
|
420
|
+
self.wfts_drained_notify.notify_waiters();
|
|
421
|
+
|
|
422
|
+
if let Some(h) = &self.post_activate_hook {
|
|
423
|
+
h(self);
|
|
354
424
|
}
|
|
425
|
+
|
|
426
|
+
Ok(())
|
|
355
427
|
}
|
|
356
428
|
|
|
357
429
|
/// Tell the worker a workflow task has completed, for tracking max outstanding WFTs
|
|
@@ -359,37 +431,43 @@ impl Worker {
|
|
|
359
431
|
self.workflows_semaphore.add_permits(1);
|
|
360
432
|
}
|
|
361
433
|
|
|
362
|
-
pub(crate) fn request_wf_eviction(
|
|
363
|
-
self
|
|
434
|
+
pub(crate) fn request_wf_eviction(
|
|
435
|
+
&self,
|
|
436
|
+
run_id: &str,
|
|
437
|
+
message: impl Into<String>,
|
|
438
|
+
reason: EvictionReason,
|
|
439
|
+
) {
|
|
440
|
+
self.wft_manager.request_eviction(run_id, message, reason);
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
/// Sets a function to be called at the end of each activation completion
|
|
444
|
+
pub(crate) fn set_post_activate_hook(
|
|
445
|
+
&mut self,
|
|
446
|
+
callback: impl Fn(&Self) + Send + Sync + 'static,
|
|
447
|
+
) {
|
|
448
|
+
self.post_activate_hook = Some(Box::new(callback))
|
|
364
449
|
}
|
|
365
450
|
|
|
366
451
|
/// Resolves with WFT poll response or `PollWfError::ShutDown` if WFTs have been drained
|
|
367
452
|
async fn workflow_poll_or_wfts_drained(
|
|
368
453
|
&self,
|
|
369
454
|
) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
455
|
+
let mut shutdown_requested = self.shutdown_requested.clone();
|
|
370
456
|
loop {
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
r = self.workflow_poll()
|
|
386
|
-
.map_err(Into::into) => match r {
|
|
387
|
-
Err(PollWfError::ShutDown) => {},
|
|
388
|
-
_ => return r,
|
|
389
|
-
},
|
|
390
|
-
_ = shutdown_requested.changed() => {},
|
|
391
|
-
}
|
|
392
|
-
};
|
|
457
|
+
tokio::select! {
|
|
458
|
+
biased;
|
|
459
|
+
|
|
460
|
+
r = self.workflow_poll().map_err(Into::into) => {
|
|
461
|
+
if matches!(r, Err(PollWfError::ShutDown)) {
|
|
462
|
+
// Don't actually return shutdown until workflow tasks are drained.
|
|
463
|
+
// Outstanding tasks being completed will generate new pending activations
|
|
464
|
+
// which will cause us to abort this function.
|
|
465
|
+
self.all_wfts_drained().await;
|
|
466
|
+
}
|
|
467
|
+
return r
|
|
468
|
+
},
|
|
469
|
+
_ = shutdown_requested.changed() => {},
|
|
470
|
+
}
|
|
393
471
|
}
|
|
394
472
|
}
|
|
395
473
|
|
|
@@ -399,6 +477,17 @@ impl Worker {
|
|
|
399
477
|
/// Returns `Ok(None)` in the event of a poll timeout, or if there was some gRPC error that
|
|
400
478
|
/// callers can't do anything about.
|
|
401
479
|
async fn workflow_poll(&self) -> Result<Option<ValidPollWFTQResponse>, PollWfError> {
|
|
480
|
+
// We can't say we're shut down if there are outstanding LAs, as they could end up WFT
|
|
481
|
+
// heartbeating which is a "new" workflow task that we need to accept and process as long as
|
|
482
|
+
// the LA is outstanding. Similarly, if we already have such tasks (from a WFT completion),
|
|
483
|
+
// then we must fetch them from the source before we can say workflow polling is shutdown.
|
|
484
|
+
if *self.shutdown_requested.borrow()
|
|
485
|
+
&& !self.wf_task_source.has_tasks_from_complete()
|
|
486
|
+
&& self.local_act_mgr.num_outstanding() == 0
|
|
487
|
+
{
|
|
488
|
+
return Err(PollWfError::ShutDown);
|
|
489
|
+
}
|
|
490
|
+
|
|
402
491
|
let sem = self
|
|
403
492
|
.workflows_semaphore
|
|
404
493
|
.acquire()
|
|
@@ -406,8 +495,8 @@ impl Worker {
|
|
|
406
495
|
.expect("outstanding workflow tasks semaphore not dropped");
|
|
407
496
|
|
|
408
497
|
let res = self
|
|
409
|
-
.
|
|
410
|
-
.
|
|
498
|
+
.wf_task_source
|
|
499
|
+
.next_wft()
|
|
411
500
|
.await
|
|
412
501
|
.ok_or(PollWfError::ShutDown)??;
|
|
413
502
|
|
|
@@ -444,12 +533,12 @@ impl Worker {
|
|
|
444
533
|
async fn apply_server_work(
|
|
445
534
|
&self,
|
|
446
535
|
work: ValidPollWFTQResponse,
|
|
447
|
-
) -> Result<Option<
|
|
536
|
+
) -> Result<Option<WorkflowActivation>, PollWfError> {
|
|
448
537
|
let we = work.workflow_execution.clone();
|
|
449
538
|
let tt = work.task_token.clone();
|
|
450
539
|
let res = self
|
|
451
540
|
.wft_manager
|
|
452
|
-
.apply_new_poll_resp(work,
|
|
541
|
+
.apply_new_poll_resp(work, self.server_gateway.clone())
|
|
453
542
|
.await;
|
|
454
543
|
Ok(match res {
|
|
455
544
|
NewWfTaskOutcome::IssueActivation(a) => {
|
|
@@ -462,10 +551,10 @@ impl Worker {
|
|
|
462
551
|
self.return_workflow_task_permit();
|
|
463
552
|
None
|
|
464
553
|
}
|
|
465
|
-
NewWfTaskOutcome::Autocomplete => {
|
|
554
|
+
NewWfTaskOutcome::Autocomplete | NewWfTaskOutcome::LocalActsOutstanding => {
|
|
466
555
|
debug!(workflow_execution=?we,
|
|
467
|
-
"No work for lang to perform after polling server
|
|
468
|
-
self.complete_workflow_activation(
|
|
556
|
+
"No new work for lang to perform after polling server");
|
|
557
|
+
self.complete_workflow_activation(WorkflowActivationCompletion {
|
|
469
558
|
task_queue: self.config.task_queue.clone(),
|
|
470
559
|
run_id: we.run_id,
|
|
471
560
|
status: Some(workflow_completion::Success::from_variants(vec![]).into()),
|
|
@@ -496,6 +585,7 @@ impl Worker {
|
|
|
496
585
|
self.request_wf_eviction(
|
|
497
586
|
&we.run_id,
|
|
498
587
|
format!("Error while applying poll response to workflow: {:?}", e),
|
|
588
|
+
e.evict_reason(),
|
|
499
589
|
);
|
|
500
590
|
None
|
|
501
591
|
}
|
|
@@ -509,7 +599,7 @@ impl Worker {
|
|
|
509
599
|
&self,
|
|
510
600
|
run_id: &str,
|
|
511
601
|
success: workflow_completion::Success,
|
|
512
|
-
) -> Result<
|
|
602
|
+
) -> Result<WFTReportOutcome, CompleteWfError> {
|
|
513
603
|
// Convert to wf commands
|
|
514
604
|
let cmds = success
|
|
515
605
|
.commands
|
|
@@ -523,13 +613,18 @@ impl Worker {
|
|
|
523
613
|
completion: None,
|
|
524
614
|
})?;
|
|
525
615
|
|
|
526
|
-
match self
|
|
616
|
+
match self
|
|
617
|
+
.wft_manager
|
|
618
|
+
.successful_activation(run_id, cmds, |acts| self.local_act_mgr.enqueue(acts))
|
|
619
|
+
.await
|
|
620
|
+
{
|
|
527
621
|
Ok(Some(ServerCommandsWithWorkflowInfo {
|
|
528
622
|
task_token,
|
|
529
623
|
action:
|
|
530
624
|
ActivationAction::WftComplete {
|
|
531
625
|
commands,
|
|
532
626
|
query_responses,
|
|
627
|
+
force_new_wft,
|
|
533
628
|
},
|
|
534
629
|
})) => {
|
|
535
630
|
debug!("Sending commands to server: {:?}", &commands);
|
|
@@ -541,19 +636,33 @@ impl Worker {
|
|
|
541
636
|
commands,
|
|
542
637
|
query_responses,
|
|
543
638
|
sticky_attributes: None,
|
|
544
|
-
return_new_workflow_task:
|
|
545
|
-
force_create_new_workflow_task:
|
|
639
|
+
return_new_workflow_task: force_new_wft,
|
|
640
|
+
force_create_new_workflow_task: force_new_wft,
|
|
546
641
|
};
|
|
547
642
|
let sticky_attrs = self.get_sticky_attrs();
|
|
643
|
+
// Do not return new WFT if we would not cache, because returned new WFTs are always
|
|
644
|
+
// partial.
|
|
645
|
+
if sticky_attrs.is_none() {
|
|
646
|
+
completion.return_new_workflow_task = false;
|
|
647
|
+
}
|
|
548
648
|
completion.sticky_attributes = sticky_attrs;
|
|
649
|
+
|
|
549
650
|
self.handle_wft_reporting_errs(run_id, || async {
|
|
550
|
-
self
|
|
651
|
+
let maybe_wft = self
|
|
652
|
+
.server_gateway
|
|
551
653
|
.complete_workflow_task(completion)
|
|
552
654
|
.instrument(span!(tracing::Level::DEBUG, "Complete WFT call"))
|
|
553
|
-
.await
|
|
655
|
+
.await?;
|
|
656
|
+
if let Some(wft) = maybe_wft.workflow_task {
|
|
657
|
+
self.wf_task_source.add_wft_from_completion(wft);
|
|
658
|
+
}
|
|
659
|
+
Ok(())
|
|
554
660
|
})
|
|
555
661
|
.await?;
|
|
556
|
-
Ok(
|
|
662
|
+
Ok(WFTReportOutcome {
|
|
663
|
+
reported_to_server: true,
|
|
664
|
+
failed: false,
|
|
665
|
+
})
|
|
557
666
|
}
|
|
558
667
|
Ok(Some(ServerCommandsWithWorkflowInfo {
|
|
559
668
|
task_token,
|
|
@@ -563,9 +672,15 @@ impl Worker {
|
|
|
563
672
|
self.server_gateway
|
|
564
673
|
.respond_legacy_query(task_token, result)
|
|
565
674
|
.await?;
|
|
566
|
-
Ok(
|
|
675
|
+
Ok(WFTReportOutcome {
|
|
676
|
+
reported_to_server: true,
|
|
677
|
+
failed: false,
|
|
678
|
+
})
|
|
567
679
|
}
|
|
568
|
-
Ok(None) => Ok(
|
|
680
|
+
Ok(None) => Ok(WFTReportOutcome {
|
|
681
|
+
reported_to_server: false,
|
|
682
|
+
failed: false,
|
|
683
|
+
}),
|
|
569
684
|
Err(update_err) => {
|
|
570
685
|
// Automatically fail the workflow task in the event we couldn't update machines
|
|
571
686
|
let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
|
|
@@ -574,30 +689,14 @@ impl Worker {
|
|
|
574
689
|
} else {
|
|
575
690
|
WorkflowTaskFailedCause::Unspecified
|
|
576
691
|
};
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
tt.clone(),
|
|
586
|
-
fail_cause,
|
|
587
|
-
Some(Failure::application_failure(wft_fail_str.clone(), false)),
|
|
588
|
-
)
|
|
589
|
-
.await
|
|
590
|
-
})
|
|
591
|
-
.await?;
|
|
592
|
-
// We must evict the workflow since we've failed a WFT
|
|
593
|
-
self.request_wf_eviction(
|
|
594
|
-
run_id,
|
|
595
|
-
format!("Workflow task failure: {}", wft_fail_str),
|
|
596
|
-
);
|
|
597
|
-
Ok(true)
|
|
598
|
-
} else {
|
|
599
|
-
Ok(false)
|
|
600
|
-
}
|
|
692
|
+
let wft_fail_str = format!("{:?}", update_err);
|
|
693
|
+
self.wf_activation_failed(
|
|
694
|
+
run_id,
|
|
695
|
+
fail_cause,
|
|
696
|
+
update_err.evict_reason(),
|
|
697
|
+
Failure::application_failure(wft_fail_str.clone(), false).into(),
|
|
698
|
+
)
|
|
699
|
+
.await
|
|
601
700
|
}
|
|
602
701
|
}
|
|
603
702
|
}
|
|
@@ -608,39 +707,45 @@ impl Worker {
|
|
|
608
707
|
async fn wf_activation_failed(
|
|
609
708
|
&self,
|
|
610
709
|
run_id: &str,
|
|
710
|
+
cause: WorkflowTaskFailedCause,
|
|
711
|
+
reason: EvictionReason,
|
|
611
712
|
failure: workflow_completion::Failure,
|
|
612
|
-
) -> Result<
|
|
613
|
-
Ok(
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
.
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
}
|
|
627
|
-
FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
|
|
628
|
-
self.server_gateway
|
|
629
|
-
.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
713
|
+
) -> Result<WFTReportOutcome, CompleteWfError> {
|
|
714
|
+
Ok(
|
|
715
|
+
match self.wft_manager.failed_activation(
|
|
716
|
+
run_id,
|
|
717
|
+
reason,
|
|
718
|
+
format!("Workflow activation completion failed: {:?}", failure),
|
|
719
|
+
) {
|
|
720
|
+
FailedActivationOutcome::Report(tt) => {
|
|
721
|
+
warn!(run_id, failure=?failure, "Failing workflow activation");
|
|
722
|
+
self.handle_wft_reporting_errs(run_id, || async {
|
|
723
|
+
self.server_gateway
|
|
724
|
+
.fail_workflow_task(tt, cause, failure.failure.map(Into::into))
|
|
725
|
+
.await
|
|
726
|
+
})
|
|
630
727
|
.await?;
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
728
|
+
WFTReportOutcome {
|
|
729
|
+
reported_to_server: true,
|
|
730
|
+
failed: true,
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
|
|
734
|
+
warn!(run_id, failure=?failure, "Failing legacy query request");
|
|
735
|
+
self.server_gateway
|
|
736
|
+
.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
737
|
+
.await?;
|
|
738
|
+
WFTReportOutcome {
|
|
739
|
+
reported_to_server: true,
|
|
740
|
+
failed: true,
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
FailedActivationOutcome::NoReport => WFTReportOutcome {
|
|
744
|
+
reported_to_server: false,
|
|
745
|
+
failed: true,
|
|
746
|
+
},
|
|
747
|
+
},
|
|
748
|
+
)
|
|
644
749
|
}
|
|
645
750
|
|
|
646
751
|
/// Handle server errors from either completing or failing a workflow task. Returns any errors
|
|
@@ -653,20 +758,20 @@ impl Worker {
|
|
|
653
758
|
where
|
|
654
759
|
Fut: Future<Output = Result<T, tonic::Status>>,
|
|
655
760
|
{
|
|
656
|
-
let mut should_evict =
|
|
761
|
+
let mut should_evict = None;
|
|
657
762
|
let res = match completer().await {
|
|
658
763
|
Err(err) => {
|
|
659
764
|
match err.code() {
|
|
660
765
|
// Silence unhandled command errors since the lang SDK cannot do anything about
|
|
661
766
|
// them besides poll again, which it will do anyway.
|
|
662
767
|
tonic::Code::InvalidArgument if err.message() == "UnhandledCommand" => {
|
|
663
|
-
warn!(error = %err, "Unhandled command response when completing");
|
|
664
|
-
should_evict =
|
|
768
|
+
warn!(error = %err, run_id, "Unhandled command response when completing");
|
|
769
|
+
should_evict = Some(EvictionReason::UnhandledCommand);
|
|
665
770
|
Ok(())
|
|
666
771
|
}
|
|
667
772
|
tonic::Code::NotFound => {
|
|
668
|
-
warn!(error = %err, "Task not found when completing");
|
|
669
|
-
should_evict =
|
|
773
|
+
warn!(error = %err, run_id, "Task not found when completing");
|
|
774
|
+
should_evict = Some(EvictionReason::TaskNotFound);
|
|
670
775
|
Ok(())
|
|
671
776
|
}
|
|
672
777
|
_ => Err(err),
|
|
@@ -674,12 +779,46 @@ impl Worker {
|
|
|
674
779
|
}
|
|
675
780
|
_ => Ok(()),
|
|
676
781
|
};
|
|
677
|
-
if should_evict {
|
|
678
|
-
self.request_wf_eviction(run_id, "Error reporting WFT to server");
|
|
782
|
+
if let Some(reason) = should_evict {
|
|
783
|
+
self.request_wf_eviction(run_id, "Error reporting WFT to server", reason);
|
|
679
784
|
}
|
|
680
785
|
res.map_err(Into::into)
|
|
681
786
|
}
|
|
682
787
|
|
|
788
|
+
async fn complete_local_act(
|
|
789
|
+
&self,
|
|
790
|
+
la_res: LocalActivityExecutionResult,
|
|
791
|
+
info: LocalInFlightActInfo,
|
|
792
|
+
backoff: Option<prost_types::Duration>,
|
|
793
|
+
) {
|
|
794
|
+
self.notify_local_result(
|
|
795
|
+
&info.la_info.workflow_exec_info.run_id,
|
|
796
|
+
LocalResolution::LocalActivity(LocalActivityResolution {
|
|
797
|
+
seq: info.la_info.schedule_cmd.seq,
|
|
798
|
+
result: la_res,
|
|
799
|
+
runtime: info.dispatch_time.elapsed(),
|
|
800
|
+
attempt: info.attempt,
|
|
801
|
+
backoff,
|
|
802
|
+
original_schedule_time: Some(info.la_info.schedule_time),
|
|
803
|
+
}),
|
|
804
|
+
)
|
|
805
|
+
.await
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
async fn notify_local_result(&self, run_id: &str, res: LocalResolution) {
|
|
809
|
+
if let Err(e) = self.wft_manager.notify_of_local_result(run_id, res).await {
|
|
810
|
+
error!(
|
|
811
|
+
"Problem with local resolution on run {}: {:?} -- will evict the workflow",
|
|
812
|
+
run_id, e
|
|
813
|
+
);
|
|
814
|
+
self.request_wf_eviction(
|
|
815
|
+
run_id,
|
|
816
|
+
"Issue while processing local resolution",
|
|
817
|
+
e.evict_reason(),
|
|
818
|
+
);
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
|
|
683
822
|
/// Return the sticky execution attributes that should be used to complete workflow tasks
|
|
684
823
|
/// for this worker (if any).
|
|
685
824
|
fn get_sticky_attrs(&self) -> Option<StickyExecutionAttributes> {
|
|
@@ -696,102 +835,89 @@ impl Worker {
|
|
|
696
835
|
})
|
|
697
836
|
}
|
|
698
837
|
|
|
699
|
-
///
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
return;
|
|
838
|
+
/// Resolves when there are no more outstanding WFTs
|
|
839
|
+
async fn all_wfts_drained(&self) {
|
|
840
|
+
while self.outstanding_workflow_tasks() != 0 {
|
|
841
|
+
self.wfts_drained_notify.notified().await;
|
|
704
842
|
}
|
|
705
|
-
let _ = self.shutdown_requested.clone().changed().await;
|
|
706
843
|
}
|
|
707
844
|
}
|
|
708
845
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
.max(1)
|
|
713
|
-
}
|
|
714
|
-
fn max_sticky_polls(&self) -> usize {
|
|
715
|
-
self.max_concurrent_wft_polls
|
|
716
|
-
.saturating_sub(self.max_nonsticky_polls())
|
|
717
|
-
.max(1)
|
|
718
|
-
}
|
|
846
|
+
struct WFTReportOutcome {
|
|
847
|
+
reported_to_server: bool,
|
|
848
|
+
failed: bool,
|
|
719
849
|
}
|
|
720
850
|
|
|
721
851
|
#[cfg(test)]
|
|
722
852
|
mod tests {
|
|
723
853
|
use super::*;
|
|
724
|
-
use
|
|
854
|
+
use temporal_client::mocks::mock_gateway;
|
|
725
855
|
use temporal_sdk_core_protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
|
|
726
856
|
|
|
727
857
|
#[tokio::test]
|
|
728
858
|
async fn activity_timeouts_dont_eat_permits() {
|
|
729
|
-
let mut mock_gateway =
|
|
859
|
+
let mut mock_gateway = mock_gateway();
|
|
730
860
|
mock_gateway
|
|
731
861
|
.expect_poll_activity_task()
|
|
732
862
|
.returning(|_| Ok(PollActivityTaskQueueResponse::default()));
|
|
733
|
-
let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
|
|
734
863
|
|
|
735
864
|
let cfg = WorkerConfigBuilder::default()
|
|
736
865
|
.task_queue("whatever")
|
|
737
866
|
.max_outstanding_activities(5_usize)
|
|
738
867
|
.build()
|
|
739
868
|
.unwrap();
|
|
740
|
-
let worker = Worker::new(cfg, None, Arc::new(
|
|
869
|
+
let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
|
|
741
870
|
assert_eq!(worker.activity_poll().await.unwrap(), None);
|
|
742
871
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
743
872
|
}
|
|
744
873
|
|
|
745
874
|
#[tokio::test]
|
|
746
875
|
async fn workflow_timeouts_dont_eat_permits() {
|
|
747
|
-
let mut mock_gateway =
|
|
876
|
+
let mut mock_gateway = mock_gateway();
|
|
748
877
|
mock_gateway
|
|
749
878
|
.expect_poll_workflow_task()
|
|
750
879
|
.returning(|_, _| Ok(PollWorkflowTaskQueueResponse::default()));
|
|
751
|
-
let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
|
|
752
880
|
|
|
753
881
|
let cfg = WorkerConfigBuilder::default()
|
|
754
882
|
.task_queue("whatever")
|
|
755
883
|
.max_outstanding_workflow_tasks(5_usize)
|
|
756
884
|
.build()
|
|
757
885
|
.unwrap();
|
|
758
|
-
let worker = Worker::new(cfg, None, Arc::new(
|
|
886
|
+
let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
|
|
759
887
|
assert_eq!(worker.workflow_poll().await.unwrap(), None);
|
|
760
888
|
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
761
889
|
}
|
|
762
890
|
|
|
763
891
|
#[tokio::test]
|
|
764
892
|
async fn activity_errs_dont_eat_permits() {
|
|
765
|
-
let mut mock_gateway =
|
|
893
|
+
let mut mock_gateway = mock_gateway();
|
|
766
894
|
mock_gateway
|
|
767
895
|
.expect_poll_activity_task()
|
|
768
896
|
.returning(|_| Err(tonic::Status::internal("ahhh")));
|
|
769
|
-
let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
|
|
770
897
|
|
|
771
898
|
let cfg = WorkerConfigBuilder::default()
|
|
772
899
|
.task_queue("whatever")
|
|
773
900
|
.max_outstanding_activities(5_usize)
|
|
774
901
|
.build()
|
|
775
902
|
.unwrap();
|
|
776
|
-
let worker = Worker::new(cfg, None, Arc::new(
|
|
903
|
+
let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
|
|
777
904
|
assert!(worker.activity_poll().await.is_err());
|
|
778
905
|
assert_eq!(worker.at_task_mgr.unwrap().remaining_activity_capacity(), 5);
|
|
779
906
|
}
|
|
780
907
|
|
|
781
908
|
#[tokio::test]
|
|
782
909
|
async fn workflow_errs_dont_eat_permits() {
|
|
783
|
-
let mut mock_gateway =
|
|
910
|
+
let mut mock_gateway = mock_gateway();
|
|
784
911
|
mock_gateway
|
|
785
912
|
.expect_poll_workflow_task()
|
|
786
913
|
.returning(|_, _| Err(tonic::Status::internal("ahhh")));
|
|
787
|
-
let gwref = GatewayRef::new(Arc::new(mock_gateway), fake_sg_opts());
|
|
788
914
|
|
|
789
915
|
let cfg = WorkerConfigBuilder::default()
|
|
790
916
|
.task_queue("whatever")
|
|
791
917
|
.max_outstanding_workflow_tasks(5_usize)
|
|
792
918
|
.build()
|
|
793
919
|
.unwrap();
|
|
794
|
-
let worker = Worker::new(cfg, None, Arc::new(
|
|
920
|
+
let worker = Worker::new(cfg, None, Arc::new(mock_gateway), Default::default());
|
|
795
921
|
assert!(worker.workflow_poll().await.is_err());
|
|
796
922
|
assert_eq!(worker.workflows_semaphore.available_permits(), 5);
|
|
797
923
|
}
|