@temporalio/core-bridge 1.15.0 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +172 -70
- package/lib/native.d.ts +1 -1
- package/package.json +2 -2
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.github/workflows/per-pr.yml +6 -6
- package/sdk-core/AGENTS.md +41 -30
- package/sdk-core/Cargo.toml +3 -0
- package/sdk-core/README.md +15 -9
- package/sdk-core/crates/client/Cargo.toml +4 -0
- package/sdk-core/crates/client/README.md +139 -0
- package/sdk-core/crates/client/src/async_activity_handle.rs +297 -0
- package/sdk-core/crates/client/src/callback_based.rs +7 -0
- package/sdk-core/crates/client/src/errors.rs +294 -0
- package/sdk-core/crates/client/src/{raw.rs → grpc.rs} +280 -159
- package/sdk-core/crates/client/src/lib.rs +920 -1326
- package/sdk-core/crates/client/src/metrics.rs +24 -33
- package/sdk-core/crates/client/src/options_structs.rs +457 -0
- package/sdk-core/crates/client/src/replaceable.rs +5 -4
- package/sdk-core/crates/client/src/request_extensions.rs +8 -9
- package/sdk-core/crates/client/src/retry.rs +99 -54
- package/sdk-core/crates/client/src/{worker/mod.rs → worker.rs} +1 -1
- package/sdk-core/crates/client/src/workflow_handle.rs +826 -0
- package/sdk-core/crates/common/Cargo.toml +61 -2
- package/sdk-core/crates/common/build.rs +742 -12
- package/sdk-core/crates/common/protos/api_upstream/.github/workflows/ci.yml +2 -0
- package/sdk-core/crates/common/protos/api_upstream/Makefile +2 -1
- package/sdk-core/crates/common/protos/api_upstream/buf.yaml +0 -3
- package/sdk-core/crates/common/protos/api_upstream/cmd/check-path-conflicts/main.go +137 -0
- package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv2.json +1166 -770
- package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv3.yaml +1243 -750
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/deployment/v1/message.proto +2 -2
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/workflow.proto +4 -3
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/failure/v1/message.proto +1 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/history/v1/message.proto +4 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/namespace/v1/message.proto +6 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/nexus/v1/message.proto +16 -1
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -6
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +88 -33
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/nexus/nexus.proto +4 -2
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +4 -0
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +5 -5
- package/sdk-core/crates/common/src/activity_definition.rs +20 -0
- package/sdk-core/crates/common/src/data_converters.rs +770 -0
- package/sdk-core/crates/common/src/envconfig.rs +5 -0
- package/sdk-core/crates/common/src/lib.rs +15 -211
- package/sdk-core/crates/common/src/payload_visitor.rs +648 -0
- package/sdk-core/crates/common/src/priority.rs +110 -0
- package/sdk-core/crates/common/src/protos/canned_histories.rs +3 -0
- package/sdk-core/crates/common/src/protos/history_builder.rs +45 -0
- package/sdk-core/crates/common/src/protos/history_info.rs +2 -0
- package/sdk-core/crates/common/src/protos/mod.rs +122 -27
- package/sdk-core/crates/common/src/protos/task_token.rs +3 -3
- package/sdk-core/crates/common/src/protos/utilities.rs +11 -0
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/log_export.rs +5 -7
- package/sdk-core/crates/common/src/telemetry/metrics/core.rs +125 -0
- package/sdk-core/crates/common/src/telemetry/metrics.rs +268 -223
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/otel.rs +8 -13
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_meter.rs +49 -50
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_server.rs +2 -3
- package/sdk-core/crates/common/src/telemetry.rs +264 -4
- package/sdk-core/crates/common/src/worker.rs +68 -603
- package/sdk-core/crates/common/src/workflow_definition.rs +60 -0
- package/sdk-core/crates/macros/Cargo.toml +5 -1
- package/sdk-core/crates/macros/src/activities_definitions.rs +585 -0
- package/sdk-core/crates/macros/src/fsm_impl.rs +507 -0
- package/sdk-core/crates/macros/src/lib.rs +138 -512
- package/sdk-core/crates/macros/src/macro_utils.rs +106 -0
- package/sdk-core/crates/macros/src/workflow_definitions.rs +1224 -0
- package/sdk-core/crates/sdk/Cargo.toml +19 -6
- package/sdk-core/crates/sdk/README.md +415 -0
- package/sdk-core/crates/sdk/src/activities.rs +417 -0
- package/sdk-core/crates/sdk/src/interceptors.rs +1 -1
- package/sdk-core/crates/sdk/src/lib.rs +757 -442
- package/sdk-core/crates/sdk/src/workflow_context/options.rs +45 -35
- package/sdk-core/crates/sdk/src/workflow_context.rs +1033 -289
- package/sdk-core/crates/sdk/src/workflow_future.rs +277 -213
- package/sdk-core/crates/sdk/src/workflows.rs +711 -0
- package/sdk-core/crates/sdk-core/Cargo.toml +57 -64
- package/sdk-core/crates/sdk-core/benches/workflow_replay_bench.rs +41 -35
- package/sdk-core/crates/sdk-core/machine_coverage/ActivityMachine_Coverage.puml +1 -1
- package/sdk-core/crates/sdk-core/src/abstractions.rs +6 -10
- package/sdk-core/crates/sdk-core/src/core_tests/activity_tasks.rs +6 -5
- package/sdk-core/crates/sdk-core/src/core_tests/mod.rs +13 -15
- package/sdk-core/crates/sdk-core/src/core_tests/queries.rs +21 -25
- package/sdk-core/crates/sdk-core/src/core_tests/replay_flag.rs +7 -10
- package/sdk-core/crates/sdk-core/src/core_tests/updates.rs +14 -17
- package/sdk-core/crates/sdk-core/src/core_tests/workers.rs +493 -26
- package/sdk-core/crates/sdk-core/src/core_tests/workflow_tasks.rs +4 -8
- package/sdk-core/crates/sdk-core/src/ephemeral_server/mod.rs +7 -7
- package/sdk-core/crates/sdk-core/src/histfetch.rs +20 -10
- package/sdk-core/crates/sdk-core/src/lib.rs +41 -111
- package/sdk-core/crates/sdk-core/src/pollers/mod.rs +4 -9
- package/sdk-core/crates/sdk-core/src/pollers/poll_buffer.rs +118 -19
- package/sdk-core/crates/sdk-core/src/protosext/mod.rs +2 -2
- package/sdk-core/crates/sdk-core/src/replay/mod.rs +14 -5
- package/sdk-core/crates/sdk-core/src/telemetry/metrics.rs +179 -196
- package/sdk-core/crates/sdk-core/src/telemetry/mod.rs +3 -280
- package/sdk-core/crates/sdk-core/src/test_help/integ_helpers.rs +6 -9
- package/sdk-core/crates/sdk-core/src/test_help/unit_helpers.rs +3 -6
- package/sdk-core/crates/sdk-core/src/worker/activities/local_activities.rs +11 -14
- package/sdk-core/crates/sdk-core/src/worker/activities.rs +16 -19
- package/sdk-core/crates/sdk-core/src/worker/client/mocks.rs +9 -5
- package/sdk-core/crates/sdk-core/src/worker/client.rs +103 -81
- package/sdk-core/crates/sdk-core/src/worker/heartbeat.rs +7 -11
- package/sdk-core/crates/sdk-core/src/worker/mod.rs +1124 -229
- package/sdk-core/crates/sdk-core/src/worker/nexus.rs +145 -23
- package/sdk-core/crates/sdk-core/src/worker/slot_provider.rs +2 -2
- package/sdk-core/crates/sdk-core/src/worker/tuner/fixed_size.rs +2 -2
- package/sdk-core/crates/sdk-core/src/worker/tuner/resource_based.rs +13 -13
- package/sdk-core/crates/sdk-core/src/worker/tuner.rs +28 -8
- package/sdk-core/crates/sdk-core/src/worker/workflow/driven_workflow.rs +9 -3
- package/sdk-core/crates/sdk-core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +21 -22
- package/sdk-core/crates/sdk-core/src/worker/workflow/machines/workflow_machines.rs +19 -4
- package/sdk-core/crates/sdk-core/src/worker/workflow/managed_run.rs +14 -18
- package/sdk-core/crates/sdk-core/src/worker/workflow/mod.rs +4 -6
- package/sdk-core/crates/sdk-core/src/worker/workflow/run_cache.rs +4 -7
- package/sdk-core/crates/sdk-core/src/worker/workflow/wft_extraction.rs +2 -4
- package/sdk-core/crates/sdk-core/src/worker/workflow/wft_poller.rs +8 -9
- package/sdk-core/crates/sdk-core/src/worker/workflow/workflow_stream.rs +1 -3
- package/sdk-core/crates/sdk-core/tests/activities_procmacro.rs +6 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/basic_pass.rs +54 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.rs +18 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.rs +14 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/multi_arg_pass.rs +48 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_input_pass.rs +14 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_return_type_pass.rs +19 -0
- package/sdk-core/crates/sdk-core/tests/cloud_tests.rs +14 -5
- package/sdk-core/crates/sdk-core/tests/common/activity_functions.rs +55 -0
- package/sdk-core/crates/sdk-core/tests/common/mod.rs +241 -196
- package/sdk-core/crates/sdk-core/tests/common/workflows.rs +41 -28
- package/sdk-core/crates/sdk-core/tests/global_metric_tests.rs +3 -5
- package/sdk-core/crates/sdk-core/tests/heavy_tests/fuzzy_workflow.rs +73 -64
- package/sdk-core/crates/sdk-core/tests/heavy_tests.rs +298 -252
- package/sdk-core/crates/sdk-core/tests/integ_tests/async_activity_client_tests.rs +230 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/client_tests.rs +94 -57
- package/sdk-core/crates/sdk-core/tests/integ_tests/data_converter_tests.rs +381 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +16 -12
- package/sdk-core/crates/sdk-core/tests/integ_tests/heartbeat_tests.rs +48 -40
- package/sdk-core/crates/sdk-core/tests/integ_tests/metrics_tests.rs +327 -255
- package/sdk-core/crates/sdk-core/tests/integ_tests/pagination_tests.rs +50 -45
- package/sdk-core/crates/sdk-core/tests/integ_tests/polling_tests.rs +147 -126
- package/sdk-core/crates/sdk-core/tests/integ_tests/queries_tests.rs +103 -89
- package/sdk-core/crates/sdk-core/tests/integ_tests/update_tests.rs +609 -453
- package/sdk-core/crates/sdk-core/tests/integ_tests/visibility_tests.rs +80 -62
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_heartbeat_tests.rs +360 -231
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_tests.rs +248 -185
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_versioning_tests.rs +52 -43
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_client_tests.rs +180 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/activities.rs +428 -315
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +82 -56
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +56 -28
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +364 -243
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/client_interactions.rs +552 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +101 -42
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +243 -147
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/eager.rs +98 -28
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +1475 -1036
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +73 -41
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +397 -238
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/patches.rs +414 -189
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/queries.rs +415 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/replay.rs +96 -36
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/resets.rs +154 -137
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/signals.rs +183 -105
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +85 -38
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/timers.rs +142 -40
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +73 -54
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests.rs +363 -226
- package/sdk-core/crates/sdk-core/tests/main.rs +17 -15
- package/sdk-core/crates/sdk-core/tests/manual_tests.rs +207 -152
- package/sdk-core/crates/sdk-core/tests/shared_tests/mod.rs +65 -34
- package/sdk-core/crates/sdk-core/tests/shared_tests/priority.rs +107 -84
- package/sdk-core/crates/sdk-core/tests/workflows_procmacro.rs +6 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.rs +26 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/basic_pass.rs +49 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/minimal_pass.rs +21 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.rs +26 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.rs +21 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core-c-bridge/Cargo.toml +7 -1
- package/sdk-core/crates/sdk-core-c-bridge/include/temporal-sdk-core-c-bridge.h +14 -14
- package/sdk-core/crates/sdk-core-c-bridge/src/client.rs +83 -74
- package/sdk-core/crates/sdk-core-c-bridge/src/metric.rs +9 -14
- package/sdk-core/crates/sdk-core-c-bridge/src/runtime.rs +1 -2
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/context.rs +13 -13
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/mod.rs +6 -6
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/utils.rs +3 -4
- package/sdk-core/crates/sdk-core-c-bridge/src/worker.rs +62 -75
- package/sdk-core/rustfmt.toml +2 -1
- package/src/client.rs +205 -318
- package/src/metrics.rs +22 -30
- package/src/runtime.rs +4 -5
- package/src/worker.rs +16 -19
- package/ts/native.ts +1 -1
- package/sdk-core/crates/client/src/workflow_handle/mod.rs +0 -212
- package/sdk-core/crates/common/src/errors.rs +0 -85
- package/sdk-core/crates/common/tests/worker_task_types_test.rs +0 -129
- package/sdk-core/crates/sdk/src/activity_context.rs +0 -238
- package/sdk-core/crates/sdk/src/app_data.rs +0 -37
- package/sdk-core/crates/sdk-core/tests/integ_tests/activity_functions.rs +0 -5
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +0 -61
|
@@ -6,7 +6,18 @@ mod slot_provider;
|
|
|
6
6
|
pub(crate) mod tuner;
|
|
7
7
|
mod workflow;
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
use temporalio_client::Connection;
|
|
10
|
+
use temporalio_common::{
|
|
11
|
+
protos::{
|
|
12
|
+
coresdk::{
|
|
13
|
+
ActivitySlotInfo, LocalActivitySlotInfo, NamespaceInfo, NexusSlotInfo,
|
|
14
|
+
WorkflowSlotInfo, activity_result::ActivityExecutionResult, namespace_info,
|
|
15
|
+
},
|
|
16
|
+
temporal::api::{enums::v1::VersioningBehavior, worker::v1::PluginInfo},
|
|
17
|
+
},
|
|
18
|
+
telemetry::TelemetryInstance,
|
|
19
|
+
worker::{WorkerDeploymentOptions, WorkerDeploymentVersion},
|
|
20
|
+
};
|
|
10
21
|
pub use tuner::{
|
|
11
22
|
FixedSizeSlotSupplier, ResourceBasedSlotsOptions, ResourceBasedSlotsOptionsBuilder,
|
|
12
23
|
ResourceBasedTuner, ResourceSlotOptions, SlotSupplierOptions, TunerBuilder, TunerHolder,
|
|
@@ -26,18 +37,13 @@ pub(crate) use wft_poller::WFTPollerShared;
|
|
|
26
37
|
pub use workflow::LEGACY_QUERY_ID;
|
|
27
38
|
|
|
28
39
|
use crate::{
|
|
29
|
-
ActivityHeartbeat,
|
|
40
|
+
ActivityHeartbeat,
|
|
30
41
|
abstractions::{MeteredPermitDealer, PermitDealerContextData, dbg_panic},
|
|
31
|
-
errors::CompleteWfError,
|
|
32
42
|
pollers::{ActivityTaskOptions, BoxedActPoller, BoxedNexusPoller, LongPollBuffer},
|
|
33
43
|
protosext::validate_activity_completion,
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
metrics::{
|
|
38
|
-
MetricsContext, activity_poller, activity_worker_type, local_activity_worker_type,
|
|
39
|
-
nexus_poller, nexus_worker_type, workflow_worker_type,
|
|
40
|
-
},
|
|
44
|
+
telemetry::metrics::{
|
|
45
|
+
MetricsContext, WorkerHeartbeatMetrics, activity_poller, activity_worker_type,
|
|
46
|
+
local_activity_worker_type, nexus_poller, nexus_worker_type, workflow_worker_type,
|
|
41
47
|
},
|
|
42
48
|
worker::{
|
|
43
49
|
activities::{LACompleteAction, LocalActivityManager, NextPendingLAAction},
|
|
@@ -58,6 +64,8 @@ use gethostname::gethostname;
|
|
|
58
64
|
use parking_lot::RwLock;
|
|
59
65
|
use slot_provider::SlotProvider;
|
|
60
66
|
use std::{
|
|
67
|
+
any::Any,
|
|
68
|
+
collections::{HashMap, HashSet},
|
|
61
69
|
convert::TryInto,
|
|
62
70
|
future,
|
|
63
71
|
sync::{
|
|
@@ -70,15 +78,12 @@ use temporalio_client::worker::{
|
|
|
70
78
|
ClientWorker, HeartbeatCallback, SharedNamespaceWorkerTrait, Slot as SlotTrait,
|
|
71
79
|
};
|
|
72
80
|
use temporalio_common::{
|
|
73
|
-
errors::{CompleteNexusError, WorkerValidationError},
|
|
74
81
|
protos::{
|
|
75
82
|
TaskToken,
|
|
76
83
|
coresdk::{
|
|
77
|
-
ActivityTaskCompletion,
|
|
78
|
-
activity_result::activity_execution_result,
|
|
84
|
+
ActivityTaskCompletion,
|
|
79
85
|
activity_task::ActivityTask,
|
|
80
|
-
|
|
81
|
-
nexus::{NexusTask, NexusTaskCompletion, nexus_task_completion},
|
|
86
|
+
nexus::{NexusTask, NexusTaskCompletion},
|
|
82
87
|
workflow_activation::{WorkflowActivation, remove_from_cache::EvictionReason},
|
|
83
88
|
workflow_completion::WorkflowActivationCompletion,
|
|
84
89
|
},
|
|
@@ -89,11 +94,8 @@ use temporalio_common::{
|
|
|
89
94
|
worker::v1::{WorkerHeartbeat, WorkerHostInfo, WorkerPollerInfo, WorkerSlotsInfo},
|
|
90
95
|
},
|
|
91
96
|
},
|
|
92
|
-
telemetry::metrics::
|
|
93
|
-
worker::
|
|
94
|
-
ActivitySlotKind, LocalActivitySlotKind, NexusSlotKind, PollerBehavior, SlotKind,
|
|
95
|
-
WorkerTaskTypes, WorkflowSlotKind,
|
|
96
|
-
},
|
|
97
|
+
telemetry::metrics::TemporalMeter,
|
|
98
|
+
worker::WorkerTaskTypes,
|
|
97
99
|
};
|
|
98
100
|
use tokio::sync::{mpsc::unbounded_channel, watch};
|
|
99
101
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
@@ -112,6 +114,278 @@ use {
|
|
|
112
114
|
},
|
|
113
115
|
};
|
|
114
116
|
|
|
117
|
+
/// Defines per-worker configuration options
|
|
118
|
+
#[derive(Clone, bon::Builder)]
|
|
119
|
+
#[builder(on(String, into), state_mod(vis = "pub"), finish_fn(vis = "", name = build_internal))]
|
|
120
|
+
#[non_exhaustive]
|
|
121
|
+
pub struct WorkerConfig {
|
|
122
|
+
/// The Temporal service namespace this worker is bound to
|
|
123
|
+
pub namespace: String,
|
|
124
|
+
/// What task queue will this worker poll from? This task queue name will be used for both
|
|
125
|
+
/// workflow and activity polling.
|
|
126
|
+
pub task_queue: String,
|
|
127
|
+
/// A human-readable string that can identify this worker. Using something like sdk version
|
|
128
|
+
/// and host name is a good default. If set, overrides the identity set (if any) on the client
|
|
129
|
+
/// used by this worker.
|
|
130
|
+
pub client_identity_override: Option<String>,
|
|
131
|
+
/// If set nonzero, workflows will be cached and sticky task queues will be used, meaning that
|
|
132
|
+
/// history updates are applied incrementally to suspended instances of workflow execution.
|
|
133
|
+
/// Workflows are evicted according to a least-recently-used policy once the cache maximum is
|
|
134
|
+
/// reached. Workflows may also be explicitly evicted at any time, or as a result of errors
|
|
135
|
+
/// or failures.
|
|
136
|
+
#[builder(default = 0)]
|
|
137
|
+
pub max_cached_workflows: usize,
|
|
138
|
+
/// Set a [crate::WorkerTuner] for this worker. Either this or at least one of the
|
|
139
|
+
/// `max_outstanding_*` fields must be set.
|
|
140
|
+
pub tuner: Option<Arc<dyn WorkerTuner + Send + Sync>>,
|
|
141
|
+
/// Maximum number of concurrent poll workflow task requests we will perform at a time on this
|
|
142
|
+
/// worker's task queue. See also [WorkerConfig::nonsticky_to_sticky_poll_ratio].
|
|
143
|
+
/// If using SimpleMaximum, the value must be at least 2 when `max_cached_workflows` > 0; anything lower is an error.
|
|
144
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
145
|
+
pub workflow_task_poller_behavior: PollerBehavior,
|
|
146
|
+
/// Only applies when using [PollerBehavior::SimpleMaximum]
|
|
147
|
+
///
|
|
148
|
+
/// (max workflow task polls * this number) = the number of max pollers that will be allowed for
|
|
149
|
+
/// the nonsticky queue when sticky tasks are enabled. If both defaults are used, the sticky
|
|
150
|
+
/// queue will allow 4 max pollers while the nonsticky queue will allow one. The minimum for
|
|
151
|
+
/// either poller is 1, so if the maximum allowed is 1 and sticky queues are enabled, there will
|
|
152
|
+
/// be 2 concurrent polls.
|
|
153
|
+
#[builder(default = 0.2)]
|
|
154
|
+
pub nonsticky_to_sticky_poll_ratio: f32,
|
|
155
|
+
/// Maximum number of concurrent poll activity task requests we will perform at a time on this
|
|
156
|
+
/// worker's task queue
|
|
157
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
158
|
+
pub activity_task_poller_behavior: PollerBehavior,
|
|
159
|
+
/// Maximum number of concurrent poll nexus task requests we will perform at a time on this
|
|
160
|
+
/// worker's task queue
|
|
161
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
162
|
+
pub nexus_task_poller_behavior: PollerBehavior,
|
|
163
|
+
/// Specifies which task types this worker will poll for.
|
|
164
|
+
///
|
|
165
|
+
/// Note: At least one task type must be specified or the worker will fail validation.
|
|
166
|
+
pub task_types: WorkerTaskTypes,
|
|
167
|
+
/// How long a workflow task is allowed to sit on the sticky queue before it is timed out
|
|
168
|
+
/// and moved to the non-sticky queue where it may be picked up by any worker.
|
|
169
|
+
#[builder(default = Duration::from_secs(10))]
|
|
170
|
+
pub sticky_queue_schedule_to_start_timeout: Duration,
|
|
171
|
+
|
|
172
|
+
/// Longest interval for throttling activity heartbeats
|
|
173
|
+
#[builder(default = Duration::from_secs(60))]
|
|
174
|
+
pub max_heartbeat_throttle_interval: Duration,
|
|
175
|
+
|
|
176
|
+
/// Default interval for throttling activity heartbeats in case
|
|
177
|
+
/// `ActivityOptions.heartbeat_timeout` is unset.
|
|
178
|
+
/// When the timeout *is* set in the `ActivityOptions`, throttling is set to
|
|
179
|
+
/// `heartbeat_timeout * 0.8`.
|
|
180
|
+
#[builder(default = Duration::from_secs(30))]
|
|
181
|
+
pub default_heartbeat_throttle_interval: Duration,
|
|
182
|
+
|
|
183
|
+
/// Sets the maximum number of activities per second the task queue will dispatch, controlled
|
|
184
|
+
/// server-side. Note that this only takes effect upon an activity poll request. If multiple
|
|
185
|
+
/// workers on the same queue have different values set, they will thrash with the last poller
|
|
186
|
+
/// winning.
|
|
187
|
+
///
|
|
188
|
+
/// Setting this to a nonzero value will also disable eager activity execution.
|
|
189
|
+
pub max_task_queue_activities_per_second: Option<f64>,
|
|
190
|
+
|
|
191
|
+
/// Limits the number of activities per second that this worker will process. The worker will
|
|
192
|
+
/// not poll for new activities if by doing so it might receive and execute an activity which
|
|
193
|
+
/// would cause it to exceed this limit. Negative, zero, or NaN values will cause building
|
|
194
|
+
/// the options to fail, as will infinite or subnormal values.
|
|
195
|
+
pub max_worker_activities_per_second: Option<f64>,
|
|
196
|
+
|
|
197
|
+
/// If set false (default), shutdown will not finish until all pending evictions have been
|
|
198
|
+
/// issued and replied to. If set true shutdown will be considered complete when the only
|
|
199
|
+
/// remaining work is pending evictions.
|
|
200
|
+
///
|
|
201
|
+
/// This flag is useful during tests to avoid needing to deal with lots of uninteresting
|
|
202
|
+
/// evictions during shutdown. Alternatively, if a lang implementation finds it easy to clean
|
|
203
|
+
/// up during shutdown, setting this true saves some back-and-forth.
|
|
204
|
+
#[builder(default = false)]
|
|
205
|
+
pub ignore_evicts_on_shutdown: bool,
|
|
206
|
+
|
|
207
|
+
/// Maximum number of next page (or initial) history event listing requests we'll make
|
|
208
|
+
/// concurrently. I don't think it's worth exposing this to users until we encounter a reason.
|
|
209
|
+
#[builder(default = 5)]
|
|
210
|
+
pub fetching_concurrency: usize,
|
|
211
|
+
|
|
212
|
+
/// If set, core will issue cancels for all outstanding activities and nexus operations after
|
|
213
|
+
/// shutdown has been initiated and this amount of time has elapsed.
|
|
214
|
+
pub graceful_shutdown_period: Option<Duration>,
|
|
215
|
+
|
|
216
|
+
/// The amount of time core will wait before timing out activities using its own local timers
|
|
217
|
+
/// after one of them elapses. This is to avoid racing with server's own tracking of the
|
|
218
|
+
/// timeout.
|
|
219
|
+
#[builder(default = Duration::from_secs(5))]
|
|
220
|
+
pub local_timeout_buffer_for_activities: Duration,
|
|
221
|
+
|
|
222
|
+
/// Any error types listed here will cause any workflow being processed by this worker to fail,
|
|
223
|
+
/// rather than simply failing the workflow task.
|
|
224
|
+
#[builder(default)]
|
|
225
|
+
pub workflow_failure_errors: HashSet<WorkflowErrorType>,
|
|
226
|
+
|
|
227
|
+
/// Like [WorkerConfig::workflow_failure_errors], but specific to certain workflow types (the
|
|
228
|
+
/// map key).
|
|
229
|
+
#[builder(default)]
|
|
230
|
+
pub workflow_types_to_failure_errors: HashMap<String, HashSet<WorkflowErrorType>>,
|
|
231
|
+
|
|
232
|
+
/// The maximum allowed number of workflow tasks that will ever be given to this worker at one
|
|
233
|
+
/// time. Note that one workflow task may require multiple activations - so the WFT counts as
|
|
234
|
+
/// "outstanding" until all activations it requires have been completed. Must be at least 2 if
|
|
235
|
+
/// `max_cached_workflows` is > 0; a smaller value is an error.
|
|
236
|
+
///
|
|
237
|
+
/// Mutually exclusive with `tuner`
|
|
238
|
+
#[builder(into)]
|
|
239
|
+
pub max_outstanding_workflow_tasks: Option<usize>,
|
|
240
|
+
/// The maximum number of activity tasks that will ever be given to this worker concurrently.
|
|
241
|
+
///
|
|
242
|
+
/// Mutually exclusive with `tuner`
|
|
243
|
+
#[builder(into)]
|
|
244
|
+
pub max_outstanding_activities: Option<usize>,
|
|
245
|
+
/// The maximum number of local activity tasks that will ever be given to this worker
|
|
246
|
+
/// concurrently.
|
|
247
|
+
///
|
|
248
|
+
/// Mutually exclusive with `tuner`
|
|
249
|
+
#[builder(into)]
|
|
250
|
+
pub max_outstanding_local_activities: Option<usize>,
|
|
251
|
+
/// The maximum number of nexus tasks that will ever be given to this worker
|
|
252
|
+
/// concurrently.
|
|
253
|
+
///
|
|
254
|
+
/// Mutually exclusive with `tuner`
|
|
255
|
+
#[builder(into)]
|
|
256
|
+
pub max_outstanding_nexus_tasks: Option<usize>,
|
|
257
|
+
|
|
258
|
+
/// A versioning strategy for this worker.
|
|
259
|
+
pub versioning_strategy: WorkerVersioningStrategy,
|
|
260
|
+
|
|
261
|
+
/// List of plugins used by lang.
|
|
262
|
+
#[builder(default)]
|
|
263
|
+
pub plugins: HashSet<PluginInfo>,
|
|
264
|
+
|
|
265
|
+
/// Skips the single worker+client+namespace+task_queue check
|
|
266
|
+
#[builder(default = false)]
|
|
267
|
+
pub skip_client_worker_set_check: bool,
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
impl WorkerConfig {
|
|
271
|
+
/// Returns true if the configuration specifies we should fail a workflow on a certain error
|
|
272
|
+
/// type rather than failing the workflow task.
|
|
273
|
+
pub fn should_fail_workflow(
|
|
274
|
+
&self,
|
|
275
|
+
workflow_type: &str,
|
|
276
|
+
error_type: &WorkflowErrorType,
|
|
277
|
+
) -> bool {
|
|
278
|
+
self.workflow_failure_errors.contains(error_type)
|
|
279
|
+
|| self
|
|
280
|
+
.workflow_types_to_failure_errors
|
|
281
|
+
.get(workflow_type)
|
|
282
|
+
.map(|s| s.contains(error_type))
|
|
283
|
+
.unwrap_or(false)
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
pub(crate) fn computed_deployment_version(&self) -> Option<WorkerDeploymentVersion> {
|
|
287
|
+
let wdv = match self.versioning_strategy {
|
|
288
|
+
WorkerVersioningStrategy::None { ref build_id } => WorkerDeploymentVersion {
|
|
289
|
+
deployment_name: "".to_owned(),
|
|
290
|
+
build_id: build_id.clone(),
|
|
291
|
+
},
|
|
292
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(ref opts) => opts.version.clone(),
|
|
293
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { ref build_id } => {
|
|
294
|
+
WorkerDeploymentVersion {
|
|
295
|
+
deployment_name: "".to_owned(),
|
|
296
|
+
build_id: build_id.clone(),
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
};
|
|
300
|
+
if wdv.is_empty() { None } else { Some(wdv) }
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
impl<S: worker_config_builder::IsComplete> WorkerConfigBuilder<S> {
|
|
305
|
+
/// Build and validate the worker configuration
|
|
306
|
+
pub fn build(self) -> Result<WorkerConfig, String> {
|
|
307
|
+
let config = self.build_internal();
|
|
308
|
+
let task_types = &config.task_types;
|
|
309
|
+
if task_types.is_empty() {
|
|
310
|
+
return Err("At least one task type must be enabled in `task_types`".to_string());
|
|
311
|
+
}
|
|
312
|
+
if !task_types.enable_workflows && task_types.enable_local_activities {
|
|
313
|
+
return Err(
|
|
314
|
+
"`task_types` cannot enable local activities without workflows".to_string(),
|
|
315
|
+
);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
config.workflow_task_poller_behavior.validate()?;
|
|
319
|
+
config.activity_task_poller_behavior.validate()?;
|
|
320
|
+
config.nexus_task_poller_behavior.validate()?;
|
|
321
|
+
|
|
322
|
+
if let Some(ref x) = config.max_worker_activities_per_second
|
|
323
|
+
&& (!x.is_normal() || x.is_sign_negative())
|
|
324
|
+
{
|
|
325
|
+
return Err(
|
|
326
|
+
"`max_worker_activities_per_second` must be positive and nonzero".to_string(),
|
|
327
|
+
);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
if matches!(config.max_outstanding_workflow_tasks, Some(v) if v == 0) {
|
|
331
|
+
return Err("`max_outstanding_workflow_tasks` must be > 0".to_string());
|
|
332
|
+
}
|
|
333
|
+
if matches!(config.max_outstanding_activities, Some(v) if v == 0) {
|
|
334
|
+
return Err("`max_outstanding_activities` must be > 0".to_string());
|
|
335
|
+
}
|
|
336
|
+
if matches!(config.max_outstanding_local_activities, Some(v) if v == 0) {
|
|
337
|
+
return Err("`max_outstanding_local_activities` must be > 0".to_string());
|
|
338
|
+
}
|
|
339
|
+
if matches!(config.max_outstanding_nexus_tasks, Some(v) if v == 0) {
|
|
340
|
+
return Err("`max_outstanding_nexus_tasks` must be > 0".to_string());
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if config.max_cached_workflows > 0 {
|
|
344
|
+
if let Some(max_wft) = config.max_outstanding_workflow_tasks
|
|
345
|
+
&& max_wft < 2
|
|
346
|
+
{
|
|
347
|
+
return Err(
|
|
348
|
+
"`max_cached_workflows` > 0 requires `max_outstanding_workflow_tasks` >= 2"
|
|
349
|
+
.to_string(),
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
if matches!(config.workflow_task_poller_behavior, PollerBehavior::SimpleMaximum(u) if u < 2)
|
|
353
|
+
{
|
|
354
|
+
return Err("`max_cached_workflows` > 0 requires `workflow_task_poller_behavior` to be at least 2".to_string());
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if config.tuner.is_some()
|
|
359
|
+
&& (config.max_outstanding_workflow_tasks.is_some()
|
|
360
|
+
|| config.max_outstanding_activities.is_some()
|
|
361
|
+
|| config.max_outstanding_local_activities.is_some())
|
|
362
|
+
{
|
|
363
|
+
return Err("max_outstanding_* fields are mutually exclusive with `tuner`".to_string());
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
match &config.versioning_strategy {
|
|
367
|
+
WorkerVersioningStrategy::None { .. } => {}
|
|
368
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(d) => {
|
|
369
|
+
if d.use_worker_versioning
|
|
370
|
+
&& (d.version.build_id.is_empty() || d.version.deployment_name.is_empty())
|
|
371
|
+
{
|
|
372
|
+
return Err("WorkerDeploymentVersion must have a non-empty build_id and deployment_name when deployment-based versioning is enabled".to_string());
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => {
|
|
376
|
+
if build_id.is_empty() {
|
|
377
|
+
return Err(
|
|
378
|
+
"Legacy build id-based versioning must have a non-empty build_id"
|
|
379
|
+
.to_string(),
|
|
380
|
+
);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
Ok(config)
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
|
|
115
389
|
/// A worker polls on a certain task queue
|
|
116
390
|
pub struct Worker {
|
|
117
391
|
config: WorkerConfig,
|
|
@@ -141,6 +415,9 @@ pub struct Worker {
|
|
|
141
415
|
client_worker_registrator: Arc<ClientWorkerRegistrator>,
|
|
142
416
|
/// Status of the worker
|
|
143
417
|
status: Arc<RwLock<WorkerStatus>>,
|
|
418
|
+
/// Set during validate() when server supports graceful poll cancellation on shutdown.
|
|
419
|
+
/// Shared with pollers so they can decide per-poll whether to hard-kill or wait.
|
|
420
|
+
graceful_poll_shutdown: Arc<AtomicBool>,
|
|
144
421
|
}
|
|
145
422
|
|
|
146
423
|
struct AllPermitsTracker {
|
|
@@ -172,175 +449,10 @@ impl WorkerTelemetry {
|
|
|
172
449
|
}
|
|
173
450
|
}
|
|
174
451
|
|
|
175
|
-
#[async_trait::async_trait]
|
|
176
|
-
impl WorkerTrait for Worker {
|
|
177
|
-
async fn validate(&self) -> Result<NamespaceInfo, WorkerValidationError> {
|
|
178
|
-
match self.client.describe_namespace().await {
|
|
179
|
-
Ok(info) => {
|
|
180
|
-
let limits = info.namespace_info.and_then(|ns_info| {
|
|
181
|
-
ns_info.limits.map(|api_limits| namespace_info::Limits {
|
|
182
|
-
blob_size_limit_error: api_limits.blob_size_limit_error,
|
|
183
|
-
memo_size_limit_error: api_limits.memo_size_limit_error,
|
|
184
|
-
})
|
|
185
|
-
});
|
|
186
|
-
return Ok(NamespaceInfo { limits });
|
|
187
|
-
}
|
|
188
|
-
Err(e) => {
|
|
189
|
-
if e.code() == tonic::Code::Unimplemented {
|
|
190
|
-
// Ignore if unimplemented since we wouldn't want to fail against an old server, for
|
|
191
|
-
// example.
|
|
192
|
-
return Ok(NamespaceInfo {
|
|
193
|
-
..Default::default()
|
|
194
|
-
});
|
|
195
|
-
}
|
|
196
|
-
return Err(WorkerValidationError::NamespaceDescribeError {
|
|
197
|
-
source: e,
|
|
198
|
-
namespace: self.config.namespace.clone(),
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
|
|
205
|
-
self.next_workflow_activation().await
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
#[instrument(skip(self))]
|
|
209
|
-
async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
|
|
210
|
-
loop {
|
|
211
|
-
match self.activity_poll().await.transpose() {
|
|
212
|
-
Some(r) => break r,
|
|
213
|
-
None => {
|
|
214
|
-
tokio::task::yield_now().await;
|
|
215
|
-
continue;
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
#[instrument(skip(self))]
|
|
222
|
-
async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
|
|
223
|
-
match &self.nexus_mgr {
|
|
224
|
-
Some(mgr) => mgr.next_nexus_task().await,
|
|
225
|
-
None => Err(PollError::ShutDown),
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
async fn complete_workflow_activation(
|
|
230
|
-
&self,
|
|
231
|
-
completion: WorkflowActivationCompletion,
|
|
232
|
-
) -> Result<(), CompleteWfError> {
|
|
233
|
-
self.complete_workflow_activation(completion).await
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
async fn complete_activity_task(
|
|
237
|
-
&self,
|
|
238
|
-
completion: ActivityTaskCompletion,
|
|
239
|
-
) -> Result<(), CompleteActivityError> {
|
|
240
|
-
let task_token = TaskToken(completion.task_token);
|
|
241
|
-
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
242
|
-
s
|
|
243
|
-
} else {
|
|
244
|
-
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
245
|
-
reason: "Activity completion had empty result/status field".to_owned(),
|
|
246
|
-
completion: None,
|
|
247
|
-
});
|
|
248
|
-
};
|
|
249
|
-
|
|
250
|
-
self.complete_activity(task_token, status).await
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
async fn complete_nexus_task(
|
|
254
|
-
&self,
|
|
255
|
-
completion: NexusTaskCompletion,
|
|
256
|
-
) -> Result<(), CompleteNexusError> {
|
|
257
|
-
let status = if let Some(s) = completion.status {
|
|
258
|
-
s
|
|
259
|
-
} else {
|
|
260
|
-
return Err(CompleteNexusError::MalformedNexusCompletion {
|
|
261
|
-
reason: "Nexus completion had empty status field".to_owned(),
|
|
262
|
-
});
|
|
263
|
-
};
|
|
264
|
-
|
|
265
|
-
self.complete_nexus_task(TaskToken(completion.task_token), status)
|
|
266
|
-
.await
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
270
|
-
self.record_heartbeat(details);
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
fn request_workflow_eviction(&self, run_id: &str) {
|
|
274
|
-
self.request_wf_eviction(
|
|
275
|
-
run_id,
|
|
276
|
-
"Eviction explicitly requested by lang",
|
|
277
|
-
EvictionReason::LangRequested,
|
|
278
|
-
);
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
fn get_config(&self) -> &WorkerConfig {
|
|
282
|
-
&self.config
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
286
|
-
fn initiate_shutdown(&self) {
|
|
287
|
-
if !self.shutdown_token.is_cancelled() {
|
|
288
|
-
info!(
|
|
289
|
-
task_queue=%self.config.task_queue,
|
|
290
|
-
namespace=%self.config.namespace,
|
|
291
|
-
"Initiated shutdown",
|
|
292
|
-
);
|
|
293
|
-
}
|
|
294
|
-
self.shutdown_token.cancel();
|
|
295
|
-
// First, disable Eager Workflow Start
|
|
296
|
-
if !self.client_worker_registrator.shared_namespace_worker {
|
|
297
|
-
let _res = self
|
|
298
|
-
.client
|
|
299
|
-
.workers()
|
|
300
|
-
.unregister_slot_provider(self.worker_instance_key);
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
// Push a BumpStream message to the workflow activation queue. This ensures that
|
|
304
|
-
// any pending workflow activation polls will resolve, even if there are no other inputs.
|
|
305
|
-
if let Some(workflows) = &self.workflows {
|
|
306
|
-
workflows.bump_stream();
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
// Second, we want to stop polling of both activity and workflow tasks
|
|
310
|
-
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
311
|
-
atm.initiate_shutdown();
|
|
312
|
-
}
|
|
313
|
-
// Let the manager know that shutdown has been initiated to try to unblock the local
|
|
314
|
-
// activity poll in case this worker is an activity-only worker.
|
|
315
|
-
if let Some(la_mgr) = &self.local_act_mgr {
|
|
316
|
-
la_mgr.shutdown_initiated();
|
|
317
|
-
|
|
318
|
-
// If workflows have never been polled, immediately tell the local activity manager
|
|
319
|
-
// that workflows have shut down, so it can proceed with shutdown without waiting.
|
|
320
|
-
// This is particularly important for activity-only workers.
|
|
321
|
-
if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
|
|
322
|
-
la_mgr.workflows_have_shutdown();
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
async fn shutdown(&self) {
|
|
328
|
-
self.shutdown().await
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
async fn finalize_shutdown(self) {
|
|
332
|
-
self.finalize_shutdown().await
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
fn worker_instance_key(&self) -> Uuid {
|
|
336
|
-
self.worker_instance_key
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
|
|
340
452
|
impl Worker {
|
|
341
453
|
/// Creates a new [Worker] from a [WorkerClient] instance with real task pollers and optional
|
|
342
454
|
/// telemetry.
|
|
343
|
-
pub fn new(
|
|
455
|
+
pub(crate) fn new(
|
|
344
456
|
config: WorkerConfig,
|
|
345
457
|
sticky_queue_name: Option<String>,
|
|
346
458
|
client: Arc<dyn WorkerClient>,
|
|
@@ -365,6 +477,39 @@ impl Worker {
|
|
|
365
477
|
)
|
|
366
478
|
}
|
|
367
479
|
|
|
480
|
+
/// Validate that the worker can properly connect to server, plus any other validation that
|
|
481
|
+
/// needs to be done asynchronously. Lang SDKs should call this function once before calling
|
|
482
|
+
/// any others.
|
|
483
|
+
pub async fn validate(&self) -> Result<NamespaceInfo, WorkerValidationError> {
|
|
484
|
+
match self.client.describe_namespace().await {
|
|
485
|
+
Ok(info) => {
|
|
486
|
+
let ns_info = info.namespace_info;
|
|
487
|
+
let limits = ns_info.as_ref().and_then(|ns_info| {
|
|
488
|
+
ns_info.limits.map(|api_limits| namespace_info::Limits {
|
|
489
|
+
blob_size_limit_error: api_limits.blob_size_limit_error,
|
|
490
|
+
memo_size_limit_error: api_limits.memo_size_limit_error,
|
|
491
|
+
})
|
|
492
|
+
});
|
|
493
|
+
if ns_info
|
|
494
|
+
.and_then(|ns| ns.capabilities)
|
|
495
|
+
.is_some_and(|caps| caps.worker_poll_complete_on_shutdown)
|
|
496
|
+
{
|
|
497
|
+
self.graceful_poll_shutdown.store(true, Ordering::Relaxed);
|
|
498
|
+
}
|
|
499
|
+
Ok(NamespaceInfo { limits })
|
|
500
|
+
}
|
|
501
|
+
Err(e) if e.code() == tonic::Code::Unimplemented => {
|
|
502
|
+
// Ignore if unimplemented since we wouldn't want to fail against an old server, for
|
|
503
|
+
// example.
|
|
504
|
+
Ok(NamespaceInfo::default())
|
|
505
|
+
}
|
|
506
|
+
Err(e) => Err(WorkerValidationError::NamespaceDescribeError {
|
|
507
|
+
source: e,
|
|
508
|
+
namespace: self.config.namespace.clone(),
|
|
509
|
+
}),
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
368
513
|
/// Replace client.
|
|
369
514
|
///
|
|
370
515
|
/// For eager workflow purposes, this new client will now apply to future eager start requests
|
|
@@ -374,10 +519,7 @@ impl Worker {
|
|
|
374
519
|
/// For worker heartbeat, this will remove an existing shared worker if it is the last worker of
|
|
375
520
|
/// the old client and create a new nexus worker if it's the first client of the namespace on
|
|
376
521
|
/// the new client.
|
|
377
|
-
pub fn replace_client
|
|
378
|
-
where
|
|
379
|
-
CT: Into<AnyClient>,
|
|
380
|
-
{
|
|
522
|
+
pub fn replace_client(&self, mut new_connection: Connection) -> Result<(), anyhow::Error> {
|
|
381
523
|
// Unregister worker from current client, register in new client at the end
|
|
382
524
|
self.client
|
|
383
525
|
.workers()
|
|
@@ -387,13 +529,12 @@ impl Worker {
|
|
|
387
529
|
.workers()
|
|
388
530
|
.finalize_unregister(self.worker_instance_key)?;
|
|
389
531
|
|
|
390
|
-
|
|
391
|
-
|
|
532
|
+
super::init_worker_client(
|
|
533
|
+
&mut new_connection,
|
|
392
534
|
self.config.client_identity_override.clone(),
|
|
393
|
-
new_client,
|
|
394
535
|
);
|
|
395
536
|
|
|
396
|
-
self.client.
|
|
537
|
+
self.client.replace_connection(new_connection);
|
|
397
538
|
*self.client_worker_registrator.client.write() = self.client.clone();
|
|
398
539
|
self.client
|
|
399
540
|
.workers()
|
|
@@ -475,6 +616,7 @@ impl Worker {
|
|
|
475
616
|
let wf_sticky_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
476
617
|
let act_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
477
618
|
let nexus_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
619
|
+
let graceful_poll_shutdown = Arc::new(AtomicBool::new(false));
|
|
478
620
|
|
|
479
621
|
let nexus_slots = MeteredPermitDealer::new(
|
|
480
622
|
tuner.nexus_task_slot_supplier(),
|
|
@@ -495,6 +637,7 @@ impl Worker {
|
|
|
495
637
|
&wft_slots,
|
|
496
638
|
wf_last_suc_poll_time.clone(),
|
|
497
639
|
wf_sticky_last_suc_poll_time.clone(),
|
|
640
|
+
graceful_poll_shutdown.clone(),
|
|
498
641
|
)
|
|
499
642
|
.boxed();
|
|
500
643
|
let stream = if !client.is_mock() {
|
|
@@ -524,6 +667,7 @@ impl Worker {
|
|
|
524
667
|
max_tps: config.max_task_queue_activities_per_second,
|
|
525
668
|
},
|
|
526
669
|
act_last_suc_poll_time.clone(),
|
|
670
|
+
graceful_poll_shutdown.clone(),
|
|
527
671
|
);
|
|
528
672
|
Some(Box::from(ap) as BoxedActPoller)
|
|
529
673
|
} else {
|
|
@@ -541,6 +685,7 @@ impl Worker {
|
|
|
541
685
|
Some(move |np| np_metrics.record_num_pollers(np)),
|
|
542
686
|
nexus_last_suc_poll_time.clone(),
|
|
543
687
|
shared_namespace_worker,
|
|
688
|
+
graceful_poll_shutdown.clone(),
|
|
544
689
|
)) as BoxedNexusPoller)
|
|
545
690
|
} else {
|
|
546
691
|
None
|
|
@@ -645,9 +790,7 @@ impl Worker {
|
|
|
645
790
|
});
|
|
646
791
|
|
|
647
792
|
let deployment_options = match &config.versioning_strategy {
|
|
648
|
-
|
|
649
|
-
Some(opts.clone())
|
|
650
|
-
}
|
|
793
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => Some(opts.clone()),
|
|
651
794
|
_ => None,
|
|
652
795
|
};
|
|
653
796
|
let provider = SlotProvider::new(
|
|
@@ -657,7 +800,7 @@ impl Worker {
|
|
|
657
800
|
external_wft_tx,
|
|
658
801
|
deployment_options,
|
|
659
802
|
);
|
|
660
|
-
let worker_instance_key =
|
|
803
|
+
let worker_instance_key = client.worker_instance_key();
|
|
661
804
|
let worker_status = Arc::new(RwLock::new(WorkerStatus::Running));
|
|
662
805
|
|
|
663
806
|
let sdk_name_and_ver = client.sdk_name_and_version();
|
|
@@ -762,12 +905,23 @@ impl Worker {
|
|
|
762
905
|
nexus_mgr,
|
|
763
906
|
client_worker_registrator,
|
|
764
907
|
status: worker_status,
|
|
908
|
+
graceful_poll_shutdown,
|
|
765
909
|
})
|
|
766
910
|
}
|
|
767
911
|
|
|
768
|
-
///
|
|
769
|
-
///
|
|
770
|
-
|
|
912
|
+
/// Initiates async shutdown procedure, eventually ceases all polling of the server and shuts
|
|
913
|
+
/// down this worker. [Worker::poll_workflow_activation] and [Worker::poll_activity_task] should
|
|
914
|
+
/// be called until both return a `ShutDown` error to ensure that all outstanding work is
|
|
915
|
+
/// complete. This means that the lang sdk will need to call
|
|
916
|
+
/// [Worker::complete_workflow_activation] and [Worker::complete_activity_task] for those
|
|
917
|
+
/// workflows & activities until they are done. At that point, the lang SDK can end the process,
|
|
918
|
+
/// or drop the [Worker] instance via [Worker::finalize_shutdown], which will close the
|
|
919
|
+
/// connection and free resources. If you have set [WorkerConfig::task_types] to exclude
|
|
920
|
+
/// [WorkerTaskTypes::activity_only()], you may skip calling [Worker::poll_activity_task].
|
|
921
|
+
///
|
|
922
|
+
/// Lang implementations should use [Worker::initiate_shutdown] followed by
|
|
923
|
+
/// [Worker::finalize_shutdown].
|
|
924
|
+
pub async fn shutdown(&self) {
|
|
771
925
|
self.initiate_shutdown();
|
|
772
926
|
{
|
|
773
927
|
*self.status.write() = WorkerStatus::ShuttingDown;
|
|
@@ -783,7 +937,17 @@ impl Worker {
|
|
|
783
937
|
.and_then(|wf| wf.get_sticky_queue_name())
|
|
784
938
|
.unwrap_or_default();
|
|
785
939
|
// This is a best effort call and we can still shutdown the worker if it fails
|
|
786
|
-
|
|
940
|
+
let task_queue_types = self.config.task_types.to_task_queue_types();
|
|
941
|
+
match self
|
|
942
|
+
.client
|
|
943
|
+
.shutdown_worker(
|
|
944
|
+
sticky_name,
|
|
945
|
+
self.config.task_queue.clone(),
|
|
946
|
+
task_queue_types,
|
|
947
|
+
heartbeat,
|
|
948
|
+
)
|
|
949
|
+
.await
|
|
950
|
+
{
|
|
787
951
|
Err(err)
|
|
788
952
|
if !matches!(
|
|
789
953
|
err.code(),
|
|
@@ -827,8 +991,12 @@ impl Worker {
|
|
|
827
991
|
}
|
|
828
992
|
}
|
|
829
993
|
|
|
830
|
-
///
|
|
831
|
-
async
|
|
994
|
+
/// Completes shutdown and frees all resources. You should avoid simply dropping workers, as
|
|
995
|
+
/// this does not allow async tasks to report any panics that may have occurred cleanly.
|
|
996
|
+
///
|
|
997
|
+
/// This should be called only after [Worker::shutdown] has resolved and/or both polling
|
|
998
|
+
/// functions have returned `ShutDown` errors.
|
|
999
|
+
pub async fn finalize_shutdown(self) {
|
|
832
1000
|
self.shutdown().await;
|
|
833
1001
|
if let Some(b) = self.at_task_mgr {
|
|
834
1002
|
b.shutdown().await;
|
|
@@ -882,11 +1050,26 @@ impl Worker {
|
|
|
882
1050
|
self.workflows.as_ref().and_then(|w| w.unused_wft_permits())
|
|
883
1051
|
}
|
|
884
1052
|
|
|
885
|
-
///
|
|
886
|
-
///
|
|
1053
|
+
/// Ask the worker for some work, returning an [ActivityTask]. It is then the language SDK's
|
|
1054
|
+
/// responsibility to call the appropriate activity code with the provided inputs. Blocks
|
|
1055
|
+
/// indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1056
|
+
///
|
|
1057
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
887
1058
|
///
|
|
888
|
-
///
|
|
889
|
-
|
|
1059
|
+
/// Local activities are returned first before polling the server if there are any.
|
|
1060
|
+
#[instrument(skip(self))]
|
|
1061
|
+
pub async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
|
|
1062
|
+
loop {
|
|
1063
|
+
match self.activity_poll().await.transpose() {
|
|
1064
|
+
Some(r) => break r,
|
|
1065
|
+
None => {
|
|
1066
|
+
tokio::task::yield_now().await;
|
|
1067
|
+
continue;
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
|
|
890
1073
|
async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollError> {
|
|
891
1074
|
let local_activities_complete = self.local_activities_complete.load(Ordering::Relaxed);
|
|
892
1075
|
let non_local_activities_complete =
|
|
@@ -969,8 +1152,26 @@ impl Worker {
|
|
|
969
1152
|
r
|
|
970
1153
|
}
|
|
971
1154
|
|
|
972
|
-
///
|
|
973
|
-
|
|
1155
|
+
/// Notify the Temporal service that an activity is still alive. Long running activities that
|
|
1156
|
+
/// take longer than `activity_heartbeat_timeout` to finish must call this function in order to
|
|
1157
|
+
/// report progress, otherwise the activity will timeout and a new attempt will be scheduled.
|
|
1158
|
+
///
|
|
1159
|
+
/// The first heartbeat request will be sent immediately, subsequent rapid calls to this
|
|
1160
|
+
/// function will result in heartbeat requests being aggregated and the last one received during
|
|
1161
|
+
/// the aggregation period will be sent to the server, where that period is defined as half the
|
|
1162
|
+
/// heartbeat timeout.
|
|
1163
|
+
///
|
|
1164
|
+
/// Unlike Java/Go SDKs we do not return cancellation status as part of heartbeat response and
|
|
1165
|
+
/// instead send it as a separate activity task to the lang, decoupling heartbeat and
|
|
1166
|
+
/// cancellation processing.
|
|
1167
|
+
///
|
|
1168
|
+
/// For now activity still need to send heartbeats if they want to receive cancellation
|
|
1169
|
+
/// requests. In the future we will change this and will dispatch cancellations more
|
|
1170
|
+
/// proactively. Note that this function does not block on the server call and returns
|
|
1171
|
+
/// immediately. Underlying validation errors are swallowed and logged, this has been agreed to
|
|
1172
|
+
/// be optimal behavior for the user as we don't want to break activity execution due to badly
|
|
1173
|
+
/// configured heartbeat options.
|
|
1174
|
+
pub fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
974
1175
|
if let Some(at_mgr) = self.at_task_mgr.as_ref() {
|
|
975
1176
|
let tt = TaskToken(details.task_token.clone());
|
|
976
1177
|
if let Err(e) = at_mgr.record_heartbeat(details) {
|
|
@@ -979,14 +1180,28 @@ impl Worker {
|
|
|
979
1180
|
}
|
|
980
1181
|
}
|
|
981
1182
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
1183
|
+
/// Tell the worker that an activity has finished executing. May (and should) be freely called
|
|
1184
|
+
/// concurrently.
|
|
1185
|
+
#[instrument(skip(self, completion),
|
|
1186
|
+
fields(task_token, status,
|
|
1187
|
+
task_queue=%self.config.task_queue, workflow_id, run_id))]
|
|
1188
|
+
pub async fn complete_activity_task(
|
|
986
1189
|
&self,
|
|
987
|
-
|
|
988
|
-
status: activity_execution_result::Status,
|
|
1190
|
+
completion: ActivityTaskCompletion,
|
|
989
1191
|
) -> Result<(), CompleteActivityError> {
|
|
1192
|
+
let task_token = TaskToken(completion.task_token);
|
|
1193
|
+
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
1194
|
+
s
|
|
1195
|
+
} else {
|
|
1196
|
+
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
1197
|
+
reason: "Activity completion had empty result/status field".to_owned(),
|
|
1198
|
+
completion: None,
|
|
1199
|
+
});
|
|
1200
|
+
};
|
|
1201
|
+
|
|
1202
|
+
tracing::Span::current().record("task_token", task_token.to_string());
|
|
1203
|
+
tracing::Span::current().record("status", status.to_string());
|
|
1204
|
+
|
|
990
1205
|
validate_activity_completion(&status)?;
|
|
991
1206
|
if task_token.is_local_activity_task() {
|
|
992
1207
|
let as_la_res: LocalActivityExecutionResult = status.try_into()?;
|
|
@@ -1002,8 +1217,20 @@ impl Worker {
|
|
|
1002
1217
|
}
|
|
1003
1218
|
}
|
|
1004
1219
|
|
|
1220
|
+
/// Ask the worker for some work, returning a [WorkflowActivation]. It is then the language
|
|
1221
|
+
/// SDK's responsibility to call the appropriate workflow code with the provided inputs. Blocks
|
|
1222
|
+
/// indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1223
|
+
///
|
|
1224
|
+
/// It is important to understand that all activations must be responded to. There can only
|
|
1225
|
+
/// be one outstanding activation for a particular run of a workflow at any time. If an
|
|
1226
|
+
/// activation is not responded to, it will cause that workflow to become stuck forever.
|
|
1227
|
+
///
|
|
1228
|
+
/// See [WorkflowActivation] for more details on the expected behavior of lang w.r.t activation
|
|
1229
|
+
/// & job processing.
|
|
1230
|
+
///
|
|
1231
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
1005
1232
|
#[instrument(skip(self), fields(run_id, workflow_id, task_queue=%self.config.task_queue))]
|
|
1006
|
-
pub
|
|
1233
|
+
pub async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
|
|
1007
1234
|
match &self.workflows {
|
|
1008
1235
|
Some(workflows) => {
|
|
1009
1236
|
let r = workflows.next_workflow_activation().await;
|
|
@@ -1025,10 +1252,14 @@ impl Worker {
|
|
|
1025
1252
|
}
|
|
1026
1253
|
}
|
|
1027
1254
|
|
|
1255
|
+
/// Tell the worker that a workflow activation has completed. May (and should) be freely called
|
|
1256
|
+
/// concurrently. The future may take some time to resolve, as fetching more events might be
|
|
1257
|
+
/// necessary for completion to... complete - thus SDK implementers should make sure they do
|
|
1258
|
+
/// not serialize completions.
|
|
1028
1259
|
#[instrument(skip(self, completion),
|
|
1029
1260
|
fields(completion=%&completion, run_id=%completion.run_id, workflow_id,
|
|
1030
1261
|
task_queue=%self.config.task_queue))]
|
|
1031
|
-
pub
|
|
1262
|
+
pub async fn complete_workflow_activation(
|
|
1032
1263
|
&self,
|
|
1033
1264
|
completion: WorkflowActivationCompletion,
|
|
1034
1265
|
) -> Result<(), CompleteWfError> {
|
|
@@ -1049,21 +1280,61 @@ impl Worker {
|
|
|
1049
1280
|
}
|
|
1050
1281
|
}
|
|
1051
1282
|
|
|
1283
|
+
/// Ask the worker for some nexus related work. It is then the language SDK's
|
|
1284
|
+
/// responsibility to call the appropriate nexus operation handler code with the provided
|
|
1285
|
+
/// inputs. Blocks indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1286
|
+
///
|
|
1287
|
+
/// All tasks must be responded to for shutdown to complete.
|
|
1288
|
+
///
|
|
1289
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
1290
|
+
#[instrument(skip(self))]
|
|
1291
|
+
pub async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
|
|
1292
|
+
match &self.nexus_mgr {
|
|
1293
|
+
Some(mgr) => mgr.next_nexus_task().await,
|
|
1294
|
+
None => Err(PollError::ShutDown),
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
/// Tell the worker that a nexus task has completed. May (and should) be freely called
|
|
1299
|
+
/// concurrently.
|
|
1052
1300
|
#[instrument(
|
|
1053
|
-
skip(self,
|
|
1054
|
-
fields(task_token
|
|
1301
|
+
skip(self, completion),
|
|
1302
|
+
fields(task_token, status, task_queue=%self.config.task_queue)
|
|
1055
1303
|
)]
|
|
1056
|
-
async fn complete_nexus_task(
|
|
1304
|
+
pub async fn complete_nexus_task(
|
|
1057
1305
|
&self,
|
|
1058
|
-
|
|
1059
|
-
status: nexus_task_completion::Status,
|
|
1306
|
+
completion: NexusTaskCompletion,
|
|
1060
1307
|
) -> Result<(), CompleteNexusError> {
|
|
1308
|
+
let status = if let Some(s) = completion.status {
|
|
1309
|
+
s
|
|
1310
|
+
} else {
|
|
1311
|
+
return Err(CompleteNexusError::MalformedNexusCompletion {
|
|
1312
|
+
reason: "Nexus completion had empty status field".to_owned(),
|
|
1313
|
+
});
|
|
1314
|
+
};
|
|
1315
|
+
let tt = TaskToken(completion.task_token);
|
|
1316
|
+
tracing::Span::current().record("task_token", tt.to_string());
|
|
1317
|
+
tracing::Span::current().record("status", status.to_string());
|
|
1318
|
+
|
|
1061
1319
|
match &self.nexus_mgr {
|
|
1062
1320
|
Some(mgr) => mgr.complete_task(tt, status, &*self.client).await,
|
|
1063
1321
|
None => Err(CompleteNexusError::NexusNotEnabled),
|
|
1064
1322
|
}
|
|
1065
1323
|
}
|
|
1066
1324
|
|
|
1325
|
+
/// Request that a workflow be evicted by its run id. This will generate a workflow activation
|
|
1326
|
+
/// with the eviction job inside it to be eventually returned by
|
|
1327
|
+
/// [Worker::poll_workflow_activation]. If the workflow had any existing outstanding
|
|
1328
|
+
/// activations, such activations are invalidated and subsequent completions of them will do
|
|
1329
|
+
/// nothing and log a warning.
|
|
1330
|
+
pub fn request_workflow_eviction(&self, run_id: &str) {
|
|
1331
|
+
self.request_wf_eviction(
|
|
1332
|
+
run_id,
|
|
1333
|
+
"Eviction explicitly requested by lang",
|
|
1334
|
+
EvictionReason::LangRequested,
|
|
1335
|
+
);
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1067
1338
|
/// Request a workflow eviction
|
|
1068
1339
|
pub(crate) fn request_wf_eviction(
|
|
1069
1340
|
&self,
|
|
@@ -1078,6 +1349,63 @@ impl Worker {
|
|
|
1078
1349
|
}
|
|
1079
1350
|
}
|
|
1080
1351
|
|
|
1352
|
+
/// Return this worker's config
|
|
1353
|
+
pub fn get_config(&self) -> &WorkerConfig {
|
|
1354
|
+
&self.config
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
/// Initiate shutdown. See [Worker::shutdown], this is just a sync version that starts the
|
|
1358
|
+
/// process. You can then wait on `shutdown` or [Worker::finalize_shutdown].
|
|
1359
|
+
pub fn initiate_shutdown(&self) {
|
|
1360
|
+
if !self.shutdown_token.is_cancelled() {
|
|
1361
|
+
info!(
|
|
1362
|
+
task_queue=%self.config.task_queue,
|
|
1363
|
+
namespace=%self.config.namespace,
|
|
1364
|
+
"Initiated shutdown",
|
|
1365
|
+
);
|
|
1366
|
+
}
|
|
1367
|
+
self.shutdown_token.cancel();
|
|
1368
|
+
{
|
|
1369
|
+
*self.status.write() = WorkerStatus::ShuttingDown;
|
|
1370
|
+
}
|
|
1371
|
+
// First, disable Eager Workflow Start
|
|
1372
|
+
if !self.client_worker_registrator.shared_namespace_worker {
|
|
1373
|
+
let _res = self
|
|
1374
|
+
.client
|
|
1375
|
+
.workers()
|
|
1376
|
+
.unregister_slot_provider(self.worker_instance_key);
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
// Push a BumpStream message to the workflow activation queue. This ensures that
|
|
1380
|
+
// any pending workflow activation polls will resolve, even if there are no other inputs.
|
|
1381
|
+
if let Some(workflows) = &self.workflows {
|
|
1382
|
+
workflows.bump_stream();
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
// Second, we want to stop polling of both activity and workflow tasks
|
|
1386
|
+
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
1387
|
+
atm.initiate_shutdown();
|
|
1388
|
+
}
|
|
1389
|
+
// Let the manager know that shutdown has been initiated to try to unblock the local
|
|
1390
|
+
// activity poll in case this worker is an activity-only worker.
|
|
1391
|
+
if let Some(la_mgr) = &self.local_act_mgr {
|
|
1392
|
+
la_mgr.shutdown_initiated();
|
|
1393
|
+
|
|
1394
|
+
// If workflows have never been polled, immediately tell the local activity manager
|
|
1395
|
+
// that workflows have shut down, so it can proceed with shutdown without waiting.
|
|
1396
|
+
// This is particularly important for activity-only workers.
|
|
1397
|
+
if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
|
|
1398
|
+
la_mgr.workflows_have_shutdown();
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
/// Unique identifier for this worker instance.
|
|
1404
|
+
/// This must be stable across the worker's lifetime and unique per instance.
|
|
1405
|
+
pub fn worker_instance_key(&self) -> Uuid {
|
|
1406
|
+
self.worker_instance_key
|
|
1407
|
+
}
|
|
1408
|
+
|
|
1081
1409
|
/// Sets a function to be called at the end of each activation completion
|
|
1082
1410
|
pub(crate) fn set_post_activate_hook(
|
|
1083
1411
|
&mut self,
|
|
@@ -1120,6 +1448,446 @@ impl Worker {
|
|
|
1120
1448
|
}
|
|
1121
1449
|
}
|
|
1122
1450
|
|
|
1451
|
+
/// Errors thrown by [crate::Worker::validate]
|
|
1452
|
+
#[derive(thiserror::Error, Debug)]
|
|
1453
|
+
pub enum WorkerValidationError {
|
|
1454
|
+
/// The namespace provided to the worker does not exist on the server.
|
|
1455
|
+
#[error("Namespace {namespace} was not found or otherwise could not be described: {source:?}")]
|
|
1456
|
+
NamespaceDescribeError {
|
|
1457
|
+
/// The underlying server error.
|
|
1458
|
+
source: tonic::Status,
|
|
1459
|
+
/// The associated namespace.
|
|
1460
|
+
namespace: String,
|
|
1461
|
+
},
|
|
1462
|
+
}
|
|
1463
|
+
|
|
1464
|
+
/// Errors thrown by [crate::Worker] polling methods
|
|
1465
|
+
#[derive(thiserror::Error, Debug)]
|
|
1466
|
+
pub enum PollError {
|
|
1467
|
+
/// [crate::Worker::shutdown] was called, and there are no more tasks to be handled from this
|
|
1468
|
+
/// poll function. Lang must call [crate::Worker::complete_workflow_activation],
|
|
1469
|
+
/// [crate::Worker::complete_activity_task], or
|
|
1470
|
+
/// [crate::Worker::complete_nexus_task] for any remaining tasks, and then may exit.
|
|
1471
|
+
#[error("Core is shut down and there are no more tasks of this kind")]
|
|
1472
|
+
ShutDown,
|
|
1473
|
+
/// Unhandled error when calling the temporal server. Core will attempt to retry any non-fatal
|
|
1474
|
+
/// errors, so lang should consider this fatal.
|
|
1475
|
+
#[error("Unhandled grpc error when polling: {0:?}")]
|
|
1476
|
+
TonicError(#[from] tonic::Status),
|
|
1477
|
+
}
|
|
1478
|
+
|
|
1479
|
+
/// Errors thrown by [crate::Worker::complete_workflow_activation]
|
|
1480
|
+
#[derive(thiserror::Error, Debug)]
|
|
1481
|
+
#[allow(clippy::large_enum_variant)]
|
|
1482
|
+
pub enum CompleteWfError {
|
|
1483
|
+
/// Lang SDK sent us a malformed workflow completion. This likely means a bug in the lang sdk.
|
|
1484
|
+
#[error("Lang SDK sent us a malformed workflow completion for run ({run_id}): {reason}")]
|
|
1485
|
+
MalformedWorkflowCompletion {
|
|
1486
|
+
/// Reason the completion was malformed
|
|
1487
|
+
reason: String,
|
|
1488
|
+
/// The run associated with the completion
|
|
1489
|
+
run_id: String,
|
|
1490
|
+
},
|
|
1491
|
+
/// Workflows have not been enabled on this worker.
|
|
1492
|
+
#[error("Workflows are not enabled on this worker")]
|
|
1493
|
+
WorkflowNotEnabled,
|
|
1494
|
+
}
|
|
1495
|
+
|
|
1496
|
+
/// Errors thrown by [crate::Worker::complete_activity_task]
|
|
1497
|
+
#[derive(thiserror::Error, Debug)]
|
|
1498
|
+
#[allow(clippy::large_enum_variant)]
|
|
1499
|
+
pub enum CompleteActivityError {
|
|
1500
|
+
/// Lang SDK sent us a malformed activity completion. This likely means a bug in the lang sdk.
|
|
1501
|
+
#[error("Lang SDK sent us a malformed activity completion ({reason}): {completion:?}")]
|
|
1502
|
+
MalformedActivityCompletion {
|
|
1503
|
+
/// Reason the completion was malformed
|
|
1504
|
+
reason: String,
|
|
1505
|
+
/// The completion, which may not be included to avoid unnecessary copies.
|
|
1506
|
+
completion: Option<ActivityExecutionResult>,
|
|
1507
|
+
},
|
|
1508
|
+
/// Activities have not been enabled on this worker.
|
|
1509
|
+
#[error("Activities are not enabled on this worker")]
|
|
1510
|
+
ActivityNotEnabled,
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
/// Errors thrown by [crate::Worker::complete_nexus_task]
|
|
1514
|
+
#[derive(thiserror::Error, Debug)]
|
|
1515
|
+
pub enum CompleteNexusError {
|
|
1516
|
+
/// Lang SDK sent us a malformed nexus completion. This likely means a bug in the lang sdk.
|
|
1517
|
+
#[error("Lang SDK sent us a malformed nexus completion: {reason}")]
|
|
1518
|
+
MalformedNexusCompletion {
|
|
1519
|
+
/// Reason the completion was malformed
|
|
1520
|
+
reason: String,
|
|
1521
|
+
},
|
|
1522
|
+
/// Nexus has not been enabled on this worker. If a user registers any Nexus handlers, the
|
|
1523
|
+
#[error("Nexus is not enabled on this worker")]
|
|
1524
|
+
NexusNotEnabled,
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
/// Categories of error that can come up while processing a workflow. Depending on user
/// preference, each may be treated either as a workflow task (WFT) failure or as a failure of
/// the whole workflow.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum WorkflowErrorType {
    /// A nondeterminism error
    Nondeterminism,
}
|
|
1534
|
+
|
|
1535
|
+
/// This trait allows users to customize the performance characteristics of workers dynamically.
|
|
1536
|
+
/// For more, see the docstrings of the traits in the return types of its functions.
|
|
1537
|
+
pub trait WorkerTuner {
|
|
1538
|
+
/// Return a [SlotSupplier] for workflow tasks. Note that workflow task slot suppliers must be
|
|
1539
|
+
/// willing to hand out a minimum of one non-sticky slot and one sticky slot if workflow caching
|
|
1540
|
+
/// is enabled, otherwise the worker may fail to process new tasks.
|
|
1541
|
+
fn workflow_task_slot_supplier(
|
|
1542
|
+
&self,
|
|
1543
|
+
) -> Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>;
|
|
1544
|
+
|
|
1545
|
+
/// Return a [SlotSupplier] for activity tasks
|
|
1546
|
+
fn activity_task_slot_supplier(
|
|
1547
|
+
&self,
|
|
1548
|
+
) -> Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>;
|
|
1549
|
+
|
|
1550
|
+
/// Return a [SlotSupplier] for local activities
|
|
1551
|
+
fn local_activity_slot_supplier(
|
|
1552
|
+
&self,
|
|
1553
|
+
) -> Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>;
|
|
1554
|
+
|
|
1555
|
+
/// Return a [SlotSupplier] for nexus tasks
|
|
1556
|
+
fn nexus_task_slot_supplier(
|
|
1557
|
+
&self,
|
|
1558
|
+
) -> Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>;
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
/// Implementing this trait allows users to customize how many tasks of certain kinds the worker
|
|
1562
|
+
/// will perform concurrently.
|
|
1563
|
+
///
|
|
1564
|
+
/// Note that, for implementations on workflow tasks ([WorkflowSlotKind]), workers that have the
|
|
1565
|
+
/// workflow cache enabled should be willing to hand out _at least_ two slots, to avoid the worker
|
|
1566
|
+
/// becoming stuck only polling on the worker's sticky queue.
|
|
1567
|
+
#[async_trait::async_trait]
|
|
1568
|
+
pub trait SlotSupplier {
|
|
1569
|
+
/// The kind of slot this supplier is supplying.
|
|
1570
|
+
type SlotKind: SlotKind;
|
|
1571
|
+
/// Block until a slot is available, then return a permit for the slot.
|
|
1572
|
+
async fn reserve_slot(&self, ctx: &dyn SlotReservationContext) -> SlotSupplierPermit;
|
|
1573
|
+
|
|
1574
|
+
/// Try to immediately reserve a slot, returning None if one is not available. Implementations
|
|
1575
|
+
/// must not block, or risk blocking the async event loop.
|
|
1576
|
+
fn try_reserve_slot(&self, ctx: &dyn SlotReservationContext) -> Option<SlotSupplierPermit>;
|
|
1577
|
+
|
|
1578
|
+
/// Marks a slot as actually now being used. This is separate from reserving one because the
|
|
1579
|
+
/// pollers need to reserve a slot before they have actually obtained work from server. Once
|
|
1580
|
+
/// that task is obtained (and validated) then the slot can actually be used to work on the
|
|
1581
|
+
/// task.
|
|
1582
|
+
///
|
|
1583
|
+
/// Users' implementation of this can choose to emit metrics, or otherwise leverage the
|
|
1584
|
+
/// information provided by the `info` parameter to be better able to make future decisions
|
|
1585
|
+
/// about whether a slot should be handed out.
|
|
1586
|
+
fn mark_slot_used(&self, ctx: &dyn SlotMarkUsedContext<SlotKind = Self::SlotKind>);
|
|
1587
|
+
|
|
1588
|
+
/// Frees a slot.
|
|
1589
|
+
fn release_slot(&self, ctx: &dyn SlotReleaseContext<SlotKind = Self::SlotKind>);
|
|
1590
|
+
|
|
1591
|
+
/// If this implementation knows how many slots are available at any moment, it should return
|
|
1592
|
+
/// that here.
|
|
1593
|
+
fn available_slots(&self) -> Option<usize> {
|
|
1594
|
+
None
|
|
1595
|
+
}
|
|
1596
|
+
|
|
1597
|
+
/// Returns a human-friendly identifier describing this supplier implementation for
|
|
1598
|
+
/// diagnostics and telemetry.
|
|
1599
|
+
fn slot_supplier_kind(&self) -> String {
|
|
1600
|
+
"Custom".to_string()
|
|
1601
|
+
}
|
|
1602
|
+
}
|
|
1603
|
+
|
|
1604
|
+
/// Context for slot reservation.
|
|
1605
|
+
pub trait SlotReservationContext: Send + Sync {
|
|
1606
|
+
/// Returns the name of the task queue this worker is polling
|
|
1607
|
+
fn task_queue(&self) -> &str;
|
|
1608
|
+
|
|
1609
|
+
/// Returns the identity of the worker
|
|
1610
|
+
fn worker_identity(&self) -> &str;
|
|
1611
|
+
|
|
1612
|
+
/// Returns the deployment version of the worker, if one is set.
|
|
1613
|
+
fn worker_deployment_version(&self) -> &Option<WorkerDeploymentVersion>;
|
|
1614
|
+
|
|
1615
|
+
/// Returns the number of currently outstanding slot permits, whether used or un-used.
|
|
1616
|
+
fn num_issued_slots(&self) -> usize;
|
|
1617
|
+
|
|
1618
|
+
/// Returns true iff this is a sticky poll for a workflow task
|
|
1619
|
+
fn is_sticky(&self) -> bool;
|
|
1620
|
+
|
|
1621
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1622
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1623
|
+
None
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1627
|
+
/// Context for slots being marked as used.
|
|
1628
|
+
pub trait SlotMarkUsedContext: Send + Sync {
|
|
1629
|
+
/// The kind of slot being marked used.
|
|
1630
|
+
type SlotKind: SlotKind;
|
|
1631
|
+
/// The slot permit that is being used
|
|
1632
|
+
fn permit(&self) -> &SlotSupplierPermit;
|
|
1633
|
+
/// Returns the info of slot that was marked as used
|
|
1634
|
+
fn info(&self) -> &<Self::SlotKind as SlotKind>::Info;
|
|
1635
|
+
|
|
1636
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1637
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1638
|
+
None
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
/// Context for slots being released.
|
|
1643
|
+
pub trait SlotReleaseContext: Send + Sync {
|
|
1644
|
+
/// The kind of slot being marked released.
|
|
1645
|
+
type SlotKind: SlotKind;
|
|
1646
|
+
/// The slot permit that is being used
|
|
1647
|
+
fn permit(&self) -> &SlotSupplierPermit;
|
|
1648
|
+
/// Returns the info of slot that was released, if it was used
|
|
1649
|
+
fn info(&self) -> Option<&<Self::SlotKind as SlotKind>::Info>;
|
|
1650
|
+
|
|
1651
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1652
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1653
|
+
None
|
|
1654
|
+
}
|
|
1655
|
+
}
|
|
1656
|
+
|
|
1657
|
+
/// A permit issued by a [SlotSupplier].
#[derive(Debug, Default)]
pub struct SlotSupplierPermit {
    // Optional type-erased payload attached by whoever created the permit.
    user_data: Option<Box<dyn Any + Send + Sync>>,
}

impl SlotSupplierPermit {
    /// Creates a permit with `user_data` attached to it.
    pub fn with_user_data<T: Any + Send + Sync>(user_data: T) -> Self {
        let payload: Box<dyn Any + Send + Sync> = Box::new(user_data);
        Self {
            user_data: Some(payload),
        }
    }

    /// Borrows the attached data as a `T`.
    ///
    /// Returns `None` when no data is attached, or when the attached data is not a `T`.
    pub fn user_data<T: Any + Send + Sync>(&self) -> Option<&T> {
        self.user_data.as_deref()?.downcast_ref()
    }

    /// Mutably borrows the attached data as a `T`.
    ///
    /// Returns `None` when no data is attached, or when the attached data is not a `T`.
    pub fn user_data_mut<T: Any + Send + Sync>(&mut self) -> Option<&mut T> {
        self.user_data.as_deref_mut()?.downcast_mut()
    }
}
|
|
1680
|
+
|
|
1681
|
+
/// What kind of task the slot is used for.
|
|
1682
|
+
#[derive(Debug, Copy, Clone, derive_more::Display, Eq, PartialEq)]
|
|
1683
|
+
pub enum SlotKindType {
|
|
1684
|
+
/// Workflow tasks.
|
|
1685
|
+
Workflow,
|
|
1686
|
+
/// Activity tasks.
|
|
1687
|
+
Activity,
|
|
1688
|
+
/// Local activity tasks.
|
|
1689
|
+
LocalActivity,
|
|
1690
|
+
/// Nexus tasks.
|
|
1691
|
+
Nexus,
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1694
|
+
/// Marker type identifying workflow slots.
#[derive(Clone, Copy, Debug)]
pub struct WorkflowSlotKind {}

/// Marker type identifying activity slots.
#[derive(Clone, Copy, Debug)]
pub struct ActivitySlotKind {}

/// Marker type identifying local activity slots.
#[derive(Clone, Copy, Debug)]
pub struct LocalActivitySlotKind {}

/// Marker type identifying nexus slots.
#[derive(Clone, Copy, Debug)]
pub struct NexusSlotKind {}
|
|
1706
|
+
|
|
1707
|
+
/// Contextual information about in-use slots.
|
|
1708
|
+
pub enum SlotInfo<'a> {
|
|
1709
|
+
/// For workflow slots.
|
|
1710
|
+
Workflow(&'a WorkflowSlotInfo),
|
|
1711
|
+
/// For activity slots.
|
|
1712
|
+
Activity(&'a ActivitySlotInfo),
|
|
1713
|
+
/// For local activity slots.
|
|
1714
|
+
LocalActivity(&'a LocalActivitySlotInfo),
|
|
1715
|
+
/// For nexus slots.
|
|
1716
|
+
Nexus(&'a NexusSlotInfo),
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
/// Allows reifying slot info into the appropriate type.
|
|
1720
|
+
pub trait SlotInfoTrait: prost::Message {
|
|
1721
|
+
/// Downcast a protobuf message into the enum.
|
|
1722
|
+
fn downcast(&self) -> SlotInfo<'_>;
|
|
1723
|
+
}
|
|
1724
|
+
impl SlotInfoTrait for WorkflowSlotInfo {
|
|
1725
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1726
|
+
SlotInfo::Workflow(self)
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
impl SlotInfoTrait for ActivitySlotInfo {
|
|
1730
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1731
|
+
SlotInfo::Activity(self)
|
|
1732
|
+
}
|
|
1733
|
+
}
|
|
1734
|
+
impl SlotInfoTrait for LocalActivitySlotInfo {
|
|
1735
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1736
|
+
SlotInfo::LocalActivity(self)
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
impl SlotInfoTrait for NexusSlotInfo {
|
|
1740
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1741
|
+
SlotInfo::Nexus(self)
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
/// Associates slot info/kinds together.
|
|
1746
|
+
pub trait SlotKind {
|
|
1747
|
+
/// The associated info for this kind.
|
|
1748
|
+
type Info: SlotInfoTrait;
|
|
1749
|
+
|
|
1750
|
+
/// Return this kind.
|
|
1751
|
+
fn kind() -> SlotKindType;
|
|
1752
|
+
}
|
|
1753
|
+
impl SlotKind for WorkflowSlotKind {
|
|
1754
|
+
type Info = WorkflowSlotInfo;
|
|
1755
|
+
|
|
1756
|
+
fn kind() -> SlotKindType {
|
|
1757
|
+
SlotKindType::Workflow
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
impl SlotKind for ActivitySlotKind {
|
|
1761
|
+
type Info = ActivitySlotInfo;
|
|
1762
|
+
|
|
1763
|
+
fn kind() -> SlotKindType {
|
|
1764
|
+
SlotKindType::Activity
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
impl SlotKind for LocalActivitySlotKind {
|
|
1768
|
+
type Info = LocalActivitySlotInfo;
|
|
1769
|
+
|
|
1770
|
+
fn kind() -> SlotKindType {
|
|
1771
|
+
SlotKindType::LocalActivity
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
impl SlotKind for NexusSlotKind {
|
|
1775
|
+
type Info = NexusSlotInfo;
|
|
1776
|
+
|
|
1777
|
+
fn kind() -> SlotKindType {
|
|
1778
|
+
SlotKindType::Nexus
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
/// The strategies available for polling tasks.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PollerBehavior {
    /// Polls whenever a slot is available, up to the given maximum number of pollers. Cannot
    /// be less than two for workflow tasks, or one for other tasks.
    SimpleMaximum(usize),
    /// Scales the number of pollers automatically, based on feedback from the server. A slot
    /// must still be available before polling begins.
    Autoscaling {
        /// At least this many poll calls will always be attempted (assuming slots are available).
        /// Cannot be zero.
        minimum: usize,
        /// At most this many poll calls will ever be open at once. Must be >= `minimum`.
        maximum: usize,
        /// This many polls will be attempted initially before scaling kicks in. Must be between
        /// `minimum` and `maximum`.
        initial: usize,
    },
}

impl PollerBehavior {
    /// Reports whether this is the [PollerBehavior::Autoscaling] variant.
    pub fn is_autoscaling(&self) -> bool {
        match self {
            PollerBehavior::Autoscaling { .. } => true,
            PollerBehavior::SimpleMaximum(_) => false,
        }
    }

    /// Checks the numeric constraints of this behavior.
    ///
    /// Returns `Err` with a description of the first violated constraint. The checks run in
    /// this order: a zero `SimpleMaximum`; a zero autoscaling `minimum`; `maximum` below
    /// `minimum`; `initial` outside `minimum..=maximum`. The workflow-specific lower bound of
    /// two is not checked here.
    pub fn validate(&self) -> Result<(), String> {
        // Guards are evaluated top to bottom, so the first violated constraint is reported.
        let problem = match *self {
            PollerBehavior::SimpleMaximum(max) if max < 1 => {
                "SimpleMaximum poller behavior must be at least 1"
            }
            PollerBehavior::Autoscaling { minimum, .. } if minimum < 1 => {
                "Autoscaling minimum poller behavior must be at least 1"
            }
            PollerBehavior::Autoscaling {
                minimum, maximum, ..
            } if maximum < minimum => {
                "Autoscaling maximum must be greater than or equal to minimum"
            }
            PollerBehavior::Autoscaling {
                minimum,
                maximum,
                initial,
            } if !(minimum..=maximum).contains(&initial) => {
                "Autoscaling initial must be between minimum and maximum"
            }
            PollerBehavior::SimpleMaximum(_) | PollerBehavior::Autoscaling { .. } => {
                return Ok(());
            }
        };
        Err(problem.to_owned())
    }
}
|
|
1839
|
+
|
|
1840
|
+
/// Strategy a core worker uses for versioning.
|
|
1841
|
+
#[derive(Clone, Debug)]
|
|
1842
|
+
pub enum WorkerVersioningStrategy {
|
|
1843
|
+
/// Don't enable any versioning
|
|
1844
|
+
None {
|
|
1845
|
+
/// Build ID may still be passed as a way to identify the worker, or may be left empty.
|
|
1846
|
+
build_id: String,
|
|
1847
|
+
},
|
|
1848
|
+
/// Maybe use the modern deployment-based versioning, or just pass a deployment version.
|
|
1849
|
+
WorkerDeploymentBased(WorkerDeploymentOptions),
|
|
1850
|
+
/// Use the legacy build-id-based whole worker versioning.
|
|
1851
|
+
LegacyBuildIdBased {
|
|
1852
|
+
/// A Build ID to use, must be non-empty.
|
|
1853
|
+
build_id: String,
|
|
1854
|
+
},
|
|
1855
|
+
}
|
|
1856
|
+
|
|
1857
|
+
impl Default for WorkerVersioningStrategy {
|
|
1858
|
+
fn default() -> Self {
|
|
1859
|
+
WorkerVersioningStrategy::None {
|
|
1860
|
+
build_id: String::new(),
|
|
1861
|
+
}
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
|
|
1865
|
+
impl WorkerVersioningStrategy {
|
|
1866
|
+
/// Return the build ID associated with this strategy.
|
|
1867
|
+
pub fn build_id(&self) -> &str {
|
|
1868
|
+
match self {
|
|
1869
|
+
WorkerVersioningStrategy::None { build_id } => build_id,
|
|
1870
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => &opts.version.build_id,
|
|
1871
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => build_id,
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
/// Returns true if this uses "build id based" legacy versioning.
|
|
1876
|
+
pub fn uses_build_id_based(&self) -> bool {
|
|
1877
|
+
matches!(self, WorkerVersioningStrategy::LegacyBuildIdBased { .. })
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
/// Returns the default versioning behavior associated with this strategy, if any.
|
|
1881
|
+
pub fn default_versioning_behavior(&self) -> Option<VersioningBehavior> {
|
|
1882
|
+
match self {
|
|
1883
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => {
|
|
1884
|
+
opts.default_versioning_behavior
|
|
1885
|
+
}
|
|
1886
|
+
_ => None,
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
}
|
|
1890
|
+
|
|
1123
1891
|
struct ClientWorkerRegistrator {
|
|
1124
1892
|
worker_instance_key: Uuid,
|
|
1125
1893
|
slot_provider: SlotProvider,
|
|
@@ -1415,13 +2183,13 @@ mod tests {
|
|
|
1415
2183
|
use crate::{
|
|
1416
2184
|
advance_fut,
|
|
1417
2185
|
test_help::test_worker_cfg,
|
|
1418
|
-
worker::
|
|
2186
|
+
worker::{
|
|
2187
|
+
PollerBehavior,
|
|
2188
|
+
client::mocks::{mock_manual_worker_client, mock_worker_client},
|
|
2189
|
+
},
|
|
1419
2190
|
};
|
|
1420
2191
|
use futures_util::FutureExt;
|
|
1421
|
-
use temporalio_common::
|
|
1422
|
-
protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse,
|
|
1423
|
-
worker::PollerBehavior,
|
|
1424
|
-
};
|
|
2192
|
+
use temporalio_common::protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
|
|
1425
2193
|
|
|
1426
2194
|
#[tokio::test]
|
|
1427
2195
|
async fn activity_timeouts_maintain_permit() {
|
|
@@ -1484,7 +2252,6 @@ mod tests {
|
|
|
1484
2252
|
|
|
1485
2253
|
#[test]
|
|
1486
2254
|
fn max_polls_zero_is_err() {
|
|
1487
|
-
use temporalio_common::worker::{WorkerConfig, WorkerTaskTypes, WorkerVersioningStrategy};
|
|
1488
2255
|
assert!(
|
|
1489
2256
|
WorkerConfig::builder()
|
|
1490
2257
|
.namespace("test")
|
|
@@ -1498,4 +2265,132 @@ mod tests {
|
|
|
1498
2265
|
.is_err()
|
|
1499
2266
|
);
|
|
1500
2267
|
}
|
|
2268
|
+
|
|
2269
|
+
fn default_versioning_strategy() -> WorkerVersioningStrategy {
|
|
2270
|
+
WorkerVersioningStrategy::None {
|
|
2271
|
+
build_id: String::new(),
|
|
2272
|
+
}
|
|
2273
|
+
}
|
|
2274
|
+
|
|
2275
|
+
#[test]
fn test_default_configuration_polls_all_types() {
    // A config built with `WorkerTaskTypes::all()` must report every task type as enabled.
    let task_types = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes::all())
        .build()
        .unwrap()
        .task_types;

    let expectations = [
        (
            task_types.enable_workflows,
            "Should poll workflows by default",
        ),
        (
            task_types.enable_local_activities,
            "should poll local activities by default",
        ),
        (
            task_types.enable_remote_activities,
            "Should poll remote activities by default",
        ),
        (task_types.enable_nexus, "Should poll nexus by default"),
    ];
    for (enabled, message) in expectations {
        assert!(enabled, "{message}");
    }
}
|
|
2300
|
+
|
|
2301
|
+
#[test]
fn test_invalid_task_types_fails_validation() {
    // Scenario 1: no task type enabled at all.
    let result = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes {
            enable_workflows: false,
            enable_local_activities: false,
            enable_remote_activities: false,
            enable_nexus: false,
        })
        .build();

    assert!(result.is_err(), "Empty task_types should fail validation");
    let err = result.err().unwrap();
    assert!(
        err.contains("At least one task type"),
        "Error should mention task types: {err}",
    );

    // Scenario 2: local activities enabled while workflows are disabled.
    let result = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes {
            enable_workflows: false,
            enable_local_activities: true,
            enable_remote_activities: false,
            enable_nexus: false,
        })
        .build();

    // These failure messages describe this scenario specifically, so a failing run points at
    // the right check rather than at the empty-task-types case above.
    assert!(
        result.is_err(),
        "Local activities without workflows should fail validation"
    );
    let err = result.err().unwrap();
    assert!(
        err.contains("cannot enable local activities without workflows"),
        "Error should mention local activities requiring workflows: {err}",
    );
}
|
|
2343
|
+
|
|
2344
|
+
#[test]
fn test_all_combinations() {
    // Each entry pairs a valid task-type selection with a label used in the failure message.
    let cases = [
        (WorkerTaskTypes::workflow_only(), "workflows only"),
        (WorkerTaskTypes::activity_only(), "activities only"),
        (WorkerTaskTypes::nexus_only(), "nexus only"),
        (
            WorkerTaskTypes {
                enable_workflows: true,
                enable_local_activities: true,
                enable_remote_activities: true,
                enable_nexus: false,
            },
            "workflows + activities",
        ),
        (
            WorkerTaskTypes {
                enable_workflows: true,
                enable_local_activities: true,
                enable_remote_activities: false,
                enable_nexus: true,
            },
            "workflows + nexus",
        ),
        (
            WorkerTaskTypes {
                enable_workflows: false,
                enable_local_activities: false,
                enable_remote_activities: true,
                enable_nexus: true,
            },
            "activities + nexus",
        ),
        (WorkerTaskTypes::all(), "all types"),
    ];

    for (requested, description) in cases {
        let built = WorkerConfig::builder()
            .namespace("default")
            .task_queue("test-queue")
            .versioning_strategy(default_versioning_strategy())
            .task_types(requested)
            .build()
            .unwrap();

        // The built config must carry exactly the task types that were requested.
        assert_eq!(
            built.task_types, requested,
            "Effective types should match for {description}",
        );
    }
}
|
|
1501
2396
|
}
|