@temporalio/core-bridge 1.14.2-canary-release-testing.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +794 -650
- package/bridge-macros/src/derive_tryintojs.rs +40 -0
- package/lib/native.d.ts +24 -3
- package/package.json +4 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.github/workflows/per-pr.yml +6 -6
- package/sdk-core/AGENTS.md +42 -31
- package/sdk-core/Cargo.toml +4 -1
- package/sdk-core/README.md +19 -13
- package/sdk-core/crates/client/Cargo.toml +4 -0
- package/sdk-core/crates/client/README.md +139 -0
- package/sdk-core/crates/client/src/async_activity_handle.rs +297 -0
- package/sdk-core/crates/client/src/callback_based.rs +7 -0
- package/sdk-core/crates/client/src/errors.rs +294 -0
- package/sdk-core/crates/client/src/{raw.rs → grpc.rs} +370 -159
- package/sdk-core/crates/client/src/lib.rs +920 -1326
- package/sdk-core/crates/client/src/metrics.rs +24 -33
- package/sdk-core/crates/client/src/options_structs.rs +457 -0
- package/sdk-core/crates/client/src/replaceable.rs +5 -4
- package/sdk-core/crates/client/src/request_extensions.rs +8 -9
- package/sdk-core/crates/client/src/retry.rs +99 -54
- package/sdk-core/crates/client/src/{worker/mod.rs → worker.rs} +104 -29
- package/sdk-core/crates/client/src/workflow_handle.rs +826 -0
- package/sdk-core/crates/common/Cargo.toml +62 -3
- package/sdk-core/crates/common/build.rs +742 -12
- package/sdk-core/crates/common/protos/api_upstream/.github/workflows/ci.yml +2 -0
- package/sdk-core/crates/common/protos/api_upstream/.github/workflows/create-release.yml +0 -5
- package/sdk-core/crates/common/protos/api_upstream/Makefile +2 -1
- package/sdk-core/crates/common/protos/api_upstream/README.md +8 -0
- package/sdk-core/crates/common/protos/api_upstream/cmd/check-path-conflicts/main.go +137 -0
- package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv2.json +3329 -2647
- package/sdk-core/crates/common/protos/api_upstream/openapi/openapiv3.yaml +2734 -708
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/activity/v1/message.proto +155 -3
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/command/v1/message.proto +26 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/common/v1/message.proto +8 -1
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/deployment/v1/message.proto +27 -1
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/activity.proto +81 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/event_type.proto +4 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +4 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +15 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/enums/v1/workflow.proto +63 -15
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/errordetails/v1/message.proto +8 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/failure/v1/message.proto +1 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/history/v1/message.proto +111 -17
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/namespace/v1/message.proto +21 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/nexus/v1/message.proto +20 -1
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +4 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -0
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/worker/v1/message.proto +4 -7
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflow/v1/message.proto +80 -22
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +347 -23
- package/sdk-core/crates/common/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +242 -43
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/core_interface.proto +15 -0
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/nexus/nexus.proto +9 -2
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +8 -0
- package/sdk-core/crates/common/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +22 -5
- package/sdk-core/crates/common/src/activity_definition.rs +20 -0
- package/sdk-core/crates/common/src/data_converters.rs +770 -0
- package/sdk-core/crates/common/src/envconfig.rs +5 -0
- package/sdk-core/crates/common/src/lib.rs +15 -211
- package/sdk-core/crates/common/src/payload_visitor.rs +648 -0
- package/sdk-core/crates/common/src/priority.rs +110 -0
- package/sdk-core/crates/common/src/protos/canned_histories.rs +19 -0
- package/sdk-core/crates/common/src/protos/history_builder.rs +45 -0
- package/sdk-core/crates/common/src/protos/history_info.rs +2 -0
- package/sdk-core/crates/common/src/protos/mod.rs +134 -27
- package/sdk-core/crates/common/src/protos/task_token.rs +3 -3
- package/sdk-core/crates/common/src/protos/utilities.rs +11 -0
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/log_export.rs +11 -16
- package/sdk-core/crates/common/src/telemetry/metrics/core.rs +125 -0
- package/sdk-core/crates/common/src/telemetry/metrics.rs +272 -225
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/otel.rs +8 -13
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_meter.rs +49 -50
- package/sdk-core/crates/{sdk-core → common}/src/telemetry/prometheus_server.rs +2 -3
- package/sdk-core/crates/common/src/telemetry.rs +278 -19
- package/sdk-core/crates/common/src/worker.rs +68 -636
- package/sdk-core/crates/common/src/workflow_definition.rs +60 -0
- package/sdk-core/crates/macros/Cargo.toml +5 -1
- package/sdk-core/crates/macros/src/activities_definitions.rs +585 -0
- package/sdk-core/crates/macros/src/fsm_impl.rs +507 -0
- package/sdk-core/crates/macros/src/lib.rs +138 -512
- package/sdk-core/crates/macros/src/macro_utils.rs +106 -0
- package/sdk-core/crates/macros/src/workflow_definitions.rs +1224 -0
- package/sdk-core/crates/sdk/Cargo.toml +19 -6
- package/sdk-core/crates/sdk/README.md +415 -0
- package/sdk-core/crates/sdk/src/activities.rs +417 -0
- package/sdk-core/crates/sdk/src/interceptors.rs +1 -1
- package/sdk-core/crates/sdk/src/lib.rs +759 -442
- package/sdk-core/crates/sdk/src/workflow_context/options.rs +64 -35
- package/sdk-core/crates/sdk/src/workflow_context.rs +1033 -289
- package/sdk-core/crates/sdk/src/workflow_future.rs +277 -213
- package/sdk-core/crates/sdk/src/workflows.rs +711 -0
- package/sdk-core/crates/sdk-core/Cargo.toml +59 -65
- package/sdk-core/crates/sdk-core/benches/workflow_replay_bench.rs +45 -54
- package/sdk-core/crates/sdk-core/machine_coverage/ActivityMachine_Coverage.puml +1 -1
- package/sdk-core/crates/sdk-core/src/abstractions.rs +6 -10
- package/sdk-core/crates/sdk-core/src/core_tests/activity_tasks.rs +6 -5
- package/sdk-core/crates/sdk-core/src/core_tests/mod.rs +22 -21
- package/sdk-core/crates/sdk-core/src/core_tests/queries.rs +21 -25
- package/sdk-core/crates/sdk-core/src/core_tests/replay_flag.rs +7 -10
- package/sdk-core/crates/sdk-core/src/core_tests/updates.rs +14 -17
- package/sdk-core/crates/sdk-core/src/core_tests/workers.rs +647 -27
- package/sdk-core/crates/sdk-core/src/core_tests/workflow_tasks.rs +46 -41
- package/sdk-core/crates/sdk-core/src/ephemeral_server/mod.rs +13 -16
- package/sdk-core/crates/sdk-core/src/histfetch.rs +20 -10
- package/sdk-core/crates/sdk-core/src/lib.rs +60 -123
- package/sdk-core/crates/sdk-core/src/pollers/mod.rs +4 -9
- package/sdk-core/crates/sdk-core/src/pollers/poll_buffer.rs +411 -32
- package/sdk-core/crates/sdk-core/src/protosext/mod.rs +2 -2
- package/sdk-core/crates/sdk-core/src/replay/mod.rs +14 -5
- package/sdk-core/crates/sdk-core/src/telemetry/metrics.rs +183 -198
- package/sdk-core/crates/sdk-core/src/telemetry/mod.rs +3 -281
- package/sdk-core/crates/sdk-core/src/test_help/integ_helpers.rs +35 -16
- package/sdk-core/crates/sdk-core/src/test_help/unit_helpers.rs +3 -6
- package/sdk-core/crates/sdk-core/src/worker/activities/activity_heartbeat_manager.rs +1 -0
- package/sdk-core/crates/sdk-core/src/worker/activities/local_activities.rs +11 -14
- package/sdk-core/crates/sdk-core/src/worker/activities.rs +16 -19
- package/sdk-core/crates/sdk-core/src/worker/client/mocks.rs +11 -5
- package/sdk-core/crates/sdk-core/src/worker/client.rs +104 -86
- package/sdk-core/crates/sdk-core/src/worker/heartbeat.rs +10 -14
- package/sdk-core/crates/sdk-core/src/worker/mod.rs +1175 -241
- package/sdk-core/crates/sdk-core/src/worker/nexus.rs +150 -23
- package/sdk-core/crates/sdk-core/src/worker/slot_provider.rs +2 -2
- package/sdk-core/crates/sdk-core/src/worker/tuner/fixed_size.rs +2 -2
- package/sdk-core/crates/sdk-core/src/worker/tuner/resource_based.rs +25 -27
- package/sdk-core/crates/sdk-core/src/worker/tuner.rs +64 -44
- package/sdk-core/crates/sdk-core/src/worker/workflow/driven_workflow.rs +9 -3
- package/sdk-core/crates/sdk-core/src/worker/workflow/machines/patch_state_machine.rs +5 -8
- package/sdk-core/crates/sdk-core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +21 -22
- package/sdk-core/crates/sdk-core/src/worker/workflow/machines/workflow_machines.rs +28 -4
- package/sdk-core/crates/sdk-core/src/worker/workflow/managed_run.rs +20 -41
- package/sdk-core/crates/sdk-core/src/worker/workflow/mod.rs +50 -9
- package/sdk-core/crates/sdk-core/src/worker/workflow/run_cache.rs +4 -7
- package/sdk-core/crates/sdk-core/src/worker/workflow/wft_extraction.rs +2 -4
- package/sdk-core/crates/sdk-core/src/worker/workflow/wft_poller.rs +8 -9
- package/sdk-core/crates/sdk-core/src/worker/workflow/workflow_stream.rs +1 -3
- package/sdk-core/crates/sdk-core/tests/activities_procmacro.rs +6 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/basic_pass.rs +54 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.rs +18 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/invalid_self_type_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.rs +14 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/missing_context_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/multi_arg_pass.rs +48 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_input_pass.rs +14 -0
- package/sdk-core/crates/sdk-core/tests/activities_trybuild/no_return_type_pass.rs +19 -0
- package/sdk-core/crates/sdk-core/tests/cloud_tests.rs +14 -5
- package/sdk-core/crates/sdk-core/tests/common/activity_functions.rs +55 -0
- package/sdk-core/crates/sdk-core/tests/common/mod.rs +281 -236
- package/sdk-core/crates/sdk-core/tests/common/workflows.rs +41 -28
- package/sdk-core/crates/sdk-core/tests/global_metric_tests.rs +9 -14
- package/sdk-core/crates/sdk-core/tests/heavy_tests/fuzzy_workflow.rs +73 -66
- package/sdk-core/crates/sdk-core/tests/heavy_tests.rs +306 -268
- package/sdk-core/crates/sdk-core/tests/integ_tests/async_activity_client_tests.rs +230 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/client_tests.rs +94 -57
- package/sdk-core/crates/sdk-core/tests/integ_tests/data_converter_tests.rs +381 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +37 -38
- package/sdk-core/crates/sdk-core/tests/integ_tests/heartbeat_tests.rs +49 -40
- package/sdk-core/crates/sdk-core/tests/integ_tests/metrics_tests.rs +447 -300
- package/sdk-core/crates/sdk-core/tests/integ_tests/pagination_tests.rs +50 -45
- package/sdk-core/crates/sdk-core/tests/integ_tests/polling_tests.rs +157 -157
- package/sdk-core/crates/sdk-core/tests/integ_tests/queries_tests.rs +103 -89
- package/sdk-core/crates/sdk-core/tests/integ_tests/update_tests.rs +609 -463
- package/sdk-core/crates/sdk-core/tests/integ_tests/visibility_tests.rs +80 -62
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_heartbeat_tests.rs +389 -265
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_tests.rs +250 -185
- package/sdk-core/crates/sdk-core/tests/integ_tests/worker_versioning_tests.rs +52 -49
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_client_tests.rs +180 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/activities.rs +437 -327
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +82 -58
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +56 -30
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +364 -251
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/client_interactions.rs +552 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +110 -46
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +243 -149
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/eager.rs +98 -32
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +1475 -1040
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +73 -43
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/nexus.rs +402 -245
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/patches.rs +343 -207
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/queries.rs +415 -0
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/replay.rs +96 -36
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/resets.rs +155 -140
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/signals.rs +183 -113
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +85 -44
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/timers.rs +142 -48
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +73 -56
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests.rs +365 -242
- package/sdk-core/crates/sdk-core/tests/main.rs +22 -16
- package/sdk-core/crates/sdk-core/tests/manual_tests.rs +233 -187
- package/sdk-core/crates/sdk-core/tests/runner.rs +4 -6
- package/sdk-core/crates/sdk-core/tests/shared_tests/mod.rs +73 -27
- package/sdk-core/crates/sdk-core/tests/shared_tests/priority.rs +107 -84
- package/sdk-core/crates/sdk-core/tests/workflows_procmacro.rs +6 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.rs +26 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/async_query_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/basic_pass.rs +49 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/minimal_pass.rs +21 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.rs +26 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/mut_query_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.rs +21 -0
- package/sdk-core/crates/sdk-core/tests/workflows_trybuild/sync_run_fail.stderr +5 -0
- package/sdk-core/crates/sdk-core-c-bridge/Cargo.toml +8 -1
- package/sdk-core/crates/sdk-core-c-bridge/include/temporal-sdk-core-c-bridge.h +37 -26
- package/sdk-core/crates/sdk-core-c-bridge/src/client.rs +180 -87
- package/sdk-core/crates/sdk-core-c-bridge/src/lib.rs +89 -5
- package/sdk-core/crates/sdk-core-c-bridge/src/metric.rs +10 -16
- package/sdk-core/crates/sdk-core-c-bridge/src/runtime.rs +59 -67
- package/sdk-core/crates/sdk-core-c-bridge/src/testing.rs +10 -10
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/context.rs +57 -22
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/mod.rs +108 -12
- package/sdk-core/crates/sdk-core-c-bridge/src/tests/utils.rs +9 -52
- package/sdk-core/crates/sdk-core-c-bridge/src/worker.rs +74 -91
- package/sdk-core/rustfmt.toml +2 -1
- package/src/client.rs +206 -289
- package/src/helpers/try_into_js.rs +88 -2
- package/src/metrics.rs +277 -35
- package/src/runtime.rs +94 -45
- package/src/testing.rs +9 -16
- package/src/worker.rs +86 -68
- package/ts/native.ts +39 -3
- package/sdk-core/crates/client/src/workflow_handle/mod.rs +0 -212
- package/sdk-core/crates/common/src/errors.rs +0 -85
- package/sdk-core/crates/common/tests/worker_task_types_test.rs +0 -129
- package/sdk-core/crates/macros/LICENSE.txt +0 -21
- package/sdk-core/crates/sdk/src/activity_context.rs +0 -238
- package/sdk-core/crates/sdk/src/app_data.rs +0 -37
- package/sdk-core/crates/sdk-core/tests/integ_tests/activity_functions.rs +0 -5
- package/sdk-core/crates/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +0 -61
|
@@ -6,12 +6,25 @@ mod slot_provider;
|
|
|
6
6
|
pub(crate) mod tuner;
|
|
7
7
|
mod workflow;
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
use temporalio_client::Connection;
|
|
10
|
+
use temporalio_common::{
|
|
11
|
+
protos::{
|
|
12
|
+
coresdk::{
|
|
13
|
+
ActivitySlotInfo, LocalActivitySlotInfo, NamespaceInfo, NexusSlotInfo,
|
|
14
|
+
WorkflowSlotInfo, activity_result::ActivityExecutionResult, namespace_info,
|
|
15
|
+
},
|
|
16
|
+
temporal::api::{enums::v1::VersioningBehavior, worker::v1::PluginInfo},
|
|
17
|
+
},
|
|
18
|
+
telemetry::TelemetryInstance,
|
|
19
|
+
worker::{WorkerDeploymentOptions, WorkerDeploymentVersion},
|
|
20
|
+
};
|
|
10
21
|
pub use tuner::{
|
|
11
22
|
FixedSizeSlotSupplier, ResourceBasedSlotsOptions, ResourceBasedSlotsOptionsBuilder,
|
|
12
23
|
ResourceBasedTuner, ResourceSlotOptions, SlotSupplierOptions, TunerBuilder, TunerHolder,
|
|
13
|
-
TunerHolderOptions,
|
|
24
|
+
TunerHolderOptions,
|
|
14
25
|
};
|
|
26
|
+
// Re-export the generated builder (it's in the tuner module)
|
|
27
|
+
pub use tuner::TunerHolderOptionsBuilder;
|
|
15
28
|
pub(crate) use tuner::{RealSysInfo, SystemResourceInfo};
|
|
16
29
|
|
|
17
30
|
pub(crate) use activities::{
|
|
@@ -24,18 +37,13 @@ pub(crate) use wft_poller::WFTPollerShared;
|
|
|
24
37
|
pub use workflow::LEGACY_QUERY_ID;
|
|
25
38
|
|
|
26
39
|
use crate::{
|
|
27
|
-
ActivityHeartbeat,
|
|
40
|
+
ActivityHeartbeat,
|
|
28
41
|
abstractions::{MeteredPermitDealer, PermitDealerContextData, dbg_panic},
|
|
29
|
-
errors::CompleteWfError,
|
|
30
42
|
pollers::{ActivityTaskOptions, BoxedActPoller, BoxedNexusPoller, LongPollBuffer},
|
|
31
43
|
protosext::validate_activity_completion,
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
metrics::{
|
|
36
|
-
MetricsContext, activity_poller, activity_worker_type, local_activity_worker_type,
|
|
37
|
-
nexus_poller, nexus_worker_type, workflow_worker_type,
|
|
38
|
-
},
|
|
44
|
+
telemetry::metrics::{
|
|
45
|
+
MetricsContext, WorkerHeartbeatMetrics, activity_poller, activity_worker_type,
|
|
46
|
+
local_activity_worker_type, nexus_poller, nexus_worker_type, workflow_worker_type,
|
|
39
47
|
},
|
|
40
48
|
worker::{
|
|
41
49
|
activities::{LACompleteAction, LocalActivityManager, NextPendingLAAction},
|
|
@@ -56,6 +64,8 @@ use gethostname::gethostname;
|
|
|
56
64
|
use parking_lot::RwLock;
|
|
57
65
|
use slot_provider::SlotProvider;
|
|
58
66
|
use std::{
|
|
67
|
+
any::Any,
|
|
68
|
+
collections::{HashMap, HashSet},
|
|
59
69
|
convert::TryInto,
|
|
60
70
|
future,
|
|
61
71
|
sync::{
|
|
@@ -68,14 +78,12 @@ use temporalio_client::worker::{
|
|
|
68
78
|
ClientWorker, HeartbeatCallback, SharedNamespaceWorkerTrait, Slot as SlotTrait,
|
|
69
79
|
};
|
|
70
80
|
use temporalio_common::{
|
|
71
|
-
errors::{CompleteNexusError, WorkerValidationError},
|
|
72
81
|
protos::{
|
|
73
82
|
TaskToken,
|
|
74
83
|
coresdk::{
|
|
75
84
|
ActivityTaskCompletion,
|
|
76
|
-
activity_result::activity_execution_result,
|
|
77
85
|
activity_task::ActivityTask,
|
|
78
|
-
nexus::{NexusTask, NexusTaskCompletion
|
|
86
|
+
nexus::{NexusTask, NexusTaskCompletion},
|
|
79
87
|
workflow_activation::{WorkflowActivation, remove_from_cache::EvictionReason},
|
|
80
88
|
workflow_completion::WorkflowActivationCompletion,
|
|
81
89
|
},
|
|
@@ -86,11 +94,8 @@ use temporalio_common::{
|
|
|
86
94
|
worker::v1::{WorkerHeartbeat, WorkerHostInfo, WorkerPollerInfo, WorkerSlotsInfo},
|
|
87
95
|
},
|
|
88
96
|
},
|
|
89
|
-
telemetry::metrics::
|
|
90
|
-
worker::
|
|
91
|
-
ActivitySlotKind, LocalActivitySlotKind, NexusSlotKind, PollerBehavior, SlotKind,
|
|
92
|
-
WorkerTaskTypes, WorkflowSlotKind,
|
|
93
|
-
},
|
|
97
|
+
telemetry::metrics::TemporalMeter,
|
|
98
|
+
worker::WorkerTaskTypes,
|
|
94
99
|
};
|
|
95
100
|
use tokio::sync::{mpsc::unbounded_channel, watch};
|
|
96
101
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
@@ -109,6 +114,278 @@ use {
|
|
|
109
114
|
},
|
|
110
115
|
};
|
|
111
116
|
|
|
117
|
+
/// Defines per-worker configuration options
|
|
118
|
+
#[derive(Clone, bon::Builder)]
|
|
119
|
+
#[builder(on(String, into), state_mod(vis = "pub"), finish_fn(vis = "", name = build_internal))]
|
|
120
|
+
#[non_exhaustive]
|
|
121
|
+
pub struct WorkerConfig {
|
|
122
|
+
/// The Temporal service namespace this worker is bound to
|
|
123
|
+
pub namespace: String,
|
|
124
|
+
/// What task queue will this worker poll from? This task queue name will be used for both
|
|
125
|
+
/// workflow and activity polling.
|
|
126
|
+
pub task_queue: String,
|
|
127
|
+
/// A human-readable string that can identify this worker. Using something like sdk version
|
|
128
|
+
/// and host name is a good default. If set, overrides the identity set (if any) on the client
|
|
129
|
+
/// used by this worker.
|
|
130
|
+
pub client_identity_override: Option<String>,
|
|
131
|
+
/// If set nonzero, workflows will be cached and sticky task queues will be used, meaning that
|
|
132
|
+
/// history updates are applied incrementally to suspended instances of workflow execution.
|
|
133
|
+
/// Workflows are evicted according to a least-recently-used policy once the cache maximum is
|
|
134
|
+
/// reached. Workflows may also be explicitly evicted at any time, or as a result of errors
|
|
135
|
+
/// or failures.
|
|
136
|
+
#[builder(default = 0)]
|
|
137
|
+
pub max_cached_workflows: usize,
|
|
138
|
+
/// Set a [crate::WorkerTuner] for this worker. Either this or at least one of the
|
|
139
|
+
/// `max_outstanding_*` fields must be set.
|
|
140
|
+
pub tuner: Option<Arc<dyn WorkerTuner + Send + Sync>>,
|
|
141
|
+
/// Maximum number of concurrent poll workflow task requests we will perform at a time on this
|
|
142
|
+
/// worker's task queue. See also [WorkerConfig::nonsticky_to_sticky_poll_ratio].
|
|
143
|
+
/// If using SimpleMaximum, must be at least 2 when `max_cached_workflows` > 0, or it is an error.
|
|
144
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
145
|
+
pub workflow_task_poller_behavior: PollerBehavior,
|
|
146
|
+
/// Only applies when using [PollerBehavior::SimpleMaximum]
|
|
147
|
+
///
|
|
148
|
+
/// (max workflow task polls * this number) = the number of max pollers that will be allowed for
|
|
149
|
+
/// the nonsticky queue when sticky tasks are enabled. If both defaults are used, the sticky
|
|
150
|
+
/// queue will allow 4 max pollers while the nonsticky queue will allow one. The minimum for
|
|
151
|
+
/// either poller is 1, so if the maximum allowed is 1 and sticky queues are enabled, there will
|
|
152
|
+
/// be 2 concurrent polls.
|
|
153
|
+
#[builder(default = 0.2)]
|
|
154
|
+
pub nonsticky_to_sticky_poll_ratio: f32,
|
|
155
|
+
/// Maximum number of concurrent poll activity task requests we will perform at a time on this
|
|
156
|
+
/// worker's task queue
|
|
157
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
158
|
+
pub activity_task_poller_behavior: PollerBehavior,
|
|
159
|
+
/// Maximum number of concurrent poll nexus task requests we will perform at a time on this
|
|
160
|
+
/// worker's task queue
|
|
161
|
+
#[builder(default = PollerBehavior::SimpleMaximum(5))]
|
|
162
|
+
pub nexus_task_poller_behavior: PollerBehavior,
|
|
163
|
+
/// Specifies which task types this worker will poll for.
|
|
164
|
+
///
|
|
165
|
+
/// Note: At least one task type must be specified or the worker will fail validation.
|
|
166
|
+
pub task_types: WorkerTaskTypes,
|
|
167
|
+
/// How long a workflow task is allowed to sit on the sticky queue before it is timed out
|
|
168
|
+
/// and moved to the non-sticky queue where it may be picked up by any worker.
|
|
169
|
+
#[builder(default = Duration::from_secs(10))]
|
|
170
|
+
pub sticky_queue_schedule_to_start_timeout: Duration,
|
|
171
|
+
|
|
172
|
+
/// Longest interval for throttling activity heartbeats
|
|
173
|
+
#[builder(default = Duration::from_secs(60))]
|
|
174
|
+
pub max_heartbeat_throttle_interval: Duration,
|
|
175
|
+
|
|
176
|
+
/// Default interval for throttling activity heartbeats in case
|
|
177
|
+
/// `ActivityOptions.heartbeat_timeout` is unset.
|
|
178
|
+
/// When the timeout *is* set in the `ActivityOptions`, throttling is set to
|
|
179
|
+
/// `heartbeat_timeout * 0.8`.
|
|
180
|
+
#[builder(default = Duration::from_secs(30))]
|
|
181
|
+
pub default_heartbeat_throttle_interval: Duration,
|
|
182
|
+
|
|
183
|
+
/// Sets the maximum number of activities per second the task queue will dispatch, controlled
|
|
184
|
+
/// server-side. Note that this only takes effect upon an activity poll request. If multiple
|
|
185
|
+
/// workers on the same queue have different values set, they will thrash with the last poller
|
|
186
|
+
/// winning.
|
|
187
|
+
///
|
|
188
|
+
/// Setting this to a nonzero value will also disable eager activity execution.
|
|
189
|
+
pub max_task_queue_activities_per_second: Option<f64>,
|
|
190
|
+
|
|
191
|
+
/// Limits the number of activities per second that this worker will process. The worker will
|
|
192
|
+
/// not poll for new activities if by doing so it might receive and execute an activity which
|
|
193
|
+
/// would cause it to exceed this limit. Negative, zero, or NaN values will cause building
|
|
194
|
+
/// the options to fail.
|
|
195
|
+
pub max_worker_activities_per_second: Option<f64>,
|
|
196
|
+
|
|
197
|
+
/// If set false (default), shutdown will not finish until all pending evictions have been
|
|
198
|
+
/// issued and replied to. If set true shutdown will be considered complete when the only
|
|
199
|
+
/// remaining work is pending evictions.
|
|
200
|
+
///
|
|
201
|
+
/// This flag is useful during tests to avoid needing to deal with lots of uninteresting
|
|
202
|
+
/// evictions during shutdown. Alternatively, if a lang implementation finds it easy to clean
|
|
203
|
+
/// up during shutdown, setting this true saves some back-and-forth.
|
|
204
|
+
#[builder(default = false)]
|
|
205
|
+
pub ignore_evicts_on_shutdown: bool,
|
|
206
|
+
|
|
207
|
+
/// Maximum number of next page (or initial) history event listing requests we'll make
|
|
208
|
+
/// concurrently. I don't think it's worth exposing this to users until we encounter a reason.
|
|
209
|
+
#[builder(default = 5)]
|
|
210
|
+
pub fetching_concurrency: usize,
|
|
211
|
+
|
|
212
|
+
/// If set, core will issue cancels for all outstanding activities and nexus operations after
|
|
213
|
+
/// shutdown has been initiated and this amount of time has elapsed.
|
|
214
|
+
pub graceful_shutdown_period: Option<Duration>,
|
|
215
|
+
|
|
216
|
+
/// The amount of time core will wait before timing out activities using its own local timers
|
|
217
|
+
/// after one of them elapses. This is to avoid racing with server's own tracking of the
|
|
218
|
+
/// timeout.
|
|
219
|
+
#[builder(default = Duration::from_secs(5))]
|
|
220
|
+
pub local_timeout_buffer_for_activities: Duration,
|
|
221
|
+
|
|
222
|
+
/// Any error types listed here will cause any workflow being processed by this worker to fail,
|
|
223
|
+
/// rather than simply failing the workflow task.
|
|
224
|
+
#[builder(default)]
|
|
225
|
+
pub workflow_failure_errors: HashSet<WorkflowErrorType>,
|
|
226
|
+
|
|
227
|
+
/// Like [WorkerConfig::workflow_failure_errors], but specific to certain workflow types (the
|
|
228
|
+
/// map key).
|
|
229
|
+
#[builder(default)]
|
|
230
|
+
pub workflow_types_to_failure_errors: HashMap<String, HashSet<WorkflowErrorType>>,
|
|
231
|
+
|
|
232
|
+
/// The maximum allowed number of workflow tasks that will ever be given to this worker at one
|
|
233
|
+
/// time. Note that one workflow task may require multiple activations - so the WFT counts as
|
|
234
|
+
/// "outstanding" until all activations it requires have been completed. Must be at least 2 if
|
|
235
|
+
/// `max_cached_workflows` is > 0, or is an error.
|
|
236
|
+
///
|
|
237
|
+
/// Mutually exclusive with `tuner`
|
|
238
|
+
#[builder(into)]
|
|
239
|
+
pub max_outstanding_workflow_tasks: Option<usize>,
|
|
240
|
+
/// The maximum number of activity tasks that will ever be given to this worker concurrently.
|
|
241
|
+
///
|
|
242
|
+
/// Mutually exclusive with `tuner`
|
|
243
|
+
#[builder(into)]
|
|
244
|
+
pub max_outstanding_activities: Option<usize>,
|
|
245
|
+
/// The maximum number of local activity tasks that will ever be given to this worker
|
|
246
|
+
/// concurrently.
|
|
247
|
+
///
|
|
248
|
+
/// Mutually exclusive with `tuner`
|
|
249
|
+
#[builder(into)]
|
|
250
|
+
pub max_outstanding_local_activities: Option<usize>,
|
|
251
|
+
/// The maximum number of nexus tasks that will ever be given to this worker
|
|
252
|
+
/// concurrently.
|
|
253
|
+
///
|
|
254
|
+
/// Mutually exclusive with `tuner`
|
|
255
|
+
#[builder(into)]
|
|
256
|
+
pub max_outstanding_nexus_tasks: Option<usize>,
|
|
257
|
+
|
|
258
|
+
/// A versioning strategy for this worker.
|
|
259
|
+
pub versioning_strategy: WorkerVersioningStrategy,
|
|
260
|
+
|
|
261
|
+
/// List of plugins used by lang.
|
|
262
|
+
#[builder(default)]
|
|
263
|
+
pub plugins: HashSet<PluginInfo>,
|
|
264
|
+
|
|
265
|
+
/// Skips the single worker+client+namespace+task_queue check
|
|
266
|
+
#[builder(default = false)]
|
|
267
|
+
pub skip_client_worker_set_check: bool,
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
impl WorkerConfig {
|
|
271
|
+
/// Returns true if the configuration specifies we should fail a workflow on a certain error
|
|
272
|
+
/// type rather than failing the workflow task.
|
|
273
|
+
pub fn should_fail_workflow(
|
|
274
|
+
&self,
|
|
275
|
+
workflow_type: &str,
|
|
276
|
+
error_type: &WorkflowErrorType,
|
|
277
|
+
) -> bool {
|
|
278
|
+
self.workflow_failure_errors.contains(error_type)
|
|
279
|
+
|| self
|
|
280
|
+
.workflow_types_to_failure_errors
|
|
281
|
+
.get(workflow_type)
|
|
282
|
+
.map(|s| s.contains(error_type))
|
|
283
|
+
.unwrap_or(false)
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
pub(crate) fn computed_deployment_version(&self) -> Option<WorkerDeploymentVersion> {
|
|
287
|
+
let wdv = match self.versioning_strategy {
|
|
288
|
+
WorkerVersioningStrategy::None { ref build_id } => WorkerDeploymentVersion {
|
|
289
|
+
deployment_name: "".to_owned(),
|
|
290
|
+
build_id: build_id.clone(),
|
|
291
|
+
},
|
|
292
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(ref opts) => opts.version.clone(),
|
|
293
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { ref build_id } => {
|
|
294
|
+
WorkerDeploymentVersion {
|
|
295
|
+
deployment_name: "".to_owned(),
|
|
296
|
+
build_id: build_id.clone(),
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
};
|
|
300
|
+
if wdv.is_empty() { None } else { Some(wdv) }
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
impl<S: worker_config_builder::IsComplete> WorkerConfigBuilder<S> {
|
|
305
|
+
/// Build and validate the worker configuration
|
|
306
|
+
pub fn build(self) -> Result<WorkerConfig, String> {
|
|
307
|
+
let config = self.build_internal();
|
|
308
|
+
let task_types = &config.task_types;
|
|
309
|
+
if task_types.is_empty() {
|
|
310
|
+
return Err("At least one task type must be enabled in `task_types`".to_string());
|
|
311
|
+
}
|
|
312
|
+
if !task_types.enable_workflows && task_types.enable_local_activities {
|
|
313
|
+
return Err(
|
|
314
|
+
"`task_types` cannot enable local activities without workflows".to_string(),
|
|
315
|
+
);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
config.workflow_task_poller_behavior.validate()?;
|
|
319
|
+
config.activity_task_poller_behavior.validate()?;
|
|
320
|
+
config.nexus_task_poller_behavior.validate()?;
|
|
321
|
+
|
|
322
|
+
if let Some(ref x) = config.max_worker_activities_per_second
|
|
323
|
+
&& (!x.is_normal() || x.is_sign_negative())
|
|
324
|
+
{
|
|
325
|
+
return Err(
|
|
326
|
+
"`max_worker_activities_per_second` must be positive and nonzero".to_string(),
|
|
327
|
+
);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
if matches!(config.max_outstanding_workflow_tasks, Some(v) if v == 0) {
|
|
331
|
+
return Err("`max_outstanding_workflow_tasks` must be > 0".to_string());
|
|
332
|
+
}
|
|
333
|
+
if matches!(config.max_outstanding_activities, Some(v) if v == 0) {
|
|
334
|
+
return Err("`max_outstanding_activities` must be > 0".to_string());
|
|
335
|
+
}
|
|
336
|
+
if matches!(config.max_outstanding_local_activities, Some(v) if v == 0) {
|
|
337
|
+
return Err("`max_outstanding_local_activities` must be > 0".to_string());
|
|
338
|
+
}
|
|
339
|
+
if matches!(config.max_outstanding_nexus_tasks, Some(v) if v == 0) {
|
|
340
|
+
return Err("`max_outstanding_nexus_tasks` must be > 0".to_string());
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if config.max_cached_workflows > 0 {
|
|
344
|
+
if let Some(max_wft) = config.max_outstanding_workflow_tasks
|
|
345
|
+
&& max_wft < 2
|
|
346
|
+
{
|
|
347
|
+
return Err(
|
|
348
|
+
"`max_cached_workflows` > 0 requires `max_outstanding_workflow_tasks` >= 2"
|
|
349
|
+
.to_string(),
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
if matches!(config.workflow_task_poller_behavior, PollerBehavior::SimpleMaximum(u) if u < 2)
|
|
353
|
+
{
|
|
354
|
+
return Err("`max_cached_workflows` > 0 requires `workflow_task_poller_behavior` to be at least 2".to_string());
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if config.tuner.is_some()
|
|
359
|
+
&& (config.max_outstanding_workflow_tasks.is_some()
|
|
360
|
+
|| config.max_outstanding_activities.is_some()
|
|
361
|
+
|| config.max_outstanding_local_activities.is_some())
|
|
362
|
+
{
|
|
363
|
+
return Err("max_outstanding_* fields are mutually exclusive with `tuner`".to_string());
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
match &config.versioning_strategy {
|
|
367
|
+
WorkerVersioningStrategy::None { .. } => {}
|
|
368
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(d) => {
|
|
369
|
+
if d.use_worker_versioning
|
|
370
|
+
&& (d.version.build_id.is_empty() || d.version.deployment_name.is_empty())
|
|
371
|
+
{
|
|
372
|
+
return Err("WorkerDeploymentVersion must have a non-empty build_id and deployment_name when deployment-based versioning is enabled".to_string());
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => {
|
|
376
|
+
if build_id.is_empty() {
|
|
377
|
+
return Err(
|
|
378
|
+
"Legacy build id-based versioning must have a non-empty build_id"
|
|
379
|
+
.to_string(),
|
|
380
|
+
);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
Ok(config)
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
|
|
112
389
|
/// A worker polls on a certain task queue
|
|
113
390
|
pub struct Worker {
|
|
114
391
|
config: WorkerConfig,
|
|
@@ -138,6 +415,9 @@ pub struct Worker {
|
|
|
138
415
|
client_worker_registrator: Arc<ClientWorkerRegistrator>,
|
|
139
416
|
/// Status of the worker
|
|
140
417
|
status: Arc<RwLock<WorkerStatus>>,
|
|
418
|
+
/// Set during validate() when server supports graceful poll cancellation on shutdown.
|
|
419
|
+
/// Shared with pollers so they can decide per-poll whether to hard-kill or wait.
|
|
420
|
+
graceful_poll_shutdown: Arc<AtomicBool>,
|
|
141
421
|
}
|
|
142
422
|
|
|
143
423
|
struct AllPermitsTracker {
|
|
@@ -160,156 +440,19 @@ pub(crate) struct WorkerTelemetry {
|
|
|
160
440
|
trace_subscriber: Option<Arc<dyn Subscriber + Send + Sync>>,
|
|
161
441
|
}
|
|
162
442
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
|
|
171
|
-
self.next_workflow_activation().await
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
#[instrument(skip(self))]
|
|
175
|
-
async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
|
|
176
|
-
loop {
|
|
177
|
-
match self.activity_poll().await.transpose() {
|
|
178
|
-
Some(r) => break r,
|
|
179
|
-
None => {
|
|
180
|
-
tokio::task::yield_now().await;
|
|
181
|
-
continue;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
#[instrument(skip(self))]
|
|
188
|
-
async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
|
|
189
|
-
match &self.nexus_mgr {
|
|
190
|
-
Some(mgr) => mgr.next_nexus_task().await,
|
|
191
|
-
None => Err(PollError::ShutDown),
|
|
443
|
+
impl WorkerTelemetry {
|
|
444
|
+
pub(crate) fn from_meter(meter: TemporalMeter) -> Self {
|
|
445
|
+
Self {
|
|
446
|
+
temporal_metric_meter: Some(meter),
|
|
447
|
+
trace_subscriber: None,
|
|
192
448
|
}
|
|
193
449
|
}
|
|
194
|
-
|
|
195
|
-
async fn complete_workflow_activation(
|
|
196
|
-
&self,
|
|
197
|
-
completion: WorkflowActivationCompletion,
|
|
198
|
-
) -> Result<(), CompleteWfError> {
|
|
199
|
-
self.complete_workflow_activation(completion).await
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
async fn complete_activity_task(
|
|
203
|
-
&self,
|
|
204
|
-
completion: ActivityTaskCompletion,
|
|
205
|
-
) -> Result<(), CompleteActivityError> {
|
|
206
|
-
let task_token = TaskToken(completion.task_token);
|
|
207
|
-
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
208
|
-
s
|
|
209
|
-
} else {
|
|
210
|
-
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
211
|
-
reason: "Activity completion had empty result/status field".to_owned(),
|
|
212
|
-
completion: None,
|
|
213
|
-
});
|
|
214
|
-
};
|
|
215
|
-
|
|
216
|
-
self.complete_activity(task_token, status).await
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
async fn complete_nexus_task(
|
|
220
|
-
&self,
|
|
221
|
-
completion: NexusTaskCompletion,
|
|
222
|
-
) -> Result<(), CompleteNexusError> {
|
|
223
|
-
let status = if let Some(s) = completion.status {
|
|
224
|
-
s
|
|
225
|
-
} else {
|
|
226
|
-
return Err(CompleteNexusError::MalformedNexusCompletion {
|
|
227
|
-
reason: "Nexus completion had empty status field".to_owned(),
|
|
228
|
-
});
|
|
229
|
-
};
|
|
230
|
-
|
|
231
|
-
self.complete_nexus_task(TaskToken(completion.task_token), status)
|
|
232
|
-
.await
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
236
|
-
self.record_heartbeat(details);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
fn request_workflow_eviction(&self, run_id: &str) {
|
|
240
|
-
self.request_wf_eviction(
|
|
241
|
-
run_id,
|
|
242
|
-
"Eviction explicitly requested by lang",
|
|
243
|
-
EvictionReason::LangRequested,
|
|
244
|
-
);
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
fn get_config(&self) -> &WorkerConfig {
|
|
248
|
-
&self.config
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
/// Begins the shutdown process, tells pollers they should stop. Is idempotent.
|
|
252
|
-
fn initiate_shutdown(&self) {
|
|
253
|
-
if !self.shutdown_token.is_cancelled() {
|
|
254
|
-
info!(
|
|
255
|
-
task_queue=%self.config.task_queue,
|
|
256
|
-
namespace=%self.config.namespace,
|
|
257
|
-
"Initiated shutdown",
|
|
258
|
-
);
|
|
259
|
-
}
|
|
260
|
-
self.shutdown_token.cancel();
|
|
261
|
-
{
|
|
262
|
-
*self.status.write() = WorkerStatus::ShuttingDown;
|
|
263
|
-
}
|
|
264
|
-
// First, unregister worker from the client
|
|
265
|
-
if !self.client_worker_registrator.shared_namespace_worker {
|
|
266
|
-
let _res = self
|
|
267
|
-
.client
|
|
268
|
-
.workers()
|
|
269
|
-
.unregister_worker(self.worker_instance_key);
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// Push a BumpStream message to the workflow activation queue. This ensures that
|
|
273
|
-
// any pending workflow activation polls will resolve, even if there are no other inputs.
|
|
274
|
-
if let Some(workflows) = &self.workflows {
|
|
275
|
-
workflows.bump_stream();
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// Second, we want to stop polling of both activity and workflow tasks
|
|
279
|
-
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
280
|
-
atm.initiate_shutdown();
|
|
281
|
-
}
|
|
282
|
-
// Let the manager know that shutdown has been initiated to try to unblock the local
|
|
283
|
-
// activity poll in case this worker is an activity-only worker.
|
|
284
|
-
if let Some(la_mgr) = &self.local_act_mgr {
|
|
285
|
-
la_mgr.shutdown_initiated();
|
|
286
|
-
|
|
287
|
-
// If workflows have never been polled, immediately tell the local activity manager
|
|
288
|
-
// that workflows have shut down, so it can proceed with shutdown without waiting.
|
|
289
|
-
// This is particularly important for activity-only workers.
|
|
290
|
-
if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
|
|
291
|
-
la_mgr.workflows_have_shutdown();
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
async fn shutdown(&self) {
|
|
297
|
-
self.shutdown().await
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
async fn finalize_shutdown(self) {
|
|
301
|
-
self.finalize_shutdown().await
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
fn worker_instance_key(&self) -> Uuid {
|
|
305
|
-
self.worker_instance_key
|
|
306
|
-
}
|
|
307
450
|
}
|
|
308
451
|
|
|
309
452
|
impl Worker {
|
|
310
453
|
/// Creates a new [Worker] from a [WorkerClient] instance with real task pollers and optional
|
|
311
454
|
/// telemetry.
|
|
312
|
-
pub fn new(
|
|
455
|
+
pub(crate) fn new(
|
|
313
456
|
config: WorkerConfig,
|
|
314
457
|
sticky_queue_name: Option<String>,
|
|
315
458
|
client: Arc<dyn WorkerClient>,
|
|
@@ -334,6 +477,39 @@ impl Worker {
|
|
|
334
477
|
)
|
|
335
478
|
}
|
|
336
479
|
|
|
480
|
+
/// Validate that the worker can properly connect to server, plus any other validation that
|
|
481
|
+
/// needs to be done asynchronously. Lang SDKs should call this function once before calling
|
|
482
|
+
/// any others.
|
|
483
|
+
pub async fn validate(&self) -> Result<NamespaceInfo, WorkerValidationError> {
|
|
484
|
+
match self.client.describe_namespace().await {
|
|
485
|
+
Ok(info) => {
|
|
486
|
+
let ns_info = info.namespace_info;
|
|
487
|
+
let limits = ns_info.as_ref().and_then(|ns_info| {
|
|
488
|
+
ns_info.limits.map(|api_limits| namespace_info::Limits {
|
|
489
|
+
blob_size_limit_error: api_limits.blob_size_limit_error,
|
|
490
|
+
memo_size_limit_error: api_limits.memo_size_limit_error,
|
|
491
|
+
})
|
|
492
|
+
});
|
|
493
|
+
if ns_info
|
|
494
|
+
.and_then(|ns| ns.capabilities)
|
|
495
|
+
.is_some_and(|caps| caps.worker_poll_complete_on_shutdown)
|
|
496
|
+
{
|
|
497
|
+
self.graceful_poll_shutdown.store(true, Ordering::Relaxed);
|
|
498
|
+
}
|
|
499
|
+
Ok(NamespaceInfo { limits })
|
|
500
|
+
}
|
|
501
|
+
Err(e) if e.code() == tonic::Code::Unimplemented => {
|
|
502
|
+
// Ignore if unimplemented since we wouldn't want to fail against an old server, for
|
|
503
|
+
// example.
|
|
504
|
+
Ok(NamespaceInfo::default())
|
|
505
|
+
}
|
|
506
|
+
Err(e) => Err(WorkerValidationError::NamespaceDescribeError {
|
|
507
|
+
source: e,
|
|
508
|
+
namespace: self.config.namespace.clone(),
|
|
509
|
+
}),
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
337
513
|
/// Replace client.
|
|
338
514
|
///
|
|
339
515
|
/// For eager workflow purposes, this new client will now apply to future eager start requests
|
|
@@ -343,23 +519,22 @@ impl Worker {
|
|
|
343
519
|
/// For worker heartbeat, this will remove an existing shared worker if it is the last worker of
|
|
344
520
|
/// the old client and create a new nexus worker if it's the first client of the namespace on
|
|
345
521
|
/// the new client.
|
|
346
|
-
pub fn replace_client
|
|
347
|
-
where
|
|
348
|
-
CT: Into<AnyClient>,
|
|
349
|
-
{
|
|
522
|
+
pub fn replace_client(&self, mut new_connection: Connection) -> Result<(), anyhow::Error> {
|
|
350
523
|
// Unregister worker from current client, register in new client at the end
|
|
524
|
+
self.client
|
|
525
|
+
.workers()
|
|
526
|
+
.unregister_slot_provider(self.worker_instance_key)?;
|
|
351
527
|
let client_worker = self
|
|
352
528
|
.client
|
|
353
529
|
.workers()
|
|
354
|
-
.
|
|
530
|
+
.finalize_unregister(self.worker_instance_key)?;
|
|
355
531
|
|
|
356
|
-
|
|
357
|
-
|
|
532
|
+
super::init_worker_client(
|
|
533
|
+
&mut new_connection,
|
|
358
534
|
self.config.client_identity_override.clone(),
|
|
359
|
-
new_client,
|
|
360
535
|
);
|
|
361
536
|
|
|
362
|
-
self.client.
|
|
537
|
+
self.client.replace_connection(new_connection);
|
|
363
538
|
*self.client_worker_registrator.client.write() = self.client.clone();
|
|
364
539
|
self.client
|
|
365
540
|
.workers()
|
|
@@ -441,6 +616,7 @@ impl Worker {
|
|
|
441
616
|
let wf_sticky_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
442
617
|
let act_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
443
618
|
let nexus_last_suc_poll_time = Arc::new(AtomicCell::new(None));
|
|
619
|
+
let graceful_poll_shutdown = Arc::new(AtomicBool::new(false));
|
|
444
620
|
|
|
445
621
|
let nexus_slots = MeteredPermitDealer::new(
|
|
446
622
|
tuner.nexus_task_slot_supplier(),
|
|
@@ -461,6 +637,7 @@ impl Worker {
|
|
|
461
637
|
&wft_slots,
|
|
462
638
|
wf_last_suc_poll_time.clone(),
|
|
463
639
|
wf_sticky_last_suc_poll_time.clone(),
|
|
640
|
+
graceful_poll_shutdown.clone(),
|
|
464
641
|
)
|
|
465
642
|
.boxed();
|
|
466
643
|
let stream = if !client.is_mock() {
|
|
@@ -490,6 +667,7 @@ impl Worker {
|
|
|
490
667
|
max_tps: config.max_task_queue_activities_per_second,
|
|
491
668
|
},
|
|
492
669
|
act_last_suc_poll_time.clone(),
|
|
670
|
+
graceful_poll_shutdown.clone(),
|
|
493
671
|
);
|
|
494
672
|
Some(Box::from(ap) as BoxedActPoller)
|
|
495
673
|
} else {
|
|
@@ -507,6 +685,7 @@ impl Worker {
|
|
|
507
685
|
Some(move |np| np_metrics.record_num_pollers(np)),
|
|
508
686
|
nexus_last_suc_poll_time.clone(),
|
|
509
687
|
shared_namespace_worker,
|
|
688
|
+
graceful_poll_shutdown.clone(),
|
|
510
689
|
)) as BoxedNexusPoller)
|
|
511
690
|
} else {
|
|
512
691
|
None
|
|
@@ -611,9 +790,7 @@ impl Worker {
|
|
|
611
790
|
});
|
|
612
791
|
|
|
613
792
|
let deployment_options = match &config.versioning_strategy {
|
|
614
|
-
|
|
615
|
-
Some(opts.clone())
|
|
616
|
-
}
|
|
793
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => Some(opts.clone()),
|
|
617
794
|
_ => None,
|
|
618
795
|
};
|
|
619
796
|
let provider = SlotProvider::new(
|
|
@@ -623,7 +800,7 @@ impl Worker {
|
|
|
623
800
|
external_wft_tx,
|
|
624
801
|
deployment_options,
|
|
625
802
|
);
|
|
626
|
-
let worker_instance_key =
|
|
803
|
+
let worker_instance_key = client.worker_instance_key();
|
|
627
804
|
let worker_status = Arc::new(RwLock::new(WorkerStatus::Running));
|
|
628
805
|
|
|
629
806
|
let sdk_name_and_ver = client.sdk_name_and_version();
|
|
@@ -728,38 +905,63 @@ impl Worker {
|
|
|
728
905
|
nexus_mgr,
|
|
729
906
|
client_worker_registrator,
|
|
730
907
|
status: worker_status,
|
|
908
|
+
graceful_poll_shutdown,
|
|
731
909
|
})
|
|
732
910
|
}
|
|
733
911
|
|
|
734
|
-
///
|
|
735
|
-
///
|
|
736
|
-
|
|
912
|
+
/// Initiates async shutdown procedure, eventually ceases all polling of the server and shuts
|
|
913
|
+
/// down this worker. [Worker::poll_workflow_activation] and [Worker::poll_activity_task] should
|
|
914
|
+
/// be called until both return a `ShutDown` error to ensure that all outstanding work is
|
|
915
|
+
/// complete. This means that the lang sdk will need to call
|
|
916
|
+
/// [Worker::complete_workflow_activation] and [Worker::complete_activity_task] for those
|
|
917
|
+
/// workflows & activities until they are done. At that point, the lang SDK can end the process,
|
|
918
|
+
/// or drop the [Worker] instance via [Worker::finalize_shutdown], which will close the
|
|
919
|
+
/// connection and free resources. If you have set [WorkerConfig::task_types] to exclude
|
|
920
|
+
/// [WorkerTaskTypes::activity_only()], you may skip calling [Worker::poll_activity_task].
|
|
921
|
+
///
|
|
922
|
+
/// Lang implementations should use [Worker::initiate_shutdown] followed by
|
|
923
|
+
/// [Worker::finalize_shutdown].
|
|
924
|
+
pub async fn shutdown(&self) {
|
|
737
925
|
self.initiate_shutdown();
|
|
738
|
-
if let Some(workflows) = &self.workflows
|
|
739
|
-
&& let Some(name) = workflows.get_sticky_queue_name()
|
|
740
926
|
{
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
927
|
+
*self.status.write() = WorkerStatus::ShuttingDown;
|
|
928
|
+
}
|
|
929
|
+
let heartbeat = self
|
|
930
|
+
.client_worker_registrator
|
|
931
|
+
.heartbeat_manager
|
|
932
|
+
.as_ref()
|
|
933
|
+
.map(|hm| hm.heartbeat_callback.clone()());
|
|
934
|
+
let sticky_name = self
|
|
935
|
+
.workflows
|
|
936
|
+
.as_ref()
|
|
937
|
+
.and_then(|wf| wf.get_sticky_queue_name())
|
|
938
|
+
.unwrap_or_default();
|
|
939
|
+
// This is a best-effort call and we can still shut down the worker if it fails
|
|
940
|
+
let task_queue_types = self.config.task_types.to_task_queue_types();
|
|
941
|
+
match self
|
|
942
|
+
.client
|
|
943
|
+
.shutdown_worker(
|
|
944
|
+
sticky_name,
|
|
945
|
+
self.config.task_queue.clone(),
|
|
946
|
+
task_queue_types,
|
|
947
|
+
heartbeat,
|
|
948
|
+
)
|
|
949
|
+
.await
|
|
950
|
+
{
|
|
951
|
+
Err(err)
|
|
952
|
+
if !matches!(
|
|
953
|
+
err.code(),
|
|
954
|
+
tonic::Code::Unimplemented | tonic::Code::Unavailable
|
|
955
|
+
) =>
|
|
956
|
+
{
|
|
957
|
+
warn!(
|
|
958
|
+
"shutdown_worker rpc errored during worker shutdown: {:?}",
|
|
959
|
+
err
|
|
960
|
+
);
|
|
761
961
|
}
|
|
962
|
+
_ => {}
|
|
762
963
|
}
|
|
964
|
+
|
|
763
965
|
// We need to wait for all local activities to finish so no more workflow task heartbeats
|
|
764
966
|
// will be generated
|
|
765
967
|
if let Some(la_mgr) = &self.local_act_mgr {
|
|
@@ -789,12 +991,23 @@ impl Worker {
|
|
|
789
991
|
}
|
|
790
992
|
}
|
|
791
993
|
|
|
792
|
-
///
|
|
793
|
-
async
|
|
994
|
+
/// Completes shutdown and frees all resources. You should avoid simply dropping workers, as
|
|
995
|
+
/// this does not allow async tasks to report any panics that may have occurred cleanly.
|
|
996
|
+
///
|
|
997
|
+
/// This should be called only after [Worker::shutdown] has resolved and/or both polling
|
|
998
|
+
/// functions have returned `ShutDown` errors.
|
|
999
|
+
pub async fn finalize_shutdown(self) {
|
|
794
1000
|
self.shutdown().await;
|
|
795
1001
|
if let Some(b) = self.at_task_mgr {
|
|
796
1002
|
b.shutdown().await;
|
|
797
1003
|
}
|
|
1004
|
+
// Only after worker is fully shut down do we remove the heartbeat callback
|
|
1005
|
+
// from SharedNamespaceWorker, allowing for accurate worker shutdown
|
|
1006
|
+
// from Server POV
|
|
1007
|
+
let _res = self
|
|
1008
|
+
.client
|
|
1009
|
+
.workers()
|
|
1010
|
+
.finalize_unregister(self.worker_instance_key);
|
|
798
1011
|
}
|
|
799
1012
|
|
|
800
1013
|
pub(crate) fn shutdown_token(&self) -> CancellationToken {
|
|
@@ -837,11 +1050,26 @@ impl Worker {
|
|
|
837
1050
|
self.workflows.as_ref().and_then(|w| w.unused_wft_permits())
|
|
838
1051
|
}
|
|
839
1052
|
|
|
840
|
-
///
|
|
841
|
-
///
|
|
1053
|
+
/// Ask the worker for some work, returning an [ActivityTask]. It is then the language SDK's
|
|
1054
|
+
/// responsibility to call the appropriate activity code with the provided inputs. Blocks
|
|
1055
|
+
/// indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1056
|
+
///
|
|
1057
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
842
1058
|
///
|
|
843
|
-
///
|
|
844
|
-
|
|
1059
|
+
/// Local activities are returned first before polling the server if there are any.
|
|
1060
|
+
#[instrument(skip(self))]
|
|
1061
|
+
pub async fn poll_activity_task(&self) -> Result<ActivityTask, PollError> {
|
|
1062
|
+
loop {
|
|
1063
|
+
match self.activity_poll().await.transpose() {
|
|
1064
|
+
Some(r) => break r,
|
|
1065
|
+
None => {
|
|
1066
|
+
tokio::task::yield_now().await;
|
|
1067
|
+
continue;
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
|
|
845
1073
|
async fn activity_poll(&self) -> Result<Option<ActivityTask>, PollError> {
|
|
846
1074
|
let local_activities_complete = self.local_activities_complete.load(Ordering::Relaxed);
|
|
847
1075
|
let non_local_activities_complete =
|
|
@@ -924,8 +1152,26 @@ impl Worker {
|
|
|
924
1152
|
r
|
|
925
1153
|
}
|
|
926
1154
|
|
|
927
|
-
///
|
|
928
|
-
|
|
1155
|
+
/// Notify the Temporal service that an activity is still alive. Long running activities that
|
|
1156
|
+
/// take longer than `activity_heartbeat_timeout` to finish must call this function in order to
|
|
1157
|
+
/// report progress, otherwise the activity will timeout and a new attempt will be scheduled.
|
|
1158
|
+
///
|
|
1159
|
+
/// The first heartbeat request will be sent immediately; subsequent rapid calls to this
|
|
1160
|
+
/// function will result in heartbeat requests being aggregated and the last one received during
|
|
1161
|
+
/// the aggregation period will be sent to the server, where that period is defined as half the
|
|
1162
|
+
/// heartbeat timeout.
|
|
1163
|
+
///
|
|
1164
|
+
/// Unlike Java/Go SDKs we do not return cancellation status as part of heartbeat response and
|
|
1165
|
+
/// instead send it as a separate activity task to the lang, decoupling heartbeat and
|
|
1166
|
+
/// cancellation processing.
|
|
1167
|
+
///
|
|
1168
|
+
/// For now activities still need to send heartbeats if they want to receive cancellation
|
|
1169
|
+
/// requests. In the future we will change this and will dispatch cancellations more
|
|
1170
|
+
/// proactively. Note that this function does not block on the server call and returns
|
|
1171
|
+
/// immediately. Underlying validation errors are swallowed and logged; this has been agreed to
|
|
1172
|
+
/// be optimal behavior for the user as we don't want to break activity execution due to badly
|
|
1173
|
+
/// configured heartbeat options.
|
|
1174
|
+
pub fn record_activity_heartbeat(&self, details: ActivityHeartbeat) {
|
|
929
1175
|
if let Some(at_mgr) = self.at_task_mgr.as_ref() {
|
|
930
1176
|
let tt = TaskToken(details.task_token.clone());
|
|
931
1177
|
if let Err(e) = at_mgr.record_heartbeat(details) {
|
|
@@ -934,14 +1180,28 @@ impl Worker {
|
|
|
934
1180
|
}
|
|
935
1181
|
}
|
|
936
1182
|
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
1183
|
+
/// Tell the worker that an activity has finished executing. May (and should) be freely called
|
|
1184
|
+
/// concurrently.
|
|
1185
|
+
#[instrument(skip(self, completion),
|
|
1186
|
+
fields(task_token, status,
|
|
1187
|
+
task_queue=%self.config.task_queue, workflow_id, run_id))]
|
|
1188
|
+
pub async fn complete_activity_task(
|
|
941
1189
|
&self,
|
|
942
|
-
|
|
943
|
-
status: activity_execution_result::Status,
|
|
1190
|
+
completion: ActivityTaskCompletion,
|
|
944
1191
|
) -> Result<(), CompleteActivityError> {
|
|
1192
|
+
let task_token = TaskToken(completion.task_token);
|
|
1193
|
+
let status = if let Some(s) = completion.result.and_then(|r| r.status) {
|
|
1194
|
+
s
|
|
1195
|
+
} else {
|
|
1196
|
+
return Err(CompleteActivityError::MalformedActivityCompletion {
|
|
1197
|
+
reason: "Activity completion had empty result/status field".to_owned(),
|
|
1198
|
+
completion: None,
|
|
1199
|
+
});
|
|
1200
|
+
};
|
|
1201
|
+
|
|
1202
|
+
tracing::Span::current().record("task_token", task_token.to_string());
|
|
1203
|
+
tracing::Span::current().record("status", status.to_string());
|
|
1204
|
+
|
|
945
1205
|
validate_activity_completion(&status)?;
|
|
946
1206
|
if task_token.is_local_activity_task() {
|
|
947
1207
|
let as_la_res: LocalActivityExecutionResult = status.try_into()?;
|
|
@@ -957,8 +1217,20 @@ impl Worker {
|
|
|
957
1217
|
}
|
|
958
1218
|
}
|
|
959
1219
|
|
|
1220
|
+
/// Ask the worker for some work, returning a [WorkflowActivation]. It is then the language
|
|
1221
|
+
/// SDK's responsibility to call the appropriate workflow code with the provided inputs. Blocks
|
|
1222
|
+
/// indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1223
|
+
///
|
|
1224
|
+
/// It is important to understand that all activations must be responded to. There can only
|
|
1225
|
+
/// be one outstanding activation for a particular run of a workflow at any time. If an
|
|
1226
|
+
/// activation is not responded to, it will cause that workflow to become stuck forever.
|
|
1227
|
+
///
|
|
1228
|
+
/// See [WorkflowActivation] for more details on the expected behavior of lang w.r.t activation
|
|
1229
|
+
/// & job processing.
|
|
1230
|
+
///
|
|
1231
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
960
1232
|
#[instrument(skip(self), fields(run_id, workflow_id, task_queue=%self.config.task_queue))]
|
|
961
|
-
pub
|
|
1233
|
+
pub async fn poll_workflow_activation(&self) -> Result<WorkflowActivation, PollError> {
|
|
962
1234
|
match &self.workflows {
|
|
963
1235
|
Some(workflows) => {
|
|
964
1236
|
let r = workflows.next_workflow_activation().await;
|
|
@@ -980,10 +1252,14 @@ impl Worker {
|
|
|
980
1252
|
}
|
|
981
1253
|
}
|
|
982
1254
|
|
|
1255
|
+
/// Tell the worker that a workflow activation has completed. May (and should) be freely called
|
|
1256
|
+
/// concurrently. The future may take some time to resolve, as fetching more events might be
|
|
1257
|
+
/// necessary for completion to... complete - thus SDK implementers should make sure they do
|
|
1258
|
+
/// not serialize completions.
|
|
983
1259
|
#[instrument(skip(self, completion),
|
|
984
1260
|
fields(completion=%&completion, run_id=%completion.run_id, workflow_id,
|
|
985
1261
|
task_queue=%self.config.task_queue))]
|
|
986
|
-
pub
|
|
1262
|
+
pub async fn complete_workflow_activation(
|
|
987
1263
|
&self,
|
|
988
1264
|
completion: WorkflowActivationCompletion,
|
|
989
1265
|
) -> Result<(), CompleteWfError> {
|
|
@@ -1004,21 +1280,61 @@ impl Worker {
|
|
|
1004
1280
|
}
|
|
1005
1281
|
}
|
|
1006
1282
|
|
|
1283
|
+
/// Ask the worker for some nexus related work. It is then the language SDK's
|
|
1284
|
+
/// responsibility to call the appropriate nexus operation handler code with the provided
|
|
1285
|
+
/// inputs. Blocks indefinitely until such work is available or [Worker::shutdown] is called.
|
|
1286
|
+
///
|
|
1287
|
+
/// All tasks must be responded to for shutdown to complete.
|
|
1288
|
+
///
|
|
1289
|
+
/// Do not call poll concurrently. It handles polling the server concurrently internally.
|
|
1290
|
+
#[instrument(skip(self))]
|
|
1291
|
+
pub async fn poll_nexus_task(&self) -> Result<NexusTask, PollError> {
|
|
1292
|
+
match &self.nexus_mgr {
|
|
1293
|
+
Some(mgr) => mgr.next_nexus_task().await,
|
|
1294
|
+
None => Err(PollError::ShutDown),
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
/// Tell the worker that a nexus task has completed. May (and should) be freely called
|
|
1299
|
+
/// concurrently.
|
|
1007
1300
|
#[instrument(
|
|
1008
|
-
skip(self,
|
|
1009
|
-
fields(task_token
|
|
1301
|
+
skip(self, completion),
|
|
1302
|
+
fields(task_token, status, task_queue=%self.config.task_queue)
|
|
1010
1303
|
)]
|
|
1011
|
-
async fn complete_nexus_task(
|
|
1304
|
+
pub async fn complete_nexus_task(
|
|
1012
1305
|
&self,
|
|
1013
|
-
|
|
1014
|
-
status: nexus_task_completion::Status,
|
|
1306
|
+
completion: NexusTaskCompletion,
|
|
1015
1307
|
) -> Result<(), CompleteNexusError> {
|
|
1308
|
+
let status = if let Some(s) = completion.status {
|
|
1309
|
+
s
|
|
1310
|
+
} else {
|
|
1311
|
+
return Err(CompleteNexusError::MalformedNexusCompletion {
|
|
1312
|
+
reason: "Nexus completion had empty status field".to_owned(),
|
|
1313
|
+
});
|
|
1314
|
+
};
|
|
1315
|
+
let tt = TaskToken(completion.task_token);
|
|
1316
|
+
tracing::Span::current().record("task_token", tt.to_string());
|
|
1317
|
+
tracing::Span::current().record("status", status.to_string());
|
|
1318
|
+
|
|
1016
1319
|
match &self.nexus_mgr {
|
|
1017
1320
|
Some(mgr) => mgr.complete_task(tt, status, &*self.client).await,
|
|
1018
1321
|
None => Err(CompleteNexusError::NexusNotEnabled),
|
|
1019
1322
|
}
|
|
1020
1323
|
}
|
|
1021
1324
|
|
|
1325
|
+
/// Request that a workflow be evicted by its run id. This will generate a workflow activation
|
|
1326
|
+
/// with the eviction job inside it to be eventually returned by
|
|
1327
|
+
/// [Worker::poll_workflow_activation]. If the workflow had any existing outstanding
|
|
1328
|
+
/// activations, such activations are invalidated and subsequent completions of them will do
|
|
1329
|
+
/// nothing and log a warning.
|
|
1330
|
+
pub fn request_workflow_eviction(&self, run_id: &str) {
|
|
1331
|
+
self.request_wf_eviction(
|
|
1332
|
+
run_id,
|
|
1333
|
+
"Eviction explicitly requested by lang",
|
|
1334
|
+
EvictionReason::LangRequested,
|
|
1335
|
+
);
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1022
1338
|
/// Request a workflow eviction
|
|
1023
1339
|
pub(crate) fn request_wf_eviction(
|
|
1024
1340
|
&self,
|
|
@@ -1033,6 +1349,63 @@ impl Worker {
|
|
|
1033
1349
|
}
|
|
1034
1350
|
}
|
|
1035
1351
|
|
|
1352
|
+
/// Return this worker's config
|
|
1353
|
+
pub fn get_config(&self) -> &WorkerConfig {
|
|
1354
|
+
&self.config
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
/// Initiate shutdown. See [Worker::shutdown]; this is just a sync version that starts the
|
|
1358
|
+
/// process. You can then wait on `shutdown` or [Worker::finalize_shutdown].
|
|
1359
|
+
pub fn initiate_shutdown(&self) {
|
|
1360
|
+
if !self.shutdown_token.is_cancelled() {
|
|
1361
|
+
info!(
|
|
1362
|
+
task_queue=%self.config.task_queue,
|
|
1363
|
+
namespace=%self.config.namespace,
|
|
1364
|
+
"Initiated shutdown",
|
|
1365
|
+
);
|
|
1366
|
+
}
|
|
1367
|
+
self.shutdown_token.cancel();
|
|
1368
|
+
{
|
|
1369
|
+
*self.status.write() = WorkerStatus::ShuttingDown;
|
|
1370
|
+
}
|
|
1371
|
+
// First, disable Eager Workflow Start
|
|
1372
|
+
if !self.client_worker_registrator.shared_namespace_worker {
|
|
1373
|
+
let _res = self
|
|
1374
|
+
.client
|
|
1375
|
+
.workers()
|
|
1376
|
+
.unregister_slot_provider(self.worker_instance_key);
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
// Push a BumpStream message to the workflow activation queue. This ensures that
|
|
1380
|
+
// any pending workflow activation polls will resolve, even if there are no other inputs.
|
|
1381
|
+
if let Some(workflows) = &self.workflows {
|
|
1382
|
+
workflows.bump_stream();
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
// Second, we want to stop polling of both activity and workflow tasks
|
|
1386
|
+
if let Some(atm) = self.at_task_mgr.as_ref() {
|
|
1387
|
+
atm.initiate_shutdown();
|
|
1388
|
+
}
|
|
1389
|
+
// Let the manager know that shutdown has been initiated to try to unblock the local
|
|
1390
|
+
// activity poll in case this worker is an activity-only worker.
|
|
1391
|
+
if let Some(la_mgr) = &self.local_act_mgr {
|
|
1392
|
+
la_mgr.shutdown_initiated();
|
|
1393
|
+
|
|
1394
|
+
// If workflows have never been polled, immediately tell the local activity manager
|
|
1395
|
+
// that workflows have shut down, so it can proceed with shutdown without waiting.
|
|
1396
|
+
// This is particularly important for activity-only workers.
|
|
1397
|
+
if self.workflows.as_ref().is_none_or(|w| !w.ever_polled()) {
|
|
1398
|
+
la_mgr.workflows_have_shutdown();
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
/// Unique identifier for this worker instance.
|
|
1404
|
+
/// This must be stable across the worker's lifetime and unique per instance.
|
|
1405
|
+
pub fn worker_instance_key(&self) -> Uuid {
|
|
1406
|
+
self.worker_instance_key
|
|
1407
|
+
}
|
|
1408
|
+
|
|
1036
1409
|
/// Sets a function to be called at the end of each activation completion
|
|
1037
1410
|
pub(crate) fn set_post_activate_hook(
|
|
1038
1411
|
&mut self,
|
|
@@ -1073,22 +1446,448 @@ impl Worker {
|
|
|
1073
1446
|
dbg_panic!("trying to notify local result when workflows not enabled for this worker");
|
|
1074
1447
|
}
|
|
1075
1448
|
}
|
|
1449
|
+
}
|
|
1076
1450
|
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1451
|
+
/// Errors thrown by [crate::Worker::validate]
|
|
1452
|
+
#[derive(thiserror::Error, Debug)]
|
|
1453
|
+
pub enum WorkerValidationError {
|
|
1454
|
+
/// The namespace provided to the worker does not exist on the server.
|
|
1455
|
+
#[error("Namespace {namespace} was not found or otherwise could not be described: {source:?}")]
|
|
1456
|
+
NamespaceDescribeError {
|
|
1457
|
+
/// The underlying server error.
|
|
1458
|
+
source: tonic::Status,
|
|
1459
|
+
/// The associated namespace.
|
|
1460
|
+
namespace: String,
|
|
1461
|
+
},
|
|
1462
|
+
}
|
|
1463
|
+
|
|
1464
|
+
/// Errors thrown by [crate::Worker] polling methods
|
|
1465
|
+
#[derive(thiserror::Error, Debug)]
|
|
1466
|
+
pub enum PollError {
|
|
1467
|
+
/// [crate::Worker::shutdown] was called, and there are no more tasks to be handled from this
|
|
1468
|
+
/// poll function. Lang must call [crate::Worker::complete_workflow_activation],
|
|
1469
|
+
/// [crate::Worker::complete_activity_task], or
|
|
1470
|
+
/// [crate::Worker::complete_nexus_task] for any remaining tasks, and then may exit.
|
|
1471
|
+
#[error("Core is shut down and there are no more tasks of this kind")]
|
|
1472
|
+
ShutDown,
|
|
1473
|
+
/// Unhandled error when calling the temporal server. Core will attempt to retry any non-fatal
|
|
1474
|
+
/// errors, so lang should consider this fatal.
|
|
1475
|
+
#[error("Unhandled grpc error when polling: {0:?}")]
|
|
1476
|
+
TonicError(#[from] tonic::Status),
|
|
1477
|
+
}
|
|
1478
|
+
|
|
1479
|
+
/// Errors thrown by [crate::Worker::complete_workflow_activation]
|
|
1480
|
+
#[derive(thiserror::Error, Debug)]
|
|
1481
|
+
#[allow(clippy::large_enum_variant)]
|
|
1482
|
+
pub enum CompleteWfError {
|
|
1483
|
+
/// Lang SDK sent us a malformed workflow completion. This likely means a bug in the lang sdk.
|
|
1484
|
+
#[error("Lang SDK sent us a malformed workflow completion for run ({run_id}): {reason}")]
|
|
1485
|
+
MalformedWorkflowCompletion {
|
|
1486
|
+
/// Reason the completion was malformed
|
|
1487
|
+
reason: String,
|
|
1488
|
+
/// The run associated with the completion
|
|
1489
|
+
run_id: String,
|
|
1490
|
+
},
|
|
1491
|
+
/// Workflows have not been enabled on this worker.
|
|
1492
|
+
#[error("Workflows are not enabled on this worker")]
|
|
1493
|
+
WorkflowNotEnabled,
|
|
1494
|
+
}
|
|
1495
|
+
|
|
1496
|
+
/// Errors thrown by [crate::Worker::complete_activity_task]
|
|
1497
|
+
#[derive(thiserror::Error, Debug)]
|
|
1498
|
+
#[allow(clippy::large_enum_variant)]
|
|
1499
|
+
pub enum CompleteActivityError {
|
|
1500
|
+
/// Lang SDK sent us a malformed activity completion. This likely means a bug in the lang sdk.
|
|
1501
|
+
#[error("Lang SDK sent us a malformed activity completion ({reason}): {completion:?}")]
|
|
1502
|
+
MalformedActivityCompletion {
|
|
1503
|
+
/// Reason the completion was malformed
|
|
1504
|
+
reason: String,
|
|
1505
|
+
/// The completion, which may not be included to avoid unnecessary copies.
|
|
1506
|
+
completion: Option<ActivityExecutionResult>,
|
|
1507
|
+
},
|
|
1508
|
+
/// Activities have not been enabled on this worker.
|
|
1509
|
+
#[error("Activities are not enabled on this worker")]
|
|
1510
|
+
ActivityNotEnabled,
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
/// Errors thrown by [crate::Worker::complete_nexus_task]
|
|
1514
|
+
#[derive(thiserror::Error, Debug)]
|
|
1515
|
+
pub enum CompleteNexusError {
|
|
1516
|
+
/// Lang SDK sent us a malformed nexus completion. This likely means a bug in the lang sdk.
|
|
1517
|
+
#[error("Lang SDK sent us a malformed nexus completion: {reason}")]
|
|
1518
|
+
MalformedNexusCompletion {
|
|
1519
|
+
/// Reason the completion was malformed
|
|
1520
|
+
reason: String,
|
|
1521
|
+
},
|
|
1522
|
+
/// Nexus has not been enabled on this worker. If a user registers any Nexus handlers, the
|
|
1523
|
+
#[error("Nexus is not enabled on this worker")]
|
|
1524
|
+
NexusNotEnabled,
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
/// Kinds of error that can come up while processing a workflow. Depending on user preference,
/// each may be handled as a workflow task failure or as a failure of the whole workflow.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum WorkflowErrorType {
    /// The workflow behaved nondeterministically.
    Nondeterminism,
}
|
|
1534
|
+
|
|
1535
|
+
/// This trait allows users to customize the performance characteristics of workers dynamically.
|
|
1536
|
+
/// For more, see the docstrings of the traits in the return types of its functions.
|
|
1537
|
+
pub trait WorkerTuner {
|
|
1538
|
+
/// Return a [SlotSupplier] for workflow tasks. Note that workflow task slot suppliers must be
|
|
1539
|
+
/// willing to hand out a minimum of one non-sticky slot and one sticky slot if workflow caching
|
|
1540
|
+
/// is enabled, otherwise the worker may fail to process new tasks.
|
|
1541
|
+
fn workflow_task_slot_supplier(
|
|
1542
|
+
&self,
|
|
1543
|
+
) -> Arc<dyn SlotSupplier<SlotKind = WorkflowSlotKind> + Send + Sync>;
|
|
1544
|
+
|
|
1545
|
+
/// Return a [SlotSupplier] for activity tasks
|
|
1546
|
+
fn activity_task_slot_supplier(
|
|
1547
|
+
&self,
|
|
1548
|
+
) -> Arc<dyn SlotSupplier<SlotKind = ActivitySlotKind> + Send + Sync>;
|
|
1549
|
+
|
|
1550
|
+
/// Return a [SlotSupplier] for local activities
|
|
1551
|
+
fn local_activity_slot_supplier(
|
|
1552
|
+
&self,
|
|
1553
|
+
) -> Arc<dyn SlotSupplier<SlotKind = LocalActivitySlotKind> + Send + Sync>;
|
|
1554
|
+
|
|
1555
|
+
/// Return a [SlotSupplier] for nexus tasks
|
|
1556
|
+
fn nexus_task_slot_supplier(
|
|
1557
|
+
&self,
|
|
1558
|
+
) -> Arc<dyn SlotSupplier<SlotKind = NexusSlotKind> + Send + Sync>;
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
/// Implementing this trait allows users to customize how many tasks of certain kinds the worker
|
|
1562
|
+
/// will perform concurrently.
|
|
1563
|
+
///
|
|
1564
|
+
/// Note that, for implementations on workflow tasks ([WorkflowSlotKind]), workers that have the
|
|
1565
|
+
/// workflow cache enabled should be willing to hand out _at least_ two slots, to avoid the worker
|
|
1566
|
+
/// becoming stuck only polling on the worker's sticky queue.
|
|
1567
|
+
#[async_trait::async_trait]
|
|
1568
|
+
pub trait SlotSupplier {
|
|
1569
|
+
/// The kind of slot this supplier is supplying.
|
|
1570
|
+
type SlotKind: SlotKind;
|
|
1571
|
+
/// Block until a slot is available, then return a permit for the slot.
|
|
1572
|
+
async fn reserve_slot(&self, ctx: &dyn SlotReservationContext) -> SlotSupplierPermit;
|
|
1573
|
+
|
|
1574
|
+
/// Try to immediately reserve a slot, returning None if one is not available. Implementations
|
|
1575
|
+
/// must not block, or risk blocking the async event loop.
|
|
1576
|
+
fn try_reserve_slot(&self, ctx: &dyn SlotReservationContext) -> Option<SlotSupplierPermit>;
|
|
1577
|
+
|
|
1578
|
+
/// Marks a slot as actually now being used. This is separate from reserving one because the
|
|
1579
|
+
/// pollers need to reserve a slot before they have actually obtained work from server. Once
|
|
1580
|
+
/// that task is obtained (and validated) then the slot can actually be used to work on the
|
|
1581
|
+
/// task.
|
|
1582
|
+
///
|
|
1583
|
+
/// Users' implementation of this can choose to emit metrics, or otherwise leverage the
|
|
1584
|
+
/// information provided by the `info` parameter to be better able to make future decisions
|
|
1585
|
+
/// about whether a slot should be handed out.
|
|
1586
|
+
fn mark_slot_used(&self, ctx: &dyn SlotMarkUsedContext<SlotKind = Self::SlotKind>);
|
|
1587
|
+
|
|
1588
|
+
/// Frees a slot.
|
|
1589
|
+
fn release_slot(&self, ctx: &dyn SlotReleaseContext<SlotKind = Self::SlotKind>);
|
|
1590
|
+
|
|
1591
|
+
/// If this implementation knows how many slots are available at any moment, it should return
|
|
1592
|
+
/// that here.
|
|
1593
|
+
fn available_slots(&self) -> Option<usize> {
|
|
1594
|
+
None
|
|
1595
|
+
}
|
|
1596
|
+
|
|
1597
|
+
/// Returns a human-friendly identifier describing this supplier implementation for
|
|
1598
|
+
/// diagnostics and telemetry.
|
|
1599
|
+
fn slot_supplier_kind(&self) -> String {
|
|
1600
|
+
"Custom".to_string()
|
|
1601
|
+
}
|
|
1602
|
+
}
|
|
1603
|
+
|
|
1604
|
+
/// Context for slot reservation.
|
|
1605
|
+
pub trait SlotReservationContext: Send + Sync {
|
|
1606
|
+
/// Returns the name of the task queue this worker is polling
|
|
1607
|
+
fn task_queue(&self) -> &str;
|
|
1608
|
+
|
|
1609
|
+
/// Returns the identity of the worker
|
|
1610
|
+
fn worker_identity(&self) -> &str;
|
|
1611
|
+
|
|
1612
|
+
/// Returns the deployment version of the worker, if one is set.
|
|
1613
|
+
fn worker_deployment_version(&self) -> &Option<WorkerDeploymentVersion>;
|
|
1614
|
+
|
|
1615
|
+
/// Returns the number of currently outstanding slot permits, whether used or un-used.
|
|
1616
|
+
fn num_issued_slots(&self) -> usize;
|
|
1617
|
+
|
|
1618
|
+
/// Returns true iff this is a sticky poll for a workflow task
|
|
1619
|
+
fn is_sticky(&self) -> bool;
|
|
1620
|
+
|
|
1621
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1622
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1623
|
+
None
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1627
|
+
/// Context for slots being marked as used.
|
|
1628
|
+
pub trait SlotMarkUsedContext: Send + Sync {
|
|
1629
|
+
/// The kind of slot being marked used.
|
|
1630
|
+
type SlotKind: SlotKind;
|
|
1631
|
+
/// The slot permit that is being used
|
|
1632
|
+
fn permit(&self) -> &SlotSupplierPermit;
|
|
1633
|
+
/// Returns the info of slot that was marked as used
|
|
1634
|
+
fn info(&self) -> &<Self::SlotKind as SlotKind>::Info;
|
|
1635
|
+
|
|
1636
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1637
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1638
|
+
None
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
/// Context for slots being released.
|
|
1643
|
+
pub trait SlotReleaseContext: Send + Sync {
|
|
1644
|
+
/// The kind of slot being marked released.
|
|
1645
|
+
type SlotKind: SlotKind;
|
|
1646
|
+
/// The slot permit that is being used
|
|
1647
|
+
fn permit(&self) -> &SlotSupplierPermit;
|
|
1648
|
+
/// Returns the info of slot that was released, if it was used
|
|
1649
|
+
fn info(&self) -> Option<&<Self::SlotKind as SlotKind>::Info>;
|
|
1650
|
+
|
|
1651
|
+
/// Returns the metrics meter if metrics are enabled
|
|
1652
|
+
fn get_metrics_meter(&self) -> Option<TemporalMeter> {
|
|
1653
|
+
None
|
|
1654
|
+
}
|
|
1655
|
+
}
|
|
1656
|
+
|
|
1657
|
+
/// Permit handed out by a [SlotSupplier], optionally carrying user-defined data.
#[derive(Debug, Default)]
pub struct SlotSupplierPermit {
    user_data: Option<Box<dyn Any + Send + Sync>>,
}

impl SlotSupplierPermit {
    /// Build a permit that carries `user_data`.
    pub fn with_user_data<T: Any + Send + Sync>(user_data: T) -> Self {
        let boxed: Box<dyn Any + Send + Sync> = Box::new(user_data);
        Self {
            user_data: Some(boxed),
        }
    }

    /// Borrow the attached data as a `T`.
    ///
    /// Yields `None` when the permit carries no data or the data is of a different type.
    pub fn user_data<T: Any + Send + Sync>(&self) -> Option<&T> {
        self.user_data
            .as_deref()
            .and_then(|data| data.downcast_ref::<T>())
    }

    /// Mutably borrow the attached data as a `T`.
    ///
    /// Yields `None` when the permit carries no data or the data is of a different type.
    pub fn user_data_mut<T: Any + Send + Sync>(&mut self) -> Option<&mut T> {
        self.user_data
            .as_deref_mut()
            .and_then(|data| data.downcast_mut::<T>())
    }
}
|
|
1680
|
+
|
|
1681
|
+
/// What kind of task the slot is used for.
|
|
1682
|
+
#[derive(Debug, Copy, Clone, derive_more::Display, Eq, PartialEq)]
|
|
1683
|
+
pub enum SlotKindType {
|
|
1684
|
+
/// Workflow tasks.
|
|
1685
|
+
Workflow,
|
|
1686
|
+
/// Activity tasks.
|
|
1687
|
+
Activity,
|
|
1688
|
+
/// Local activity tasks.
|
|
1689
|
+
LocalActivity,
|
|
1690
|
+
/// Nexus tasks.
|
|
1691
|
+
Nexus,
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1694
|
+
/// Marker type standing for workflow slots.
#[derive(Clone, Copy, Debug)]
pub struct WorkflowSlotKind {}

/// Marker type standing for activity slots.
#[derive(Clone, Copy, Debug)]
pub struct ActivitySlotKind {}

/// Marker type standing for local activity slots.
#[derive(Clone, Copy, Debug)]
pub struct LocalActivitySlotKind {}

/// Marker type standing for nexus slots.
#[derive(Clone, Copy, Debug)]
pub struct NexusSlotKind {}
|
|
1706
|
+
|
|
1707
|
+
/// Contextual information about in-use slots.
|
|
1708
|
+
pub enum SlotInfo<'a> {
|
|
1709
|
+
/// For workflow slots.
|
|
1710
|
+
Workflow(&'a WorkflowSlotInfo),
|
|
1711
|
+
/// For activity slots.
|
|
1712
|
+
Activity(&'a ActivitySlotInfo),
|
|
1713
|
+
/// For local activity slots.
|
|
1714
|
+
LocalActivity(&'a LocalActivitySlotInfo),
|
|
1715
|
+
/// For nexus slots.
|
|
1716
|
+
Nexus(&'a NexusSlotInfo),
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
/// Allows reifying slot info into the appropriate type.
|
|
1720
|
+
pub trait SlotInfoTrait: prost::Message {
|
|
1721
|
+
/// Downcast a protobuf message into the enum.
|
|
1722
|
+
fn downcast(&self) -> SlotInfo<'_>;
|
|
1723
|
+
}
|
|
1724
|
+
impl SlotInfoTrait for WorkflowSlotInfo {
|
|
1725
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1726
|
+
SlotInfo::Workflow(self)
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
impl SlotInfoTrait for ActivitySlotInfo {
|
|
1730
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1731
|
+
SlotInfo::Activity(self)
|
|
1732
|
+
}
|
|
1733
|
+
}
|
|
1734
|
+
impl SlotInfoTrait for LocalActivitySlotInfo {
|
|
1735
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1736
|
+
SlotInfo::LocalActivity(self)
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
impl SlotInfoTrait for NexusSlotInfo {
|
|
1740
|
+
fn downcast(&self) -> SlotInfo<'_> {
|
|
1741
|
+
SlotInfo::Nexus(self)
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
/// Associates slot info/kinds together.
|
|
1746
|
+
pub trait SlotKind {
|
|
1747
|
+
/// The associated info for this kind.
|
|
1748
|
+
type Info: SlotInfoTrait;
|
|
1749
|
+
|
|
1750
|
+
/// Return this kind.
|
|
1751
|
+
fn kind() -> SlotKindType;
|
|
1752
|
+
}
|
|
1753
|
+
impl SlotKind for WorkflowSlotKind {
|
|
1754
|
+
type Info = WorkflowSlotInfo;
|
|
1755
|
+
|
|
1756
|
+
fn kind() -> SlotKindType {
|
|
1757
|
+
SlotKindType::Workflow
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
impl SlotKind for ActivitySlotKind {
|
|
1761
|
+
type Info = ActivitySlotInfo;
|
|
1762
|
+
|
|
1763
|
+
fn kind() -> SlotKindType {
|
|
1764
|
+
SlotKindType::Activity
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
impl SlotKind for LocalActivitySlotKind {
|
|
1768
|
+
type Info = LocalActivitySlotInfo;
|
|
1769
|
+
|
|
1770
|
+
fn kind() -> SlotKindType {
|
|
1771
|
+
SlotKindType::LocalActivity
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
impl SlotKind for NexusSlotKind {
|
|
1775
|
+
type Info = NexusSlotInfo;
|
|
1776
|
+
|
|
1777
|
+
fn kind() -> SlotKindType {
|
|
1778
|
+
SlotKindType::Nexus
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
/// The strategies available for polling tasks.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PollerBehavior {
    /// Poll whenever a slot is available, with at most the given number of polls. The maximum
    /// cannot be less than two for workflow tasks, or one for other tasks.
    SimpleMaximum(usize),
    /// Scale the number of pollers automatically, driven by feedback from the server. A slot
    /// still has to be available before polling begins.
    Autoscaling {
        /// Number of poll calls that will always be attempted (assuming slots are available).
        /// Cannot be zero.
        minimum: usize,
        /// Upper bound on the number of poll calls open at once. Must be >= `minimum`.
        maximum: usize,
        /// Number of polls attempted initially, before scaling kicks in. Must lie between
        /// `minimum` and `maximum`.
        initial: usize,
    },
}

impl PollerBehavior {
    /// Whether this behavior is the autoscaling one.
    pub fn is_autoscaling(&self) -> bool {
        match self {
            PollerBehavior::Autoscaling { .. } => true,
            PollerBehavior::SimpleMaximum(_) => false,
        }
    }

    /// Check the behavior's parameters.
    ///
    /// Returns `Err` with a description of the first violated rule: a `SimpleMaximum` below 1,
    /// an autoscaling `minimum` below 1, a `maximum` smaller than `minimum`, or an `initial`
    /// outside of `minimum..=maximum`. Only the lower bound of one is enforced here for
    /// `SimpleMaximum`.
    pub fn validate(&self) -> Result<(), String> {
        match *self {
            PollerBehavior::SimpleMaximum(max) if max < 1 => {
                Err("SimpleMaximum poller behavior must be at least 1".to_owned())
            }
            PollerBehavior::SimpleMaximum(_) => Ok(()),
            PollerBehavior::Autoscaling {
                minimum,
                maximum,
                initial,
            } => {
                if minimum < 1 {
                    Err("Autoscaling minimum poller behavior must be at least 1".to_owned())
                } else if maximum < minimum {
                    Err("Autoscaling maximum must be greater than or equal to minimum".to_owned())
                } else if !(minimum..=maximum).contains(&initial) {
                    Err("Autoscaling initial must be between minimum and maximum".to_owned())
                } else {
                    Ok(())
                }
            }
        }
    }
}
|
|
1091
1839
|
|
|
1840
|
+
/// Strategy a core worker uses for versioning.
|
|
1841
|
+
#[derive(Clone, Debug)]
|
|
1842
|
+
pub enum WorkerVersioningStrategy {
|
|
1843
|
+
/// Don't enable any versioning
|
|
1844
|
+
None {
|
|
1845
|
+
/// Build ID may still be passed as a way to identify the worker, or may be left empty.
|
|
1846
|
+
build_id: String,
|
|
1847
|
+
},
|
|
1848
|
+
/// Maybe use the modern deployment-based versioning, or just pass a deployment version.
|
|
1849
|
+
WorkerDeploymentBased(WorkerDeploymentOptions),
|
|
1850
|
+
/// Use the legacy build-id-based whole worker versioning.
|
|
1851
|
+
LegacyBuildIdBased {
|
|
1852
|
+
/// A Build ID to use, must be non-empty.
|
|
1853
|
+
build_id: String,
|
|
1854
|
+
},
|
|
1855
|
+
}
|
|
1856
|
+
|
|
1857
|
+
impl Default for WorkerVersioningStrategy {
|
|
1858
|
+
fn default() -> Self {
|
|
1859
|
+
WorkerVersioningStrategy::None {
|
|
1860
|
+
build_id: String::new(),
|
|
1861
|
+
}
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1864
|
+
|
|
1865
|
+
impl WorkerVersioningStrategy {
|
|
1866
|
+
/// Return the build ID associated with this strategy.
|
|
1867
|
+
pub fn build_id(&self) -> &str {
|
|
1868
|
+
match self {
|
|
1869
|
+
WorkerVersioningStrategy::None { build_id } => build_id,
|
|
1870
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => &opts.version.build_id,
|
|
1871
|
+
WorkerVersioningStrategy::LegacyBuildIdBased { build_id } => build_id,
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
/// Returns true if this uses "build id based" legacy versioning.
|
|
1876
|
+
pub fn uses_build_id_based(&self) -> bool {
|
|
1877
|
+
matches!(self, WorkerVersioningStrategy::LegacyBuildIdBased { .. })
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
/// Returns the default versioning behavior associated with this strategy, if any.
|
|
1881
|
+
pub fn default_versioning_behavior(&self) -> Option<VersioningBehavior> {
|
|
1882
|
+
match self {
|
|
1883
|
+
WorkerVersioningStrategy::WorkerDeploymentBased(opts) => {
|
|
1884
|
+
opts.default_versioning_behavior
|
|
1885
|
+
}
|
|
1886
|
+
_ => None,
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
}
|
|
1890
|
+
|
|
1092
1891
|
struct ClientWorkerRegistrator {
|
|
1093
1892
|
worker_instance_key: Uuid,
|
|
1094
1893
|
slot_provider: SlotProvider,
|
|
@@ -1204,7 +2003,7 @@ impl WorkerHeartbeatManager {
|
|
|
1204
2003
|
as f32,
|
|
1205
2004
|
|
|
1206
2005
|
// Set by SharedNamespaceWorker because it relies on the client
|
|
1207
|
-
|
|
2006
|
+
worker_grouping_key: String::new(),
|
|
1208
2007
|
}),
|
|
1209
2008
|
task_queue: config.task_queue.clone(),
|
|
1210
2009
|
deployment_version,
|
|
@@ -1384,13 +2183,13 @@ mod tests {
|
|
|
1384
2183
|
use crate::{
|
|
1385
2184
|
advance_fut,
|
|
1386
2185
|
test_help::test_worker_cfg,
|
|
1387
|
-
worker::
|
|
2186
|
+
worker::{
|
|
2187
|
+
PollerBehavior,
|
|
2188
|
+
client::mocks::{mock_manual_worker_client, mock_worker_client},
|
|
2189
|
+
},
|
|
1388
2190
|
};
|
|
1389
2191
|
use futures_util::FutureExt;
|
|
1390
|
-
use temporalio_common::
|
|
1391
|
-
protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse,
|
|
1392
|
-
worker::PollerBehavior,
|
|
1393
|
-
};
|
|
2192
|
+
use temporalio_common::protos::temporal::api::workflowservice::v1::PollActivityTaskQueueResponse;
|
|
1394
2193
|
|
|
1395
2194
|
#[tokio::test]
|
|
1396
2195
|
async fn activity_timeouts_maintain_permit() {
|
|
@@ -1436,10 +2235,11 @@ mod tests {
|
|
|
1436
2235
|
|
|
1437
2236
|
#[test]
|
|
1438
2237
|
fn max_polls_calculated_properly() {
|
|
1439
|
-
let cfg =
|
|
1440
|
-
.
|
|
1441
|
-
.
|
|
1442
|
-
|
|
2238
|
+
let cfg = {
|
|
2239
|
+
let mut cfg = test_worker_cfg().build().unwrap();
|
|
2240
|
+
cfg.workflow_task_poller_behavior = PollerBehavior::SimpleMaximum(5_usize);
|
|
2241
|
+
cfg
|
|
2242
|
+
};
|
|
1443
2243
|
assert_eq!(
|
|
1444
2244
|
wft_poller_behavior(&cfg, false),
|
|
1445
2245
|
PollerBehavior::SimpleMaximum(1)
|
|
@@ -1453,10 +2253,144 @@ mod tests {
|
|
|
1453
2253
|
#[test]
fn max_polls_zero_is_err() {
    // A workflow task poller maximum of zero has to be rejected when the config is built.
    let zero_pollers = PollerBehavior::SimpleMaximum(0_usize);
    let versioning = WorkerVersioningStrategy::None {
        build_id: "test".to_string(),
    };
    assert!(
        WorkerConfig::builder()
            .namespace("test")
            .task_queue("test")
            .versioning_strategy(versioning)
            .task_types(WorkerTaskTypes::all())
            .workflow_task_poller_behavior(zero_pollers)
            .build()
            .is_err()
    );
}
|
|
2268
|
+
|
|
2269
|
+
fn default_versioning_strategy() -> WorkerVersioningStrategy {
|
|
2270
|
+
WorkerVersioningStrategy::None {
|
|
2271
|
+
build_id: String::new(),
|
|
2272
|
+
}
|
|
2273
|
+
}
|
|
2274
|
+
|
|
2275
|
+
#[test]
fn test_default_configuration_polls_all_types() {
    // Build a config with every task type enabled and check each flag individually.
    let config = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes::all())
        .build()
        .unwrap();

    let types = &config.task_types;
    let expectations = [
        (types.enable_workflows, "Should poll workflows by default"),
        (
            types.enable_local_activities,
            "should poll local activities by default",
        ),
        (
            types.enable_remote_activities,
            "Should poll remote activities by default",
        ),
        (types.enable_nexus, "Should poll nexus by default"),
    ];
    for (enabled, message) in expectations {
        assert!(enabled, "{message}");
    }
}
|
|
2300
|
+
|
|
2301
|
+
/// Building a config must fail for task type selections that cannot work: none enabled at all,
/// and local activities enabled without workflows.
#[test]
fn test_invalid_task_types_fails_validation() {
    // empty task types
    let result = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes {
            enable_workflows: false,
            enable_local_activities: false,
            enable_remote_activities: false,
            enable_nexus: false,
        })
        .build();

    assert!(result.is_err(), "Empty task_types should fail validation");
    let err = result.err().unwrap();
    assert!(
        err.contains("At least one task type"),
        "Error should mention task types: {err}",
    );

    // local activities with no workflows
    let result = WorkerConfig::builder()
        .namespace("default")
        .task_queue("test-queue")
        .versioning_strategy(default_versioning_strategy())
        .task_types(WorkerTaskTypes {
            enable_workflows: false,
            enable_local_activities: true,
            enable_remote_activities: false,
            enable_nexus: false,
        })
        .build();

    // The failure messages name this scenario, so a regression is not misreported as the
    // "empty task types" case above.
    assert!(
        result.is_err(),
        "Local activities without workflows should fail validation"
    );
    let err = result.err().unwrap();
    assert!(
        err.contains("cannot enable local activities without workflows"),
        "Error should mention local activities without workflows: {err}",
    );
}
|
|
2343
|
+
|
|
2344
|
+
#[test]
fn test_all_combinations() {
    // Hand-built selections that have no dedicated constructor.
    let workflows_and_activities = WorkerTaskTypes {
        enable_workflows: true,
        enable_local_activities: true,
        enable_remote_activities: true,
        enable_nexus: false,
    };
    let workflows_and_nexus = WorkerTaskTypes {
        enable_workflows: true,
        enable_local_activities: true,
        enable_remote_activities: false,
        enable_nexus: true,
    };
    let activities_and_nexus = WorkerTaskTypes {
        enable_workflows: false,
        enable_local_activities: false,
        enable_remote_activities: true,
        enable_nexus: true,
    };

    let combinations = [
        (WorkerTaskTypes::workflow_only(), "workflows only"),
        (WorkerTaskTypes::activity_only(), "activities only"),
        (WorkerTaskTypes::nexus_only(), "nexus only"),
        (workflows_and_activities, "workflows + activities"),
        (workflows_and_nexus, "workflows + nexus"),
        (activities_and_nexus, "activities + nexus"),
        (WorkerTaskTypes::all(), "all types"),
    ];

    // Every selection must build and come back out of the config unchanged.
    for (expected, description) in combinations {
        let built = WorkerConfig::builder()
            .namespace("default")
            .task_queue("test-queue")
            .versioning_strategy(default_versioning_strategy())
            .task_types(expected)
            .build()
            .unwrap();

        assert_eq!(
            built.task_types, expected,
            "Effective types should match for {description}",
        );
    }
}
|
|
1462
2396
|
}
|